Directory Comparison Tool in Python
Creating a Directory Comparison Tool in Python
Introduction:
In this blog post, we'll walk through the process of creating a Python script that compares two directories and generates a detailed HTML report of the differences. This tool can be incredibly useful for tracking changes in file systems, comparing backups, or managing different versions of a project.
Step 1: Setting Up the Environment
First, let's import the necessary libraries:
```python
import os
import difflib
import html
from datetime import datetime
```
Step 2: Directory Traversal
We'll start by creating a function to traverse directories:
```python
def traverse_directory(directory):
    """Collect every file found beneath ``directory``.

    The tree is walked with ``os.walk``; sub-directories themselves are not
    listed, only the files they contain.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of file paths, each expressed relative to ``directory``.
    """
    return [
        os.path.relpath(os.path.join(parent, name), directory)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]
```
This function uses `os.walk()` to recursively traverse the directory and returns a list of every file it finds, with each path expressed relative to the starting directory.
Step 3: File Comparison
Next, we'll create a function to compare two files:
```python
def compare_files(file1, file2):
    """Compare two text files line by line.

    Args:
        file1: Path of the original file.
        file2: Path of the modified file.

    Returns:
        A tuple ``(changes, insertions, deletions, diff)``. ``diff`` is the
        list of lines produced by ``difflib.unified_diff`` (empty when the
        files are identical). The three counts cover only real content lines;
        the ``---``/``+++`` file-header lines of the diff are not counted.
    """

    def read_file(filename):
        # Try UTF-8 first and fall back to Latin-1. Latin-1 maps every byte
        # to a character, so the fallback cannot raise UnicodeDecodeError and
        # no further encodings need to be attempted.
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                return f.readlines()
        except UnicodeDecodeError:
            with open(filename, 'r', encoding='latin-1') as f:
                return f.readlines()

    lines1 = read_file(file1)
    lines2 = read_file(file2)
    diff = list(difflib.unified_diff(lines1, lines2, lineterm=''))

    # A non-empty unified diff always starts with the '---' and '+++' header
    # lines. Skip them by position (not by prefix) so that a removed content
    # line such as "-- comment", which shows up as "--- comment", still counts.
    body = diff[2:]
    insertions = sum(1 for line in body if line.startswith('+'))
    deletions = sum(1 for line in body if line.startswith('-'))
    return insertions + deletions, insertions, deletions, diff
```
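This function reads each file as UTF-8, falling back to other encodings if decoding fails, and uses `difflib` to compare their contents. It returns the change count, the insertion count, the deletion count, and the unified diff lines.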
Step 4: Generating the Report
Now, let's create a function to generate the HTML report:
```python
def generate_report(dir1, dir2, differences):
    """Render the comparison result as an HTML fragment.

    Args:
        dir1: Path of the first directory (shown in the report header).
        dir2: Path of the second directory (shown in the report header).
        differences: Mapping of file name to a
            ``(changes, insertions, deletions, diff)`` tuple, where ``diff``
            is an iterable of diff lines.

    Returns:
        A string of HTML containing a timestamp, the two directory names,
        summary totals, and a table with one row per file. Lines starting
        with ``+`` are coloured green and lines starting with ``-`` red.
        File names, directory names and diff lines are HTML-escaped.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    total_changes = sum(entry[0] for entry in differences.values())
    total_insertions = sum(entry[1] for entry in differences.values())
    total_deletions = sum(entry[2] for entry in differences.values())

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        "<h2>Comparison Report</h2>\n",
        f"<p>Date and Time: {timestamp}</p>\n",
        f"<p>Directory 1: {html.escape(dir1)}</p>\n",
        f"<p>Directory 2: {html.escape(dir2)}</p>\n",
        "<h3>Summary</h3>\n",
        f"<p>Total Changes: {total_changes}</p>\n",
        f"<p>Total Insertions: {total_insertions}</p>\n",
        f"<p>Total Deletions: {total_deletions}</p>\n",
        "<table border='1'>\n",
        "<tr><th>File</th><th>Changes</th><th>Insertions</th><th>Deletions</th><th>Diff</th></tr>\n",
    ]

    for file, (changes, insertions, deletions, diff) in differences.items():
        parts.append(
            f"<tr><td>{html.escape(file)}</td><td>{changes}</td>"
            f"<td>{insertions}</td><td>{deletions}</td>"
        )
        parts.append("<td><pre>")
        for line in diff:
            # Content lines read from a file still end in '\n'. Inside <pre>
            # that newline AND the <br> would each start a new line, so drop
            # the newline and let <br> be the only separator.
            text = html.escape(line.rstrip('\n'))
            if line.startswith('+'):
                parts.append(f"<span style='color:green'>{text}</span><br>")
            elif line.startswith('-'):
                parts.append(f"<span style='color:red'>{text}</span><br>")
            else:
                parts.append(f"{text}<br>")
        parts.append("</pre></td></tr>\n")

    parts.append("</table>")
    return "".join(parts)
```
This function creates an HTML report with a summary and detailed diff for each file.
Step 5: Comparing Directories
Let's create a function to compare two directories:
```python
def compare_directories(dir1, dir2):
    """Compare the files of two directory trees.

    Args:
        dir1: Path of the first directory.
        dir2: Path of the second directory.

    Returns:
        A dict keyed by file path relative to the directory roots. Each value
        is a ``(changes, insertions, deletions, diff)`` tuple:

        * files present in both trees appear only if ``compare_files``
          reports at least one change;
        * files present in only one tree are always listed, with the file's
          size in bytes (from ``get_file_size``) stored as the change count
          and as the deletion count (only in ``dir1``) or insertion count
          (only in ``dir2``), plus a one-line diff naming the directory.

        Keys are inserted in sorted order.
    """
    files1 = set(traverse_directory(dir1))
    files2 = set(traverse_directory(dir2))
    differences = {}

    # Set iteration order is not stable between runs; sort so the resulting
    # dict (and therefore the report rows) come out in a reproducible order.
    for file in sorted(files1 | files2):
        file1 = os.path.join(dir1, file)
        file2 = os.path.join(dir2, file)
        if file in files1 and file in files2:
            changes, insertions, deletions, diff = compare_files(file1, file2)
            if changes > 0:
                differences[file] = (changes, insertions, deletions, diff)
        elif file in files1:
            size = get_file_size(file1)
            differences[file] = (size, 0, size, [f'-{file} (File only in {dir1})'])
        else:
            # Not in files1, so by construction of the union it is in files2.
            size = get_file_size(file2)
            differences[file] = (size, size, 0, [f'+{file} (File only in {dir2})'])
    return differences
def get_file_size(file_path):
    """Return the size of ``file_path`` in bytes.

    If the size lookup raises ``OSError``, a message is printed and 0 is
    returned instead of propagating the error.
    """
    try:
        size = os.path.getsize(file_path)
    except OSError:
        print(f"Error getting file size for {file_path}")
        return 0
    else:
        return size
```
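`compare_directories` compares the contents of two directories and returns a dictionary of differences keyed by relative file path. Files that exist in only one directory are recorded with their size in bytes, which the `get_file_size` helper looks up (returning 0 if the size cannot be read).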
Step 6: Creating the Report Filename
We'll create a function that builds a timestamped filename for our report from the name of the second directory:
```python
def get_report_filename(dir2):
    """Build the report file name from ``dir2`` and the current local time.

    The name has the form ``<last path component of dir2>_<timestamp>.html``,
    where the timestamp is formatted as ``%Y_%b_%d_%H_%M_%S``.
    """
    stamp = datetime.now().strftime("%Y_%b_%d_%H_%M_%S")
    # normpath drops any trailing separator so basename yields the folder name.
    folder = os.path.basename(os.path.normpath(dir2))
    return f"{folder}_{stamp}.html"
```
Step 7: Putting It All Together
Finally, let's create our main function:
```python
def main():
    """Prompt for two directories, compare them, and write the HTML report.

    Both paths are read from standard input. If either one is not an existing
    directory, an error is printed and no report is written. Otherwise the
    report is saved in the current working directory under the name returned
    by ``get_report_filename`` and that name is printed.
    """
    dir1 = input("Enter the path of the first directory: ")
    dir2 = input("Enter the path of the second directory: ")

    # os.walk silently yields nothing for a path that is missing or not a
    # directory, so a typo would otherwise produce a report claiming every
    # file was added or removed. Reject bad input up front instead.
    invalid = [path for path in (dir1, dir2) if not os.path.isdir(path)]
    if invalid:
        for path in invalid:
            print(f"Error: '{path}' is not a directory.")
        return

    differences = compare_directories(dir1, dir2)
    report = generate_report(dir1, dir2, differences)
    report_filename = get_report_filename(dir2)
    with open(report_filename, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"Comparison report has been generated as '{report_filename}'")


if __name__ == "__main__":
    main()
```
This function prompts the user for two directory paths, compares them, generates a report, and saves it as an HTML file.
Conclusion:
We've created a powerful directory comparison tool that generates a detailed HTML report. This tool can be used to track changes between different versions of a project, compare backups, or analyze differences in file systems. Feel free to modify and expand upon this script to suit your specific needs!
Remember to save the entire script in a .py file and run it from the command line. The generated HTML report is written to the current working directory and provides a comprehensive view of the differences between the two directories.
Comments
Post a Comment