I needed to track differences between software release packages, so that if anything changes dramatically (e.g., a file is missing or much smaller than expected), I get a notification to review the new, potentially flawed package.
I found that the filecmp.dircmp class in Python is spot on for this job. Here’s my snippet to recursively report the differences between two directories:
#!/usr/bin/env python3
"""Report files changed, deleted or added between two directory trees.

The first directory (``--dir1``) is treated as the old/reference tree and
the second (``--dir2``) as the new one.  The comparison itself is delegated
to ``filecmp.dircmp``; this script only walks its results recursively and
prints one line per difference.
"""
import argparse
from filecmp import dircmp
from os.path import getsize, join

# Result tables filled in by compare_dir() and printed by diff_report().
# Keys are file paths; values are a size delta in bytes (changed_files) or a
# fixed marker string (deleted_files / added_files).
changed_files = {}
deleted_files = {}
added_files = {}


def diff_file_size(file1, file2):
    """Return the size of *file2* minus the size of *file1*, in bytes.

    A negative result means *file2* is smaller than *file1*.
    """
    return getsize(file2) - getsize(file1)


def diff_report():
    """Print every recorded difference as ``<path> <value>``.

    Deleted entries are printed first, then added ones, then changed ones.
    """
    for table in (deleted_files, added_files, changed_files):
        for path, value in table.items():
            print(path, value)


def compare_dir(dir):
    """Record the differences described by *dir* and all of its subdirectories.

    Args:
        dir: a ``filecmp.dircmp`` instance; ``dir.left`` is the old tree and
            ``dir.right`` the new one.  (The parameter name shadows the
            builtin ``dir`` but is kept so keyword callers keep working.)

    Results are accumulated in the module-level ``changed_files``,
    ``deleted_files`` and ``added_files`` tables.
    """
    for name in dir.diff_files:
        old_path = join(dir.left, name)
        new_path = join(dir.right, name)
        changed_files[new_path] = diff_file_size(old_path, new_path)

    for name in dir.left_only:
        # Entries in left_only exist only in the old tree, so report the
        # path under dir.left: the same name under dir.right does not exist.
        deleted_files[join(dir.left, name)] = "DELETED!"

    for name in dir.right_only:
        added_files[join(dir.right, name)] = "ADDED!"

    # Recurse into the directories present on both sides.
    for sub_dir in dir.subdirs.values():
        compare_dir(sub_dir)


def main():
    """Parse ``--dir1``/``--dir2``, compare the two trees and print the report."""
    parser = argparse.ArgumentParser(description="Usage for diff_dir.py")
    parser.add_argument('--dir1', type=str, required=True)
    parser.add_argument('--dir2', type=str, required=True)
    args = parser.parse_args()

    comparison = dircmp(args.dir1, args.dir2)
    compare_dir(comparison)
    diff_report()


if __name__ == '__main__':
    main()
🙂