Use Python to Check Difference between Directories Recursively


I needed to track differences between software release packages, so that if anything changed dramatically (e.g., a file went missing or became much smaller than expected), I would get a notification to review the new, potentially flawed package.

I found that the filecmp.dircmp class in Python is spot on for this job. Here’s my snippet to compare two directories recursively and report the differences:

#!/usr/bin/env python3
import argparse
from filecmp import dircmp
from os.path import getsize

# Result tables: compare_dir() fills them in and diff_report() prints them.
# Each one maps a file path to the value printed next to that path.
changed_files = dict()  # path in the right-hand tree -> size difference
deleted_files = dict()  # path -> "DELETED!" marker
added_files = dict()  # path in the right-hand tree -> "ADDED!" marker

def diff_file_size(file1, file2):
  """Return the size of file2 minus the size of file1.

  Sizes come from os.path.getsize, so the result is positive when file2 is
  the larger file, negative when it is the smaller one, and 0 when both
  files have the same size.
  """
  # file2 is measured first, then file1.
  second_size = getsize(file2)
  first_size = getsize(file1)
  return second_size - first_size

def diff_report():
  """Print every recorded difference as one "path value" line.

  The tables are printed in a fixed order: deleted files first, then added
  files, then changed files (whose value is the size difference).
  """
  for table in (deleted_files, added_files, changed_files):
    for path, detail in table.items():
      print(path, detail)

def compare_dir(dir):
  """Record the differences held by a dircmp object, recursing into subdirs.

  Args:
    dir: a filecmp.dircmp instance comparing a left and a right directory.
      (The name shadows the builtin dir(); it is kept so existing callers
      that pass it by keyword keep working.)

  Side effects:
    Adds entries to the module-level tables:
      changed_files[right path] = size of right file minus size of left file
      deleted_files[left path]  = "DELETED!"  (entry exists only on the left)
      added_files[right path]   = "ADDED!"    (entry exists only on the right)
    left_only / right_only may name subdirectories as well as files.
  """
  # Local import keeps this function self-contained; os.path.join avoids
  # doubled separators when a directory argument ends with one.
  from os.path import join

  for name in dir.diff_files:
    left_path = join(dir.left, name)
    right_path = join(dir.right, name)
    changed_files[right_path] = diff_file_size(left_path, right_path)

  for name in dir.left_only:
    # A deleted entry exists only in the left tree, so report the path
    # where it actually lives rather than a non-existent right-hand path.
    deleted_files[join(dir.left, name)] = "DELETED!"

  for name in dir.right_only:
    added_files[join(dir.right, name)] = "ADDED!"

  # dircmp.subdirs maps each common subdirectory name to its own dircmp.
  for sub_dir in dir.subdirs.values():
    compare_dir(sub_dir)

def main():
  """Command-line entry point.

  Reads the two required options --dir1 and --dir2, builds a dircmp for
  them, collects the differences with compare_dir() and prints them with
  diff_report().
  """
  parser = argparse.ArgumentParser(description="Usage for diff_dir.py")
  # Both directory options share the same settings.
  for flag in ('--dir1', '--dir2'):
    parser.add_argument(flag, type=str, required=True)
  options = parser.parse_args()

  comparison = dircmp(options.dir1, options.dir2)
  compare_dir(comparison)
  diff_report()

# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()

🙂