Python difflib: Comparing Sequences and Generating Diffs

The difflib module compares sequences—files, strings, lists. It's the engine behind diff tools, fuzzy matching, and similarity scoring.

Basic Sequence Comparison

from difflib import SequenceMatcher
 
s1 = "Hello World"
s2 = "Hello Python"

matcher = SequenceMatcher(None, s1, s2)

# Similarity ratio (0 to 1), computed as 2 * matches / total length.
# Seven characters match ("Hello " plus the "o"), so 2 * 7 / 23.
print(matcher.ratio())  # 0.6086...

# Matching blocks; the final entry is always a zero-length sentinel
for block in matcher.get_matching_blocks():
    print(f"a[{block.a}:{block.a+block.size}] == b[{block.b}:{block.b+block.size}]")
# a[0:6] == b[0:6]  ("Hello ")
# a[7:8] == b[10:11]  ("o")
# a[11:11] == b[12:12]  (sentinel)

Unified Diff

unified_diff() produces the standard unified format, the same layout used by `diff -u` and `git diff`:

from difflib import unified_diff
 
# Every line, including the last one, must end with a newline. Otherwise the
# final "-line 4" and "+line 5" entries are emitted without a terminator and
# run together as "-line 4+line 5" when the diff is joined.
original = """line 1
line 2
line 3
line 4
""".splitlines(keepends=True)

modified = """line 1
line 2 modified
line 3
line 5
""".splitlines(keepends=True)

diff = unified_diff(
    original,
    modified,
    fromfile='original.txt',
    tofile='modified.txt'
)

# The joined diff already ends with a newline, so do not let print add another
print(''.join(diff), end='')
# --- original.txt
# +++ modified.txt
# @@ -1,4 +1,4 @@
#  line 1
# -line 2
# +line 2 modified
#  line 3
# -line 4
# +line 5

Context Diff

from difflib import context_diff
 
# Build a context-format diff of the two line lists, labelled with the file
# names and showing two lines of context around each change (n=2).
diff = context_diff(original, modified, 'original.txt', 'modified.txt', n=2)

print(''.join(diff))

HTML Diff

Generate visual HTML diff:

from difflib import HtmlDiff
 
original = ["line 1", "line 2", "line 3"]
modified = ["line 1", "line 2 changed", "line 3", "line 4"]

html_diff = HtmlDiff()
html = html_diff.make_file(original, modified)

# make_file() declares a utf-8 charset in the generated page by default, so
# write the file with that same encoding instead of the platform default.
with open('diff.html', 'w', encoding='utf-8') as f:
    f.write(html)

Find Close Matches

Fuzzy string matching:

from difflib import get_close_matches
 
words = ['python', 'perl', 'ruby', 'java', 'javascript']

# Find similar words (defaults: n=3, cutoff=0.6)
matches = get_close_matches('pythn', words)
print(matches)  # ['python']

# Only candidates scoring at least `cutoff` are returned. 'javascript' scores
# 2*3/13 (about 0.46) against 'jav', so it falls below 0.6 and is excluded.
matches = get_close_matches('jav', words, n=3, cutoff=0.6)
print(matches)  # ['java']

Spell Checker

from difflib import get_close_matches
 
# Known-good words that suggestions are drawn from
DICTIONARY = [
    'hello',
    'world',
    'python',
    'programming',
    'developer',
]

def suggest_correction(word: str, max_suggestions: int = 3) -> list[str]:
    """Return up to ``max_suggestions`` dictionary words close to ``word``.

    The word is lower-cased before matching, and only candidates with a
    similarity of at least 0.6 are returned, best match first.
    """
    normalized = word.lower()
    return get_close_matches(normalized, DICTIONARY, max_suggestions, 0.6)
 
# Prints, in order: ['python'], ['developer'], ['programming']
for misspelled in ('pyton', 'developr', 'progrming'):
    print(suggest_correction(misspelled))

Differ for Detailed Diffs

from difflib import Differ
 
d = Differ()

text1 = ['one\n', 'two\n', 'three\n']
text2 = ['one\n', 'TWO\n', 'three\n', 'four\n']

# compare() yields delta lines lazily; collect them so they can be reused
result = [*d.compare(text1, text2)]
print(*result, sep='')
#   one
# - two
# + TWO
#   three
# + four

# Each delta line starts with a two-character code:
# '  ' = line common to both sequences
# '- ' = line only in the first sequence
# '+ ' = line only in the second sequence
# '? ' = guide line marking intraline changes (in neither input)

Inline Character Diff

from difflib import Differ, ndiff
 
# ndiff adds '? ' guide lines whose carets sit under the changed characters
diff = ndiff(['foo bar\n'], ['foo baz\n'])
print(''.join(diff))
# - foo bar
# ?       ^
# + foo baz
# ?       ^

File Comparison

from difflib import unified_diff
from pathlib import Path
 
def diff_files(file1: str, file2: str) -> str:
    """Generate a unified diff between two text files.

    Args:
        file1: Path of the "before" file; also used as the ``---`` label.
        file2: Path of the "after" file; also used as the ``+++`` label.

    Returns:
        The unified diff as one string, or ``''`` when the files have the
        same lines. Every diff line ends with a newline, even when a file's
        last line has none.
    """
    diff = unified_diff(
        _read_lines(file1), _read_lines(file2),
        fromfile=file1,
        tofile=file2
    )

    return ''.join(diff)


def _read_lines(path: str) -> list[str]:
    """Read ``path`` as text and return its lines, each newline-terminated."""
    lines = Path(path).read_text().splitlines(keepends=True)
    # unified_diff copies input lines verbatim. An unterminated last line
    # would fuse with the next diff line (e.g. "-line 4+line 5"), so terminate
    # it and add the marker GNU diff uses for a missing final newline.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n\\ No newline at end of file\n'
    return lines
 
# Usage: diff two revisions of a script and print the result
result = diff_files(file1='version1.py', file2='version2.py')
print(result)

Similarity Scoring

from difflib import SequenceMatcher
 
def similarity(s1: str, s2: str) -> float:
    """Return the SequenceMatcher similarity ratio of ``s1`` and ``s2``.

    The result is a float in [0, 1]: 1.0 for identical strings, 0.0 when the
    strings have nothing in common.
    """
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.ratio()
 
def quick_similarity(s1: str, s2: str) -> float:
    """Return a quickly computed upper bound on the similarity ratio."""
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.quick_ratio()
 
def real_quick_similarity(s1: str, s2: str) -> float:
    """Return a very cheap upper bound on the similarity ratio."""
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.real_quick_ratio()
 
# Compare strings
# "hello " (6 chars) and the "o" match: 2 * 7 / 23
print(similarity("hello world", "hello python"))  # 0.608...
print(similarity("hello world", "hello world"))   # 1.0
print(similarity("abc", "xyz"))                    # 0.0

Ignoring Junk Characters

from difflib import SequenceMatcher
 
# Treat spaces as junk when comparing
def is_junk(char):
    """Return True for characters SequenceMatcher should treat as junk."""
    return char == ' '

s1 = "hello world"
s2 = "helloworld"

# With junk filtering. SequenceMatcher only evaluates the junk predicate on
# elements of the second sequence (s2). s2 contains no spaces, so the filter
# changes nothing here: the result equals SequenceMatcher(None, s1, s2).ratio().
matcher = SequenceMatcher(is_junk, s1, s2)
print(matcher.ratio())  # 0.9523... (2 * 10 / 21), same as without the filter

Finding Longest Common Substring

from difflib import SequenceMatcher
 
def longest_common_substring(s1: str, s2: str) -> str:
    """Find the longest common substring of ``s1`` and ``s2``.

    When several common substrings share the maximum length, the one that
    starts earliest in ``s1`` is returned. Returns ``''`` when the strings
    have no characters in common.
    """
    # autojunk=False: the default heuristic ignores characters that occur
    # often in a second sequence of 200+ items, which would silently drop
    # real matches on longer inputs.
    matcher = SequenceMatcher(None, s1, s2, autojunk=False)

    # Ask directly for the single longest match over both full ranges
    longest = matcher.find_longest_match(0, len(s1), 0, len(s2))

    return s1[longest.a:longest.a + longest.size]
 
print(longest_common_substring(s1="abcdefg", s2="xyzcdeklm"))  # "cde"

Delta Encoding

from difflib import SequenceMatcher
 
def get_operations(s1: str, s2: str) -> list:
    """Describe the edit steps that turn ``s1`` into ``s2``.

    Returns one dict per SequenceMatcher opcode, in order. Each dict holds the
    opcode tag ('operation'), the index ranges it covers in ``s1`` and ``s2``
    ('from_range', 'to_range'), and the text of those ranges ('from_text',
    'to_text').
    """
    opcodes = SequenceMatcher(None, s1, s2).get_opcodes()
    return [
        {
            'operation': tag,
            'from_range': (a_start, a_end),
            'to_range': (b_start, b_end),
            'from_text': s1[a_start:a_end],
            'to_text': s2[b_start:b_end],
        }
        for tag, a_start, a_end, b_start, b_end in opcodes
    ]
 
ops = get_operations("Hello World", "Hello Python")
for op in ops:
    print(f"{op['operation']}: '{op['from_text']}' -> '{op['to_text']}'")
# The shared "o" splits the change into two replacements:
# equal: 'Hello ' -> 'Hello '
# replace: 'W' -> 'Pyth'
# equal: 'o' -> 'o'
# replace: 'rld' -> 'n'

Restore Original from Diff

from difflib import restore
 
# restore() only understands deltas produced by ndiff() or Differ.compare(),
# whose lines start with a two-character code ('  ', '- ', '+ ', '? ').
# A unified diff uses one-character prefixes, so restore() would match none
# of its lines and return nothing.
diff_lines = [
    '  line 1\n',
    '- line 2\n',
    '+ line 2 changed\n',
]

# Restore version 1 (original)
original = list(restore(diff_lines, 1))  # ['line 1\n', 'line 2\n']

# Restore version 2 (modified)
modified = list(restore(diff_lines, 2))  # ['line 1\n', 'line 2 changed\n']

Configuration File Diff

from difflib import unified_diff
import json
 
def diff_json(obj1: dict, obj2: dict) -> str:
    """Generate a unified diff between two JSON-serializable objects.

    Both objects are serialized with sorted keys and two-space indentation,
    so only real content changes show up in the diff.

    Returns:
        The diff as newline-separated text labelled 'before'/'after', with no
        trailing newline, or ``''`` when the serialized forms are identical.
    """
    json1 = json.dumps(obj1, indent=2, sort_keys=True).splitlines()
    json2 = json.dumps(obj2, indent=2, sort_keys=True).splitlines()

    # json.dumps() output has no trailing newline. Diff newline-free lines
    # with lineterm='' and add separators when joining; otherwise a change on
    # the last line fuses with its neighbour (e.g. "-{}+{").
    diff = unified_diff(json1, json2, fromfile='before', tofile='after', lineterm='')
    return '\n'.join(diff)
 
# Two revisions of the same configuration
config_old = dict(debug=True, port=8080)
config_new = dict(debug=False, port=8080, host="0.0.0.0")

print(diff_json(obj1=config_old, obj2=config_new))

Best Match in List

from difflib import SequenceMatcher
 
def best_match(query: str, candidates: list[str]) -> "tuple[str | None, float]":
    """Find the candidate most similar to ``query`` and its score.

    Args:
        query: The string to match.
        candidates: Strings to compare against ``query``.

    Returns:
        A ``(candidate, score)`` pair, where ``score`` is the SequenceMatcher
        ratio in [0, 1]. On ties the earliest candidate wins. Returns
        ``(None, 0.0)`` when ``candidates`` is empty or no candidate scores
        above zero.
    """
    best = None
    best_score = 0.0  # float, so the "no match" result has a consistent type

    for candidate in candidates:
        score = SequenceMatcher(None, query, candidate).ratio()
        # Strict comparison keeps the first of several equally good candidates
        if score > best_score:
            best, best_score = candidate, score

    return best, best_score
 
commands = ['start', 'stop', 'restart', 'status', 'reload']
match, score = best_match('stat', commands)
# 'start' (8/9) narrowly beats 'status' (0.8)
print(f"Best match: {match} ({score:.2%})")  # Best match: start (88.89%)

The difflib module powers text comparison, from simple similarity checks to full diff generation. Use it for spell checking, fuzzy search, version comparison, and anywhere you need to understand how sequences differ.

React to this post:

#Basic Sequence Comparison

#Unified Diff

#Context Diff

#HTML Diff

#Find Close Matches

#Spell Checker

#Differ for Detailed Diffs

#Inline Character Diff

#File Comparison

#Similarity Scoring

#Ignoring Junk Characters

#Finding Longest Common Substring

#Delta Encoding

#Restore Original from Diff

#Configuration File Diff

#Best Match in List

Need help shipping fast?