The difflib module compares sequences—files, strings, lists. It's the engine behind diff tools, fuzzy matching, and similarity scoring.
Basic Sequence Comparison
from difflib import SequenceMatcher

s1 = "Hello World"
s2 = "Hello Python"
# First argument is the junk predicate; None means no character is junk.
matcher = SequenceMatcher(None, s1, s2)

# Similarity ratio (0 to 1): 2 * matched characters / combined length
print(matcher.ratio())  # 0.6086... (7 matches: "Hello " plus "o", so 14 / 23)

# Matching blocks
for block in matcher.get_matching_blocks():
    print(f"a[{block.a}:{block.a+block.size}] == b[{block.b}:{block.b+block.size}]")
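# a[0:6] == b[0:6] ("Hello ")
# a[7:8] == b[10:11] ("o")
# a[11:11] == b[12:12] (zero-length sentinel that always ends the list)
Unified Diff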
Standard diff output format:
from difflib import unified_diff

# Every line, including the last, must end with a newline; otherwise the
# final "-" and "+" lines run together when the diff is joined.
original = """line 1
line 2
line 3
line 4
""".splitlines(keepends=True)
modified = """line 1
line 2 modified
line 3
line 5
""".splitlines(keepends=True)

# unified_diff() is lazy: it returns a generator of diff lines.
diff = unified_diff(
    original,
    modified,
    fromfile='original.txt',
    tofile='modified.txt',
)
print(''.join(diff))
# --- original.txt
# +++ modified.txt
# @@ -1,4 +1,4 @@
#  line 1
# -line 2
# +line 2 modified
#  line 3
# -line 4
# +line 5
Context Diff
from difflib import context_diff
# `original` and `modified` are not defined in this snippet; it reuses the
# line lists built for the unified-diff example.
diff = context_diff(
original,
modified,
fromfile='original.txt',
tofile='modified.txt',
n=2 # Lines of unchanged context shown around each change
)
print(''.join(diff))
HTML Diff
Generate visual HTML diff:
from difflib import HtmlDiff
# Each input is a list of lines (here without trailing newlines).
original = ["line 1", "line 2", "line 3"]
modified = ["line 1", "line 2 changed", "line 3", "line 4"]
html_diff = HtmlDiff()
# make_file() returns the rendered diff as a string; it is written to disk below.
html = html_diff.make_file(original, modified)
with open('diff.html', 'w') as f:
    f.write(html)
Find Close Matches
Fuzzy string matching:
from difflib import get_close_matches
words = ['python', 'perl', 'ruby', 'java', 'javascript']
# Find similar words
matches = get_close_matches('pythn', words)
print(matches) # ['python']
matches = get_close_matches('jav', words, n=3, cutoff=0.6)
print(matches) # ['java'] ('javascript' scores only 0.46, below the 0.6 cutoff)
Spell Checker
from difflib import get_close_matches
DICTIONARY = ['hello', 'world', 'python', 'programming', 'developer']


def suggest_correction(word: str, max_suggestions: int = 3) -> list[str]:
    """Return up to ``max_suggestions`` DICTIONARY words that resemble ``word``.

    The lookup is case-insensitive on the input side: ``word`` is lower-cased
    before matching. Candidates scoring below 0.6 are dropped, and the result
    is ordered best match first.
    """
    normalized = word.lower()
    suggestions = get_close_matches(normalized, DICTIONARY, max_suggestions, 0.6)
    return suggestions
print(suggest_correction('pyton')) # ['python']
print(suggest_correction('developr')) # ['developer']
print(suggest_correction('progrming')) # ['programming']
Differ for Detailed Diffs
from difflib import Differ
d = Differ()
text1 = ['one\n', 'two\n', 'three\n']
text2 = ['one\n', 'TWO\n', 'three\n', 'four\n']
result = list(d.compare(text1, text2))
print(''.join(result))
#   one
# - two
# + TWO
#   three
# + four
# Codes:
# '  ' = common
# '- ' = in first only
# '+ ' = in second only
# '? ' = guide (shows changes)
Inline Character Diff
from difflib import Differ, ndiff
# ndiff shows character-level changes
diff = ndiff(['foo bar\n'], ['foo baz\n'])
print(''.join(diff))
# - foo bar
# ?       ^
# + foo baz
# ?       ^
File Comparison
from difflib import unified_diff
from pathlib import Path
def diff_files(file1: str, file2: str) -> str:
    """Return a unified diff of the text in ``file1`` versus ``file2``.

    Both files are read in full with the platform default encoding. The paths
    are used as the ``---``/``+++`` labels in the diff header. The result is
    an empty string when the files have identical content.
    """
    before, after = (
        Path(name).read_text().splitlines(keepends=True)
        for name in (file1, file2)
    )
    delta = unified_diff(before, after, fromfile=file1, tofile=file2)
    return ''.join(delta)
# Usage
result = diff_files('version1.py', 'version2.py')
print(result)
Similarity Scoring
from difflib import SequenceMatcher
def similarity(s1: str, s2: str) -> float:
    """Return the SequenceMatcher ratio of ``s1`` and ``s2``.

    The ratio is 2 * M / T, where M is the number of matched characters and T
    is the combined length of both strings. It ranges from 0.0 (nothing in
    common) to 1.0 (identical).
    """
    comparison = SequenceMatcher(None, s1, s2)
    return comparison.ratio()
def quick_similarity(s1: str, s2: str) -> float:
    """Return a quick upper bound on the similarity of ``s1`` and ``s2``.

    Uses ``SequenceMatcher.quick_ratio()``, which is cheaper than ``ratio()``
    and never smaller than it, so it can overestimate but not underestimate.
    """
    comparison = SequenceMatcher(None, s1, s2)
    return comparison.quick_ratio()
def real_quick_similarity(s1: str, s2: str) -> float:
    """Return a very fast, very loose upper bound on similarity.

    Uses ``SequenceMatcher.real_quick_ratio()``, which is cheaper still than
    ``quick_ratio()`` and gives a correspondingly looser bound.
    """
    comparison = SequenceMatcher(None, s1, s2)
    return comparison.real_quick_ratio()
# Compare strings
print(similarity("hello world", "hello python")) # 0.6086... ("hello " plus "o" match: 2 * 7 / 23)
print(similarity("hello world", "hello world")) # 1.0
print(similarity("abc", "xyz")) # 0.0
Ignoring Junk Characters
from difflib import SequenceMatcher
# Ignore spaces when comparing
def is_junk(char):
    """Junk predicate for SequenceMatcher: true only for a single space."""
    space = ' '
    return char == space
s1 = "hello world"
s2 = "helloworld"
# With junk filtering
matcher = SequenceMatcher(is_junk, s1, s2)
print(matcher.ratio()) # 0.952... (same as without the filter here: the junk test is applied to the second sequence only, and s2 contains no spaces)
Finding Longest Common Substring
from difflib import SequenceMatcher
def longest_common_substring(s1: str, s2: str) -> str:
    """Return the longest substring that occurs in both ``s1`` and ``s2``.

    When several substrings tie for longest, the one starting earliest in
    ``s1`` is returned. Returns ``""`` when the strings share no characters
    or either one is empty.
    """
    # autojunk=False: by default, once s2 has 200+ characters, frequently
    # occurring characters stop being used as match anchors, which can hide
    # the real longest match.
    matcher = SequenceMatcher(None, s1, s2, autojunk=False)
    # Ask for the single longest match directly instead of building every
    # matching block and scanning for the largest.
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a:match.a + match.size]
print(longest_common_substring("abcdefg", "xyzcdeklm")) # "cde"
Delta Encoding
from difflib import SequenceMatcher
def get_operations(s1: str, s2: str) -> list:
    """Describe, step by step, how to turn ``s1`` into ``s2``.

    Returns one dict per SequenceMatcher opcode, in order, with the keys
    'operation' (the opcode tag), 'from_range' and 'to_range' (index pairs
    into ``s1`` and ``s2``) and 'from_text' and 'to_text' (the slices those
    ranges select).
    """
    opcodes = SequenceMatcher(None, s1, s2).get_opcodes()
    return [
        {
            'operation': tag,
            'from_range': (a_start, a_end),
            'to_range': (b_start, b_end),
            'from_text': s1[a_start:a_end],
            'to_text': s2[b_start:b_end],
        }
        for tag, a_start, a_end, b_start, b_end in opcodes
    ]
ops = get_operations("Hello World", "Hello Python")
# One line per edit step: the opcode tag, the text it consumes and the text it produces.
for op in ops:
    print(f"{op['operation']}: '{op['from_text']}' -> '{op['to_text']}'")
# equal: 'Hello ' -> 'Hello '
# replace: 'W' -> 'Pyth'
# equal: 'o' -> 'o'
# replace: 'rld' -> 'n'
Restore Original from Diff
from difflib import ndiff, restore

# restore() only understands the line-by-line deltas produced by ndiff() or
# Differ.compare(). It cannot parse unified or context diffs; given one, it
# silently yields nothing.
diff_lines = list(ndiff(
    ['line 1\n', 'line 2\n'],
    ['line 1\n', 'line 2 changed\n'],
))
# Restore version 1 (original)
original = list(restore(diff_lines, 1))
# Restore version 2 (modified)
modified = list(restore(diff_lines, 2))
Configuration File Diff
from difflib import unified_diff
import json
def diff_json(obj1: dict, obj2: dict) -> str:
    """Return a unified diff of two JSON-serializable dicts.

    Both objects are rendered as indented JSON with sorted keys, so key order
    never shows up as a difference. The header labels are 'before' and
    'after'. Returns an empty string when the rendered JSON is identical.
    """
    def _render(obj: dict) -> list:
        # json.dumps() output has no trailing newline. Add one so every line
        # is newline-terminated; otherwise a change on the last line (e.g. an
        # empty dict against a non-empty one) fuses two diff lines together.
        text = json.dumps(obj, indent=2, sort_keys=True) + '\n'
        return text.splitlines(keepends=True)

    return ''.join(
        unified_diff(_render(obj1), _render(obj2), fromfile='before', tofile='after')
    )
config_old = {"debug": True, "port": 8080}
config_new = {"debug": False, "port": 8080, "host": "0.0.0.0"}
print(diff_json(config_old, config_new))
Best Match in List
from difflib import SequenceMatcher
def best_match(query: str, candidates: list[str]) -> "tuple[str | None, float]":
    """Return the candidate most similar to ``query`` and its ratio.

    Similarity is ``SequenceMatcher(None, query, candidate).ratio()``. On a
    tie the earliest candidate wins. If ``candidates`` is empty, or no
    candidate shares anything with ``query``, the result is ``(None, 0.0)``.
    """
    best = None
    best_score = 0.0  # float, so the score has one type on every path
    for candidate in candidates:
        score = SequenceMatcher(None, query, candidate).ratio()
        # Strict '>' keeps the first of equally good candidates and never
        # selects a candidate that scores 0.
        if score > best_score:
            best, best_score = candidate, score
    return best, best_score
commands = ['start', 'stop', 'restart', 'status', 'reload']
match, score = best_match('stat', commands)
print(f"Best match: {match} ({score:.2%})") # Best match: start (88.89%)
The difflib module powers text comparison, from simple similarity checks to full diff generation. Use it for spell checking, fuzzy search, version comparison, and anywhere you need to understand how sequences differ.
React to this post: