The glob module finds files matching shell-style wildcards. Simpler than walking directories manually.
Basic Patterns
import glob
# All .txt files in current directory
glob.glob('*.txt')
# ['file1.txt', 'file2.txt', 'notes.txt']
# All Python files
glob.glob('*.py')
# Files starting with "test"
glob.glob('test*')
# Single character wildcard
glob.glob('file?.txt')
# ['file1.txt', 'file2.txt'] but not 'file10.txt'
Recursive Patterns
import glob
# All .py files in subdirectories (Python 3.5+)
glob.glob('**/*.py', recursive=True)
# Everything recursively
glob.glob('**/*', recursive=True)
# Specific depth
glob.glob('*/*.py') # One level deep
glob.glob('*/*/*.py') # Two levels deep
iglob: Iterator Version
import glob
# Returns iterator (memory efficient for many files)
for filepath in glob.iglob('**/*.log', recursive=True):
print(filepath)
# glob() returns list (loads all into memory)
files = glob.glob('**/*.log', recursive=True)
Character Classes
import glob
# Match specific characters
glob.glob('[abc].txt')
# ['a.txt', 'b.txt', 'c.txt']
# Match range
glob.glob('[0-9].txt')
# ['1.txt', '2.txt', ...]
# Exclude characters
glob.glob('[!0-9]*.txt')
# Files not starting with digits
Hidden Files
import glob
# By default, doesn't match hidden files (starting with .)
glob.glob('*') # Skips .gitignore, .env, etc.
# Include hidden files (Python 3.11+)
glob.glob('*', include_hidden=True)
# Or match explicitly
glob.glob('.*') # Only hidden files
glob.glob('*') + glob.glob('.*') # All files
Absolute Paths
import glob
from pathlib import Path
# Relative paths (default)
glob.glob('*.txt')
# ['file.txt', 'notes.txt']
# Absolute paths
glob.glob('/home/user/docs/*.txt')
# Using Path for cross-platform
base = Path.home() / 'Documents'
glob.glob(str(base / '*.pdf'))
pathlib Integration
from pathlib import Path
# Path.glob() method
p = Path('.')
for txt in p.glob('*.txt'):
print(txt)
# Recursive
for py in p.glob('**/*.py'):
print(py)
# rglob = recursive glob
for py in p.rglob('*.py'):
print(py)
Practical Examples
Find and Process Files
import glob
for filepath in glob.iglob('data/**/*.csv', recursive=True):
print(f"Processing {filepath}")
# process_csv(filepath)
Cleanup Old Logs
import glob
import os
from datetime import datetime, timedelta
def cleanup_old_logs(days=30, pattern='logs/**/*.log'):
    """Delete log files whose modification time is older than ``days`` days.

    Args:
        days: Age threshold in days. Files last modified before
            ``now - days`` are removed.
        pattern: Glob pattern selecting the candidate files; ``**`` is
            expanded recursively. Defaults to every ``.log`` file under
            ``logs/``.

    Prints ``Deleted: <path>`` for each file removed. Directories that
    happen to match the pattern are left alone, and files that disappear
    while the cleanup is running are skipped instead of aborting the run.
    """
    cutoff = datetime.now() - timedelta(days=days)
    for log in glob.iglob(pattern, recursive=True):
        # The pattern can also match directories (e.g. "archive.log/"),
        # which os.remove() cannot delete.
        if not os.path.isfile(log):
            continue
        try:
            mtime = datetime.fromtimestamp(os.path.getmtime(log))
            if mtime >= cutoff:
                continue
            os.remove(log)
        except FileNotFoundError:
            # Removed by someone else (e.g. log rotation) after being listed.
            continue
        print(f"Deleted: {log}")


# Collect File Stats
import glob
import os
from pathlib import Path
def get_project_stats(pattern='**/*.py'):
    """Count files, lines, and bytes for every file matching ``pattern``.

    Args:
        pattern: Glob pattern; ``**`` is expanded recursively.

    Returns:
        A dict with the keys ``'files'``, ``'lines'`` and ``'bytes'``.

    Directories that match the pattern (e.g. a folder named ``pkg.py``) are
    ignored. Files are decoded as UTF-8 with undecodable bytes replaced, so
    the line count does not depend on the platform's default encoding and a
    stray non-UTF-8 byte cannot abort the scan.
    """
    stats = {'files': 0, 'lines': 0, 'bytes': 0}
    for filepath in glob.iglob(pattern, recursive=True):
        if not os.path.isfile(filepath):
            continue
        stats['files'] += 1
        stats['bytes'] += os.path.getsize(filepath)
        with open(filepath, encoding='utf-8', errors='replace') as f:
            stats['lines'] += sum(1 for _ in f)
    return stats
print(get_project_stats())
Multi-Pattern Matching
import glob
from itertools import chain
def multi_glob(*patterns):
    """Return the paths matching any of ``patterns``.

    Each pattern is expanded with ``recursive=True``. A path matched by more
    than one pattern is reported only once, and results keep the order in
    which they were first found. With no patterns the result is ``[]``.
    """
    matches = chain.from_iterable(
        glob.iglob(p, recursive=True) for p in patterns
    )
    # dict.fromkeys() drops duplicates while preserving first-seen order.
    return list(dict.fromkeys(matches))
# Find all source files
sources = multi_glob('**/*.py', '**/*.js', '**/*.ts')
Exclude Patterns
import glob
import fnmatch
def glob_exclude(pattern, exclude_patterns):
    """Yield paths matching ``pattern`` that match none of ``exclude_patterns``.

    Args:
        pattern: Glob pattern; ``**`` is expanded recursively.
        exclude_patterns: Iterable of ``fnmatch``-style patterns tested
            against each path exactly as glob returns it.

    A leading ``**/`` in an exclusion also matches zero directories, as it
    does in glob itself, so ``'**/test_*.py'`` excludes a top-level
    ``test_a.py`` as well as ``pkg/test_a.py``. Note that ``fnmatch``'s
    ``*`` also matches path separators.
    """
    # Expand the exclusions once up front. This also makes a one-shot
    # iterable (e.g. a generator) safe to reuse for every path.
    expanded = []
    for ex in exclude_patterns:
        expanded.append(ex)
        # fnmatch needs a literal "/" to satisfy "**/", so a path with no
        # directory part would slip through; add the prefix-less variants.
        while ex.startswith('**/'):
            ex = ex[3:]
            expanded.append(ex)

    for path in glob.iglob(pattern, recursive=True):
        if not any(fnmatch.fnmatch(path, ex) for ex in expanded):
            yield path
# All Python files except tests
for f in glob_exclude('**/*.py', ['**/test_*.py', '**/*_test.py']):
print(f)
Pattern Reference
| Pattern | Matches |
|---|---|
| `*` | Any characters except `/` |
| `**` | Any files and zero or more directories (requires `recursive=True`) |
| `?` | Any single character |
| `[abc]` | a, b, or c |
| `[0-9]` | Any single digit |
| `[!abc]` | Any single character except a, b, or c |
glob vs os.walk vs pathlib
# glob: pattern matching
glob.glob('**/*.py', recursive=True)
# os.walk: full directory traversal with control
for root, dirs, files in os.walk('.'):
dirs[:] = [d for d in dirs if not d.startswith('.')] # Skip hidden
for f in files:
if f.endswith('.py'):
print(os.path.join(root, f))
# pathlib: object-oriented
Path('.').rglob('*.py')
Quick Reference
import glob
# Find files
glob.glob(pattern) # Returns list
glob.iglob(pattern) # Returns iterator
# Recursive (Python 3.5+)
glob.glob('**/*.py', recursive=True)
# Options
glob.glob('*', include_hidden=True) # Python 3.11+
glob.glob('*', root_dir='/path') # Python 3.10+
# pathlib alternative
from pathlib import Path
Path('.').glob('*.txt')
Path('.').rglob('*.txt') # Recursive
glob is the quick way to find files by pattern. Use it when os.listdir() isn't enough but os.walk() is overkill.
React to this post: