Python uses reference counting plus cyclic garbage collection. The gc module gives you control over the collector and tools to debug memory issues.
Basic gc Operations
import gc
# Force garbage collection
collected = gc.collect()
print(f"Collected {collected} objects")
# Check if enabled
print(gc.isenabled()) # True
# Disable/enable
gc.disable()
gc.enable()
Understanding Generations
import gc
# Python uses generational GC
# Generation 0: New objects
# Generation 1: Survived one collection
# Generation 2: Long-lived objects
# Get thresholds
print(gc.get_threshold()) # (700, 10, 10)
# Collect gen 0 after 700 allocations
# Collect gen 1 after 10 gen-0 collections
# Collect gen 2 after 10 gen-1 collections
# Set custom thresholds
gc.set_threshold(1000, 15, 15)
# Get counts per generation
print(gc.get_count()) # (123, 5, 2)
Finding Circular References
import gc
class Node:
    """Minimal linked node used to demonstrate a reference cycle.

    Each instance carries a ``name`` label and a ``ref`` attribute that can
    be pointed at another object (for example another ``Node``).
    """

    def __init__(self, name):
        """Store ``name`` and start with no outgoing reference."""
        self.name = name
        # Unlinked until the caller assigns another object to ``ref``.
        self.ref = None
# Create circular reference
a = Node('a')
b = Node('b')
a.ref = b
b.ref = a # Circular!
# Delete references
del a, b
# Objects still exist due to cycle
gc.collect()
# Inspect uncollectable objects. Since Python 3.4, cycles whose objects
# define __del__ are collected too, so this list is normally empty.
print(gc.garbage) # Objects that couldn't be collected
Debugging Memory Leaks
import gc
# Enable debug flags
gc.set_debug(gc.DEBUG_LEAK) # Report uncollectable objects
# Other debug flags
gc.set_debug(gc.DEBUG_STATS) # Print collection stats
gc.set_debug(gc.DEBUG_COLLECTABLE) # Report collectable objects
gc.set_debug(gc.DEBUG_UNCOLLECTABLE) # Report uncollectable
gc.set_debug(gc.DEBUG_SAVEALL) # Save all to gc.garbage
# Combine flags
gc.set_debug(gc.DEBUG_LEAK | gc.DEBUG_STATS)
# Disable debug
gc.set_debug(0)
Tracking Objects
import gc
# Get all objects tracked by GC
all_objects = gc.get_objects()
print(f"Tracked objects: {len(all_objects)}")
# Filter by type
import types
functions = [obj for obj in gc.get_objects()
if isinstance(obj, types.FunctionType)]
# Find objects of specific type
class MyClass:
    """Empty example class; it defines no attributes or methods."""
    pass
my_instances = [obj for obj in gc.get_objects()
if isinstance(obj, MyClass)]
print(f"MyClass instances: {len(my_instances)}")
Reference Analysis
import gc
class Example:
    """Empty example class; it defines no attributes or methods."""
    pass
obj = Example()
# What references this object?
referrers = gc.get_referrers(obj)
print(f"Referrers: {len(referrers)}")
# What does this object reference?
referents = gc.get_referents(obj)
print(f"Referents: {len(referents)}")
# Find reference chains
def find_references(obj, depth=3):
if depth == 0:
return
for ref in gc.get_referrers(obj):
if not isinstance(ref, dict): # Skip frame dicts
print(f"Referenced by: {type(ref)}")
find_references(ref, depth - 1)
Freeze for Performance
import gc
# After initialization, freeze long-lived objects
# They won't be checked during collection
gc.freeze()
# Get count of frozen objects
print(gc.get_freeze_count())
# Unfreeze
gc.unfreeze()
Callbacks
import gc
def gc_callback(phase, info):
    """Print a one-line message when a collection starts or stops.

    Args:
        phase: ``'start'`` or ``'stop'``; any other value prints nothing.
        info: Mapping with details about the collection. On ``'start'``
            the ``'generation'`` key is read; on ``'stop'`` the
            ``'collected'`` key is read.
    """
    if phase == 'start':
        print(f"GC starting: generation {info['generation']}")
    elif phase == 'stop':
        print(f"GC complete: collected {info['collected']}")
# Register callback
gc.callbacks.append(gc_callback)
# Trigger collection
gc.collect()
# Remove callback
gc.callbacks.remove(gc_callback)
Memory Leak Detection
import gc
import sys
def detect_leaks(func, iterations=10):
    """Detect memory leaks by running function multiple times.

    Counts the objects tracked by the garbage collector before and after
    calling ``func`` repeatedly, forcing a full collection on both sides so
    that collectable garbage is not counted as a leak.

    Args:
        func: Zero-argument callable to exercise; its return value is
            discarded.
        iterations: How many times ``func`` is called.

    Returns:
        True (after printing the growth) if the tracked-object count grew,
        False otherwise.
    """
    gc.collect()
    baseline = len(gc.get_objects())

    # The loop index is irrelevant; only the repeated calls matter.
    for _ in range(iterations):
        func()

    gc.collect()
    final = len(gc.get_objects())

    leaked = final - baseline
    if leaked > 0:
        print(f"Potential leak: {leaked} new objects")
        return True
    return False
# Usage
def potentially_leaky():
data = [1, 2, 3]
# Forgot to clean up somehow...
detect_leaks(potentially_leaky)
Weak References vs Strong
import gc
import weakref
class Resource:
    """Simple named object; it stores nothing but its ``name``."""

    def __init__(self, name):
        """Remember ``name`` on the instance."""
        self.name = name
# Strong reference keeps object alive
strong = Resource("strong")
# Weak reference doesn't
resource = Resource("weak")
weak = weakref.ref(resource)
del resource
gc.collect()
print(weak()) # None - object was collected
print(strong) # Still exists
__del__ and GC Issues
import gc
class BadClass:
    """Object with a ``__del__`` finalizer and an optional link to another object.

    Args:
        other: Object to reference from this instance; defaults to ``None``.
    """

    def __init__(self, other=None):
        self.other = other

    def __del__(self):
        # Finalizer: report the instance when the interpreter finalizes it.
        print(f"Deleting {self}")
# Objects with __del__ in cycles were problematic (pre-3.4)
# Now they're collected but order is undefined
a = BadClass()
b = BadClass(a)
a.other = b
del a, b
gc.collect() # Both collected, __del__ called
Optimizing GC
import gc
# Disable during performance-critical sections
gc.disable()
try:
# Performance-critical code
result = process_large_dataset()
finally:
gc.enable()
gc.collect()
# Adjust thresholds for your workload
# Higher = less frequent, more pause time
# Lower = more frequent, less pause time
gc.set_threshold(50000, 500, 100) # For long-running process
Statistics
import gc
# Get collection statistics
stats = gc.get_stats()
for i, gen in enumerate(stats):
print(f"Generation {i}:")
print(f" Collections: {gen['collections']}")
print(f" Collected: {gen['collected']}")
print(f" Uncollectable: {gen['uncollectable']}")
Memory Profiling Helper
import gc
import sys
from collections import Counter
def memory_report():
    """Print a summary of the objects currently tracked by the collector.

    Runs a collection first, then prints the total number of tracked
    objects, the ten most common type names with their counts, and an
    approximate total size in megabytes. Returns ``None``.
    """
    gc.collect()
    objects = gc.get_objects()

    type_counts = Counter(type(obj).__name__ for obj in objects)
    print(f"Total objects: {len(objects)}")
    print("\nTop 10 types:")
    for type_name, count in type_counts.most_common(10):
        print(f" {type_name}: {count}")

    # Size estimation (approximate): sizes are summed per object as
    # reported by sys.getsizeof. The default of 0 keeps an object that
    # cannot report its size from aborting the report with TypeError.
    total_size = sum(sys.getsizeof(obj, 0) for obj in objects)
    print(f"\nApproximate total size: {total_size / 1024 / 1024:.2f} MB")
memory_report()
Common Issues
import gc
# Issue 1: Circular references with __del__
# Solution: Use weakref or context managers
# Issue 2: Growing memory
# Solution: Check gc.garbage for uncollectable
# Issue 3: GC pauses
# Solution: Tune thresholds or gc.disable() strategically
# Issue 4: Memory not released to OS
# Solution: Python keeps freed memory for reuse
# Use pymalloc debug or tracemalloc for tracking
Best Practices
| Do | Don't |
|---|---|
| Use context managers | Rely on __del__ for cleanup |
| Use weakref for caches | Create circular references |
| Profile with gc.get_objects() | Disable GC in production |
| Tune thresholds for workload | Set arbitrary thresholds |
| gc.collect() after large operations | Call gc.collect() frequently |
The gc module reveals Python's memory management internals. Use it to debug leaks and optimize memory-intensive applications.
React to this post: