Copying objects in Python is trickier than it looks. The copy module provides tools to duplicate objects correctly, but choosing between shallow and deep copy matters.
The Problem
# Assignment doesn't copy! It only binds another name to the same list object.
original = [1, 2, [3, 4]]
reference = original  # no new list is created here
reference[0] = 100  # mutates the one list that both names refer to
print(original) # [100, 2, [3, 4]] - Both changed!
reference[2].append(5)  # the nested list is reached through the same object as well
print(original) # [100, 2, [3, 4, 5]] - Still linked!

Shallow Copy
A shallow copy creates a new outer object, but the nested objects are still shared with the original:
import copy
original = [1, 2, [3, 4]]
shallow = copy.copy(original)  # new outer list holding references to the same items
# Top level is independent
shallow[0] = 100  # replaces a slot in the new outer list only
print(original) # [1, 2, [3, 4]] - Unchanged!
# Nested objects are shared
shallow[2].append(5)  # shallow[2] and original[2] are one and the same list
print(original) # [1, 2, [3, 4, 5]] - Changed!

Deep Copy
Recursively copies everything:
import copy
original = [1, 2, [3, 4]]
deep = copy.deepcopy(original)  # copies the outer list and the nested list
# Completely independent
deep[0] = 100
deep[2].append(5)  # modifies the copy's own nested list, not original[2]
print(original) # [1, 2, [3, 4]] - Unchanged!
print(deep) # [100, 2, [3, 4, 5]]

Visual Comparison
import copy
data = {'items': [1, 2, 3], 'meta': {'count': 3}}
# Shallow: new dict, same nested objects
shallow = copy.copy(data)
# "is" tests object identity, so True means both dicts hold the very same list
print(data['items'] is shallow['items']) # True (same list!)
# Deep: new dict, new nested objects
deep = copy.deepcopy(data)
print(data['items'] is deep['items']) # False (different list)

Built-in Shallow Copies
Many operations create shallow copies:
# List: three equivalent ways to make a shallow copy
original = [[1, 2], [3, 4]]
shallow1 = list(original)  # constructor
shallow2 = original[:]  # full slice
shallow3 = original.copy()  # list method
# All share nested lists
shallow1[0].append(5)  # visible through original, shallow2 and shallow3 too
print(original) # [[1, 2, 5], [3, 4]]
# Dict
d = {'a': [1, 2]}
shallow_d = d.copy() # or dict(d)
# Set (its elements must be hashable, hence the frozenset)
s = {frozenset([1, 2])}
shallow_s = s.copy() # or set(s)

Custom Copy Behavior
import copy
class Config:
    """Settings holder whose private cache is never carried over to copies.

    ``copy.copy()`` and ``copy.deepcopy()`` call ``__copy__`` and
    ``__deepcopy__`` respectively, so both hooks build the new object by hand
    and give it a fresh, empty ``_cache``.
    """

    def __init__(self, settings):
        self.settings = settings
        self._cache = {}  # Don't copy this

    def __copy__(self):
        """Return a shallow copy: the same ``settings`` object, an empty cache."""
        # Shallow copy, skip cache
        cls = type(self)  # keep the concrete class so subclasses copy as themselves
        new = cls.__new__(cls)  # bypass __init__; attributes are set explicitly below
        new.settings = self.settings
        new._cache = {}
        return new

    def __deepcopy__(self, memo):
        """Return a deep copy: recursively copied ``settings``, an empty cache.

        ``memo`` is the dictionary ``copy.deepcopy()`` uses to track objects it
        has already copied; it is passed on to every nested ``deepcopy`` call.
        """
        # Deep copy, skip cache
        cls = type(self)
        new = cls.__new__(cls)
        # Register the copy before descending into settings, so that a reference
        # cycle leading back to this Config resolves to `new` instead of
        # producing a second, different copy.
        memo[id(self)] = new
        new.settings = copy.deepcopy(self.settings, memo)
        new._cache = {}
        return new
config = Config({'debug': True, 'nested': {'a': 1}})
config._cache['key'] = 'value'  # populate the cache so we can see it is dropped
shallow = copy.copy(config)  # dispatches to Config.__copy__
print(shallow._cache) # {} (not copied)
print(shallow.settings is config.settings) # True
deep = copy.deepcopy(config)  # dispatches to Config.__deepcopy__
print(deep.settings is config.settings) # False

Handling Circular References
Deep copy handles circular references automatically:
import copy
# Create circular reference
a = [1, 2]
a.append(a) # a = [1, 2, [...]]
# Deepcopy handles it: objects already copied are remembered and reused
b = copy.deepcopy(a)
print(b[2] is b) # True (circular ref preserved)
print(b is a) # False (it's a copy)

The Memo Dictionary
import copy
class Node:
    """Minimal tree node: a value plus a list of child nodes."""

    def __init__(self, value):
        self.value = value
        self.children = []  # a fresh list for every node
# Create tree with shared nodes
shared = Node("shared")
root = Node("root")
root.children = [shared, shared] # Same node twice
# Deep copy preserves identity: the shared node is copied once and reused
copied = copy.deepcopy(root)
print(copied.children[0] is copied.children[1]) # True (same copy)
print(copied.children[0] is shared) # False (different from original)

Copying Dataclasses
import copy
from dataclasses import dataclass, field
@dataclass
class User:
    """A user with a name and a mutable list of tags."""

    name: str
    # default_factory builds a new list per instance, so instances never
    # share a single default list.
    tags: list = field(default_factory=list)
user = User("Alice", ["admin", "active"])
# Shallow copy
shallow = copy.copy(user)  # new User instance, but tags is the same list object
shallow.tags.append("new")
print(user.tags) # ['admin', 'active', 'new'] - Shared!
# Deep copy
# NOTE(review): this deep-copies a freshly built User rather than `user`, so on
# its own it does not demonstrate independence from `user` - confirm the intent.
deep = copy.deepcopy(User("Bob", ["user"]))
# Completely independent

When Deepcopy Fails
Some objects can't be copied:
import copy
# File handles
with open('test.txt', 'w') as f:  # the with-block closes the handle; a bare open() would leak it
    pass  # copy.deepcopy(f) here would raise TypeError
# Modules
import sys
# copy.deepcopy(sys) # TypeError
# Functions and lambdas are not duplicated: deepcopy returns the same object
x = 10
func = lambda: x
# copy.deepcopy(func) does not fail, but it returns func itself rather than a copy

Performance Considerations
import copy
import time
# Deep copy is slower
large_nested = {'level1': {'level2': {'level3': list(range(10000))}}}

# Time 1000 shallow copies: only the outer dict is duplicated each time.
start = time.perf_counter()
for _ in range(1000):
    copy.copy(large_nested)
shallow_time = time.perf_counter() - start

# Time 1000 deep copies: every nested dict and the whole list are copied.
start = time.perf_counter()
for _ in range(1000):
    copy.deepcopy(large_nested)
deep_time = time.perf_counter() - start

print(f"Shallow: {shallow_time:.4f}s")
print(f"Deep: {deep_time:.4f}s")
# Deep is often 10-100x slower, and far more on large nested data like this

Practical Patterns
Configuration Copies
import copy
# Template configuration. Never hand this out directly: callers receive copies.
DEFAULT_CONFIG = {
    'debug': False,
    'database': {'host': 'localhost', 'port': 5432},
}


def get_config(**overrides):
    """Return an independent copy of DEFAULT_CONFIG with overrides applied.

    Args:
        **overrides: Top-level keys to set on the copy. A value given for a
            nested key such as ``database`` replaces that whole nested dict;
            it is not merged into it.

    Returns:
        A new dict. Mutating it, including its nested dicts, leaves
        DEFAULT_CONFIG untouched.
    """
    # deepcopy rather than .copy(): the nested 'database' dict must not be shared
    config = copy.deepcopy(DEFAULT_CONFIG)
    config.update(overrides)
    return config
# Each call gets independent copy
config1 = get_config(debug=True)  # override one top-level key
config2 = get_config()  # plain copy of the defaults
config1['database']['port'] = 5433  # changes config1's own nested dict only
print(DEFAULT_CONFIG['database']['port']) # 5432 (unchanged)

Undo Stack
import copy
class Editor:
    """Toy editor that supports undo by snapshotting its state."""

    def __init__(self):
        self.state = {'text': '', 'cursor': 0}
        self._history = []  # stack of earlier states, newest last

    def save_state(self):
        """Push an independent snapshot of the current state onto the history."""
        # deepcopy so that later in-place changes to self.state cannot alter
        # the stored snapshot
        self._history.append(copy.deepcopy(self.state))

    def undo(self):
        """Restore the most recent snapshot; do nothing if there is none."""
        if self._history:
            self.state = self._history.pop()

    def edit(self, text):
        """Record an undo snapshot before the text is changed."""
        self.save_state()
        self.state['text'] = text

Decision Guide
| Scenario | Use |
|---|---|
| Simple flat structure | copy.copy() or .copy() |
| Nested mutable objects | copy.deepcopy() |
| Read-only data | No copy needed |
| Custom copy logic | __copy__ / __deepcopy__ |
| Performance critical | Profile first |
Rule of thumb: If your object contains mutable objects (lists, dicts, custom classes) that you plan to modify, use deepcopy. Otherwise, shallow copy is fine.
React to this post: