The pickle module serializes Python objects to bytes and deserializes them back. It is powerful, but it comes with important caveats.
Basic Usage
import pickle
data = {
'name': 'Alice',
'scores': [95, 87, 92],
'active': True,
}
# Serialize to bytes
pickled = pickle.dumps(data)
# Deserialize back
restored = pickle.loads(pickled)
print(restored) # {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}

File Operations
import pickle
data = {'key': 'value'}
# Write to file. Pickle output is bytes, so the file is opened in binary mode.
with open('data.pkl', 'wb') as f:
    pickle.dump(data, f)
# Read from file
with open('data.pkl', 'rb') as f:
    loaded = pickle.load(f)

Protocol Versions
import pickle
data = {'example': True}
# Specify protocol
pickled = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
# Check default protocol
print(pickle.DEFAULT_PROTOCOL) # Usually 4 or 5

| Protocol | Python | Features |
|---|---|---|
| 0 | All | ASCII, human-readable |
| 1 | All | Binary, more efficient |
| 2 | 2.3+ | New-style classes |
| 3 | 3.0+ | Bytes objects |
| 4 | 3.4+ | Large objects, more types |
| 5 | 3.8+ | Out-of-band data, buffers |
Use HIGHEST_PROTOCOL for best performance when Python version compatibility isn't needed.
What Can Be Pickled
import pickle
from dataclasses import dataclass
from datetime import datetime
@dataclass
class User:
    """Example record showing that dataclass instances can be pickled."""

    name: str
    created: datetime
# Works: most built-in types
pickle.dumps(42)
pickle.dumps("hello")
pickle.dumps([1, 2, 3])
pickle.dumps({'a': 1})
pickle.dumps(User("Alice", datetime.now()))
# Doesn't work: lambdas, generators, open files
# pickle.dumps(lambda x: x) # Error
# pickle.dumps(open('file.txt')) # Error

Custom Pickling
import pickle
class Connection:
    """Network endpoint whose socket is left out of its pickled form."""

    def __init__(self, host, port):
        self.host = host
        self.port = port
        self._socket = None  # Can't pickle sockets

    def __getstate__(self):
        """Return the state to pickle: every attribute except ``_socket``."""
        # Work on a copy so the live instance keeps its own socket.
        state = self.__dict__.copy()
        # pop() with a default does not raise if '_socket' is absent.
        state.pop('_socket', None)
        return state

    def __setstate__(self, state):
        """Restore attributes from pickled *state* and reset the socket."""
        self.__dict__.update(state)
        self._socket = None  # Reinitialize
conn = Connection('localhost', 8080)
pickled = pickle.dumps(conn)
restored = pickle.loads(pickled)

⚠️ Security Warning
Never unpickle data from untrusted sources.
import pickle
# DANGEROUS: This can execute arbitrary code
# pickle.loads(untrusted_data)
# Pickle can run any Python code on load
class Evil:
    """Demonstration payload: unpickling an instance runs a shell command.

    WARNING: for illustration only. This shows why data from untrusted
    sources must never be passed to ``pickle.loads``.
    """

    def __reduce__(self):
        """Return ``(callable, args)`` naming ``os.system`` as the callable."""
        import os
        # On load, the unpickler calls os.system('echo HACKED').
        return (os.system, ('echo HACKED',))
# This would execute the command when unpickled
evil_pickle = pickle.dumps(Evil())

Safe alternatives for untrusted data:
- JSON for simple data
- Protocol Buffers for structured data
- MessagePack for efficient binary
Pickling Functions and Classes
import pickle
def greet(name):
    """Return a greeting for *name*."""
    return f"Hello, {name}"
class MyClass:
    """Empty placeholder class used to show pickling a class by reference."""
# Functions and classes pickle by reference
pickled_func = pickle.dumps(greet)
pickled_class = pickle.dumps(MyClass)
# They must exist with same name when unpickling
restored_func = pickle.loads(pickled_func)
print(restored_func("World")) # Hello, World

Handling Unpicklable Objects
import pickle
import io
class DataProcessor:
    """Holds data plus a handle attribute that is not carried through pickling."""

    def __init__(self, data):
        self.data = data
        self.file_handle = None  # Unpicklable

    def __reduce__(self):
        """Return ``(callable, args)`` so unpickling calls the class with ``data``.

        Only ``data`` is passed along; the rebuilt object goes through
        ``__init__`` again, so its ``file_handle`` starts as ``None``.
        """
        return (self.__class__, (self.data,))
# Or use copyreg for external classes
import copyreg
def pickle_external(obj):
    """Reduction function for ``copyreg``: return ``(callable, args)`` for *obj*."""
    return unpickle_external, (obj.data,)
def unpickle_external(data):
    """Rebuild an ``ExternalClass`` instance from its pickled *data*."""
    return ExternalClass(data)
copyreg.pickle(ExternalClass, pickle_external)

Multiple Objects
import pickle
# Dump multiple objects: each dump() call writes one more pickle to the same file.
with open('data.pkl', 'wb') as f:
    pickle.dump(obj1, f)
    pickle.dump(obj2, f)
    pickle.dump(obj3, f)
# Load multiple objects
with open('data.pkl', 'rb') as f:
    loaded1 = pickle.load(f)
    loaded2 = pickle.load(f)
    loaded3 = pickle.load(f)

Practical Examples
Model Checkpointing
import pickle
class Model:
    """Minimal model object that checkpoints itself to disk with pickle."""

    def __init__(self):
        self.weights = []
        self.trained = False

    def save(self, path):
        """Pickle this instance to *path* using the highest available protocol."""
        with open(path, 'wb') as f:
            pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL)

    @classmethod
    def load(cls, path):
        """Load a checkpoint previously written by :meth:`save`.

        Returns:
            The unpickled instance of ``cls`` (or of a subclass).

        Raises:
            TypeError: if the file holds something other than a ``cls`` instance.

        SECURITY: unpickling can execute arbitrary code. Only load checkpoint
        files that come from a trusted source.
        """
        with open(path, 'rb') as f:
            obj = pickle.load(f)
        # Without this check, load() would hand back whatever object the file held.
        if not isinstance(obj, cls):
            raise TypeError(
                f"expected a pickled {cls.__name__}, got {type(obj).__name__}"
            )
        return obj
model = Model()
model.weights = [1.0, 2.0, 3.0]
model.trained = True
model.save('model.pkl')
loaded = Model.load('model.pkl')

Caching Computed Results
import pickle
import hashlib
from pathlib import Path
def cached(func):
    """Cache *func*'s return values on disk, keyed by function and arguments.

    Results are pickled into ``.cache/<md5>.pkl`` relative to the working
    directory at decoration time. Arguments and results must be picklable.

    SECURITY: cache files are unpickled on a hit, so the cache directory
    must only ever contain files written by trusted code.
    """
    import functools  # function-scope import keeps this snippet self-contained

    cache_dir = Path('.cache')
    cache_dir.mkdir(exist_ok=True)

    @functools.wraps(func)  # keep func's name and docstring on the wrapper
    def wrapper(*args, **kwargs):
        # Module + qualified name keeps same-named functions from different
        # modules or classes from sharing (and corrupting) cache entries.
        # Sorting kwargs makes f(a=1, b=2) and f(b=2, a=1) hit the same entry.
        key_material = (
            func.__module__,
            func.__qualname__,
            args,
            tuple(sorted(kwargs.items())),
        )
        # MD5 only produces a short file name here; it is not a security measure.
        key = hashlib.md5(pickle.dumps(key_material)).hexdigest()
        cache_path = cache_dir / f"{key}.pkl"

        if cache_path.exists():
            with open(cache_path, 'rb') as f:
                return pickle.load(f)

        result = func(*args, **kwargs)
        with open(cache_path, 'wb') as f:
            pickle.dump(result, f)
        return result

    return wrapper


# pickle vs Alternatives
| Format | Python Objects | Cross-Language | Human Readable | Security |
|---|---|---|---|---|
| pickle | ✓ All | ✗ | ✗ | ✗ Unsafe |
| JSON | Limited | ✓ | ✓ | ✓ Safe |
| msgpack | Limited | ✓ | ✗ | ✓ Safe |
| protobuf | Schema | ✓ | ✗ | ✓ Safe |
Quick Reference
import pickle
# Serialize
pickle.dumps(obj) # To bytes
pickle.dump(obj, file) # To file
# Deserialize
pickle.loads(bytes_data) # From bytes
pickle.load(file) # From file
# Options
pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
pickle.DEFAULT_PROTOCOL
pickle.HIGHEST_PROTOCOL
# Custom pickling
def __getstate__(self): ...
def __setstate__(self, state): ...
def __reduce__(self): ...

pickle is Python's native serialization. Use it for trusted data when you need full Python object support. For untrusted data or cross-language needs, use JSON or other safe formats.
React to this post: