The pickle module serializes Python objects to bytes and deserializes them back. It is powerful, but it comes with important caveats.

Basic Usage

import pickle
 
# Sample payload mixing a string, a list and a boolean.
data = dict(name='Alice', scores=[95, 87, 92], active=True)

# dumps() turns the object into a bytes string ...
pickled = pickle.dumps(data)

# ... and loads() rebuilds an equal, independent object from those bytes.
restored = pickle.loads(pickled)
print(restored)  # {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}

File Operations

import pickle
 
data = dict(key='value')

# Pickle files are binary: open with 'wb' to write ...
with open('data.pkl', 'wb') as sink:
    pickle.dump(data, sink)

# ... and with 'rb' to read the object back.
with open('data.pkl', 'rb') as source:
    loaded = pickle.load(source)

Protocol Versions

import pickle
 
data = dict(example=True)

# Ask for the newest protocol this interpreter supports
newest = pickle.HIGHEST_PROTOCOL
pickled = pickle.dumps(data, protocol=newest)

# The protocol used when none is passed explicitly
print(pickle.DEFAULT_PROTOCOL)  # Usually 4 or 5
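
| Protocol | Python | Features                   |
|----------|--------|----------------------------|
| 0        | All    | ASCII, human-readable      |
| 1        | All    | Binary, more efficient     |
| 2        | 2.3+   | New-style classes          |
| 3        | 3.0+   | Bytes objects              |
| 4        | 3.4+   | Large objects, more types  |
| 5        | 3.8+   | Out-of-band data, buffers  |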

Use HIGHEST_PROTOCOL for best performance when Python version compatibility isn't needed.

What Can Be Pickled

import pickle
from dataclasses import dataclass
from datetime import datetime
 
@dataclass
class User:
    # Minimal record type: shows that dataclass instances pickle like
    # any other instance of an importable class.
    name: str
    created: datetime


# Picklable: numbers, strings, lists, dicts and dataclass instances.
for sample in (
    42,
    "hello",
    [1, 2, 3],
    {'a': 1},
    User("Alice", datetime.now()),
):
    pickle.dumps(sample)

# Not picklable: lambdas, generators, open files
# pickle.dumps(lambda x: x)  # Error
# pickle.dumps(open('file.txt'))  # Error

Custom Pickling

import pickle
 
class Connection:
    """Network endpoint whose socket slot is excluded from its pickled form.

    Attributes:
        host: Host passed to the constructor.
        port: Port passed to the constructor.
        _socket: Slot for a live socket. It is never written to the pickle
            and is reset to ``None`` whenever an instance is unpickled.
    """

    def __init__(self, host, port):
        self.host = host
        self.port = port
        self._socket = None  # Can't pickle sockets

    def __getstate__(self):
        """Return a copy of the instance dict without the ``_socket`` entry.

        The copy keeps the live instance untouched.
        """
        state = self.__dict__.copy()
        # pop() with a default rather than `del`: an instance that never got
        # a `_socket` attribute (or had it removed) must still be picklable
        # instead of failing with KeyError.
        state.pop('_socket', None)
        return state

    def __setstate__(self, state):
        """Restore attributes from *state* and reset ``_socket`` to ``None``."""
        self.__dict__.update(state)
        self._socket = None  # Reinitialize


conn = Connection('localhost', 8080)
pickled = pickle.dumps(conn)
restored = pickle.loads(pickled)

⚠️ Security Warning

Never unpickle data from untrusted sources.

import pickle
 
# DANGEROUS: loading attacker-controlled bytes can execute arbitrary code
# pickle.loads(untrusted_data)

# A pickle may name any callable to be invoked at load time
class Evil:
    """Demonstration payload: unpickling an instance runs a shell command."""

    def __reduce__(self):
        # pickle records this (callable, args) pair; it is called on load.
        from os import system
        command = ('echo HACKED',)
        return system, command


# Building the payload is harmless; the command would run when unpickled
evil_pickle = pickle.dumps(Evil())

Safe alternatives for untrusted data:

  • JSON for simple data
  • Protocol Buffers for structured data
  • MessagePack for efficient binary

Pickling Functions and Classes

import pickle
 
def greet(name):
    """Return the greeting for *name*."""
    greeting = f"Hello, {name}"
    return greeting


class MyClass:
    """Empty class used to show that classes pickle by reference."""


# Only the qualified name is stored, not the code itself
pickled_func, pickled_class = pickle.dumps(greet), pickle.dumps(MyClass)

# Unpickling looks the name up again, so it must still exist
restored_func = pickle.loads(pickled_func)
print(restored_func("World"))  # Hello, World

Handling Unpicklable Objects

import pickle
import io
 
class DataProcessor:
    """Holder of ``data`` that pickles by re-running its constructor."""

    def __init__(self, data):
        self.data = data
        self.file_handle = None  # Slot for an unpicklable resource

    def __reduce__(self):
        # Tell pickle to rebuild the object as cls(data); file_handle is
        # not part of the pickle and is set again by __init__.
        rebuild = self.__class__
        ctor_args = (self.data,)
        return rebuild, ctor_args
 
# Or use copyreg for external classes
import copyreg
 
def unpickle_external(data):
    """Rebuild an ExternalClass instance from its saved ``data``."""
    return ExternalClass(data)


def pickle_external(obj):
    """Reduce *obj* to the ``(callable, args)`` pair used to rebuild it."""
    rebuild_args = (obj.data,)
    return unpickle_external, rebuild_args


# ExternalClass is not defined in this snippet; it stands for the class
# being registered.
copyreg.pickle(ExternalClass, pickle_external)

Multiple Objects

import pickle
 
# Each dump() appends one more pickle to the same file
with open('data.pkl', 'wb') as sink:
    for item in (obj1, obj2, obj3):
        pickle.dump(item, sink)

# Each load() reads the next pickle, in the order they were written
with open('data.pkl', 'rb') as source:
    loaded1, loaded2, loaded3 = (
        pickle.load(source),
        pickle.load(source),
        pickle.load(source),
    )

Practical Examples

Model Checkpointing

import pickle
 
class Model:
    """Toy model that checkpoints itself to disk with pickle.

    Attributes:
        weights: List of weights; empty on a new model.
        trained: Whether the model has been trained; ``False`` on a new model.
    """

    def __init__(self):
        self.weights = []
        self.trained = False

    def save(self, path):
        """Pickle this instance to *path* using the highest protocol."""
        with open(path, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    @classmethod
    def load(cls, path):
        """Unpickle and return the model stored at *path*.

        Raises:
            TypeError: If the file holds something other than an instance
                of the class ``load`` was called on.
        """
        # NOTE: pickle.load can execute arbitrary code while loading; only
        # call this on checkpoint files from a trusted source.
        with open(path, 'rb') as f:
            obj = pickle.load(f)
        # Without this check the classmethod would return whatever object
        # the file happens to contain.
        if not isinstance(obj, cls):
            raise TypeError(
                f"{path!r} does not contain a {cls.__name__} instance "
                f"(got {type(obj).__name__})"
            )
        return obj


model = Model()
model.weights = [1.0, 2.0, 3.0]
model.trained = True
model.save('model.pkl')

loaded = Model.load('model.pkl')

Caching Computed Results

import pickle
import hashlib
from pathlib import Path
 
def cached(func):
    """Decorate *func* so its results are cached as pickle files on disk.

    Results are stored under ``.cache`` in the current working directory,
    one file per distinct call. The file name is derived from the function's
    module and qualified name plus its arguments, so the arguments must be
    picklable. The directory is created when the decorator is applied.

    Args:
        func: The function whose return values should be cached.

    Returns:
        A wrapper with the same name and docstring as *func*.
    """
    # Imported here so the snippet needs no extra top-level import.
    from functools import wraps

    cache_dir = Path('.cache')
    cache_dir.mkdir(exist_ok=True)

    @wraps(func)  # keep __name__, __doc__ etc. of the wrapped function
    def wrapper(*args, **kwargs):
        # Module + qualified name keeps same-named functions from sharing
        # entries; sorting kwargs makes f(a=1, b=2) and f(b=2, a=1) hit the
        # same entry.
        identity = (
            func.__module__,
            func.__qualname__,
            args,
            sorted(kwargs.items()),
        )
        # md5 is only used to get a short file name, not for security.
        key = hashlib.md5(pickle.dumps(identity)).hexdigest()
        cache_path = cache_dir / f"{key}.pkl"

        if cache_path.exists():
            # NOTE: unpickling can execute arbitrary code; the cache
            # directory must not be writable by untrusted parties.
            with open(cache_path, 'rb') as f:
                return pickle.load(f)

        result = func(*args, **kwargs)
        with open(cache_path, 'wb') as f:
            pickle.dump(result, f)
        return result

    return wrapper

pickle vs Alternatives

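| Format   | Python Objects | Cross-Language | Human Readable | Security |
|----------|----------------|----------------|----------------|----------|
| pickle   | ✓ All          | ✗              | ✗              | ✗ Unsafe |
| JSON     | Limited        | ✓              | ✓              | ✓ Safe   |
| msgpack  | Limited        | ✓              | ✗              | ✓ Safe   |
| protobuf | Schema         | ✓              | ✗              | ✓ Safe   |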

Quick Reference

import pickle
 
# Serialize
pickle.dumps(obj)                    # To bytes
pickle.dump(obj, file)               # To file
 
# Deserialize (trusted data only: loading can execute arbitrary code)
pickle.loads(bytes_data)             # From bytes
pickle.load(file)                    # From file
 
# Options
pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)  # Pick the protocol explicitly
pickle.DEFAULT_PROTOCOL              # Protocol used when none is specified
pickle.HIGHEST_PROTOCOL              # Highest protocol version available
 
# Custom pickling
def __getstate__(self): ...          # Return what to pickle
def __setstate__(self, state): ...   # Restore from pickled state
def __reduce__(self): ...            # Return (callable, args) to reconstruct

pickle is Python's native serialization. Use it for trusted data when you need full Python object support. For untrusted data or cross-language needs, use JSON or other safe formats.

React to this post: