pickle serializes almost any Python object to bytes and restores it later. It is powerful but dangerous—here's what you need to know.

Basic Usage

import pickle
 
# Sample object: a dict holding a string and a list of ints
data = dict(name="Alice", scores=[95, 87, 92])

# Round trip: dumps() turns the object into bytes, loads() rebuilds
# an equal (but separate) object from those bytes
serialized = pickle.dumps(data)
restored = pickle.loads(serialized)

print(restored)  # {'name': 'Alice', 'scores': [95, 87, 92]}

File Operations

import pickle
 
data = {"key": "value", "numbers": [1, 2, 3]}

# Write to file: pickle produces bytes, so the file is opened in
# binary mode ("wb"); dump() writes the pickled object to the open file
with open("data.pkl", "wb") as f:
    pickle.dump(data, f)

# Read from file: again binary mode ("rb"); load() reads one pickled
# object from the file and returns the reconstructed value
with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)

Protocols

import pickle
 
data = {"test": True}

# Protocol 0: ASCII, human-readable (slow)
pickle.dumps(data, protocol=0)

# Protocol 4: added in Python 3.4; the default from 3.8 through 3.13
pickle.dumps(data, protocol=4)

# Protocol 5: added in Python 3.8 (out-of-band data); the default since 3.14
pickle.dumps(data, protocol=5)

# Highest protocol supported by the running interpreter
pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

# Check default protocol
print(pickle.DEFAULT_PROTOCOL)  # 4 on Python 3.8-3.13, 5 on 3.14+

What Can Be Pickled

import pickle
 
# These work: every built-in value below pickles without any extra code
for sample in (
    None,
    True,
    42,
    3.14,
    "hello",
    b"bytes",
    [1, 2, 3],
    {"a": 1},
    (1, 2),
    {1, 2, 3},
):
    pickle.dumps(sample)
 
# Instances of classes defined at module level can be pickled
class Point:
    """Simple 2-D point with plain ``x`` and ``y`` attributes."""

    def __init__(self, x, y):
        self.x, self.y = x, y


pickle.dumps(Point(1, 2))  # Works
 
# These don't work:
# pickle.dumps(lambda x: x)  # Can't pickle lambdas
# pickle.dumps(open("file.txt"))  # Can't pickle file handles

Custom Pickling

import pickle
 
class Connection:
    """Connection settings plus a live socket that must never be pickled.

    Only ``host`` and ``port`` (and any other instance attributes except
    ``socket``) travel through pickle; ``socket`` is reset to ``None`` on
    the restored object.
    """

    def __init__(self, host, port):
        self.host, self.port = host, port
        self.socket = None  # Can't pickle sockets

    def __getstate__(self):
        """Return a copy of the instance dict without the ``socket`` entry."""
        picklable = dict(self.__dict__)
        picklable.pop("socket")
        return picklable

    def __setstate__(self, state):
        """Restore the pickled attributes and start with no socket."""
        vars(self).update(state)
        self.socket = None  # Reconnect later


conn = Connection("localhost", 8080)
data = pickle.dumps(conn)
restored = pickle.loads(data)

Reduce Protocol

import pickle
 
class DatabaseConnection:
    """Connection that is rebuilt from its URL when unpickled.

    ``__reduce__`` tells pickle to call the class again with the original
    URL, so ``__init__`` (and therefore ``_connect``) runs on load.
    """

    def __init__(self, url):
        self.url = url
        self._connect()

    def _connect(self):
        """Simulate opening the connection by recording a status string."""
        self.connection = f"Connected to {self.url}"

    def __reduce__(self):
        """Return ``(callable, args)`` used by pickle to reconstruct the object."""
        factory = self.__class__
        factory_args = (self.url,)
        return (factory, factory_args)


conn = DatabaseConnection("postgres://localhost/db")
payload = pickle.dumps(conn)
restored = pickle.loads(payload)
print(restored.connection)  # Connected to postgres://localhost/db (re-created on load)

⚠️ Security Warning

import pickle
 
# NEVER unpickle untrusted data!
# Pickle can execute arbitrary code:
 
class Evil:
    """Demonstration payload: unpickling an instance runs a shell command.

    ``__reduce__`` hands pickle a callable and its arguments; the unpickler
    calls it, which is why untrusted pickles must never be loaded.
    """

    def __reduce__(self):
        from os import system

        command = "echo PWNED"
        return system, (command,)
 
# This would run the command:
# pickle.loads(pickle.dumps(Evil()))
 
# Safe alternatives for untrusted data:
# - JSON (json module)
# - MessagePack (msgpack)
# - Protocol Buffers

Restricting Unpickling

import pickle
import io
 
class RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that resolves only an explicit allow-list of globals.

    Any ``(module, name)`` pair not present in ``ALLOWED_CLASSES`` is
    rejected with ``pickle.UnpicklingError`` instead of being looked up.
    """

    # (module, name) pairs that find_class is willing to resolve
    ALLOWED_CLASSES = {
        ("builtins", type_name)
        for type_name in ("dict", "list", "set", "tuple")
    }

    def find_class(self, module, name):
        """Return the requested global if it is allow-listed, else raise."""
        requested = (module, name)
        if requested not in self.ALLOWED_CLASSES:
            raise pickle.UnpicklingError(
                f"Class {module}.{name} not allowed"
            )
        return super().find_class(module, name)
 
def safe_loads(data):
    """Unpickle ``data`` (bytes) through ``RestrictedUnpickler``."""
    stream = io.BytesIO(data)
    unpickler = RestrictedUnpickler(stream)
    return unpickler.load()


# Only allows basic types
safe_loads(pickle.dumps([1, 2, 3]))  # OK
# safe_loads(pickle.dumps(SomeClass()))  # Raises error

Pickling with Slots

import pickle
 
class Point:
    """2-D point stored in ``__slots__`` with explicit pickle state hooks."""

    __slots__ = ["x", "y"]

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __getstate__(self):
        """Return the slot values as a plain dict."""
        return dict(x=self.x, y=self.y)

    def __setstate__(self, state):
        """Assign ``x`` then ``y`` from the pickled dict."""
        for slot in ("x", "y"):
            setattr(self, slot, state[slot])


p = Point(1, 2)
payload = pickle.dumps(p)
restored = pickle.loads(payload)

Persistent References

import pickle
import io
 
class Database:
    """Minimal in-memory object store keyed by object id."""

    def __init__(self) -> None:
        # Maps obj_id -> stored object
        self.objects = dict()

    def store(self, obj_id, obj) -> None:
        """Save ``obj`` under ``obj_id``, replacing any existing entry."""
        registry = self.objects
        registry[obj_id] = obj

    def fetch(self, obj_id):
        """Return the object stored under ``obj_id``; ``KeyError`` if absent."""
        registry = self.objects
        return registry[obj_id]
 
class DatabasePickler(pickle.Pickler):
    """Pickler that writes objects carrying a ``db_id`` as persistent references.

    Instead of serializing such an object's contents, the stream records the
    persistent id ``("db", obj.db_id)``; a matching unpickler is expected to
    resolve that id back to an object.

    Args:
        file: Binary writable file object the pickle stream is written to.
        db: Database handle kept on ``self.db``; the pickler itself does not
            use it when producing persistent ids.
        protocol: Optional pickle protocol number. ``None`` (the default)
            keeps the previous behaviour of using pickle's default protocol.
    """

    def __init__(self, file, db, protocol=None):
        # Forward the protocol so callers are not locked into the default.
        super().__init__(file, protocol)
        self.db = db

    def persistent_id(self, obj):
        """Return ``("db", obj.db_id)`` for database objects, else ``None``.

        Returning ``None`` tells pickle to serialize ``obj`` normally.
        """
        if hasattr(obj, "db_id"):
            return ("db", obj.db_id)
        return None
 
class DatabaseUnpickler(pickle.Unpickler):
    """Unpickler that resolves ``("db", id)`` persistent ids via ``db.fetch``."""

    def __init__(self, file, db):
        super().__init__(file)
        # Object exposing fetch(obj_id); consulted for every "db" reference
        self.db = db

    def persistent_load(self, pid):
        """Return the object for ``pid`` or raise ``pickle.UnpicklingError``."""
        if pid[0] != "db":
            raise pickle.UnpicklingError(f"Unknown persistent id: {pid}")
        return self.db.fetch(pid[1])

Pickling Large Objects

import pickle
 
# For large numpy arrays, use protocol 5
import numpy as np
 
# 10000 x 10000 array; with numpy's default float64 dtype this is ~800 MB
arr = np.zeros((10000, 10000))

# Protocol 5 with out-of-band buffers:
# buffer_callback is called with each large buffer, so the buffers are
# collected in the list instead of being copied into the pickle stream
buffers = []
data = pickle.dumps(arr, protocol=5, buffer_callback=buffers.append)

# Restore: the same buffers, in the same order, must be handed back to loads()
restored = pickle.loads(data, buffers=buffers)

Common Patterns

import pickle
from pathlib import Path
 
def save_object(obj, path):
    """Pickle ``obj`` with the highest protocol and write it to ``path``."""
    target = Path(path)
    payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    target.write_bytes(payload)
 
def load_object(path):
    """Read the file at ``path`` and return the unpickled object.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    raw = Path(path).read_bytes()
    return pickle.loads(raw)
 
def clone(obj):
    """Return a deep copy of ``obj`` made by pickling and unpickling it."""
    snapshot = pickle.dumps(obj)
    return pickle.loads(snapshot)
 
# Cache decorator
def disk_cache(path):
    """Cache a function's result in a single pickle file at ``path``.

    The first call runs the function and saves its result with
    ``save_object``; every later call returns the stored result through
    ``load_object`` while the file exists.

    Note: the cache is keyed by the file only, not by the call arguments,
    so all calls share one cached value. Delete the file to recompute.

    Args:
        path: Location of the cache file (``str`` or ``Path``).

    Returns:
        A decorator that wraps the target function with the caching logic.
    """
    import functools

    # The cache location never changes, so resolve it once.
    cache_file = Path(path)

    def decorator(func):
        # wraps() keeps func's __name__, __doc__ and __wrapped__ on the wrapper
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if cache_file.exists():
                return load_object(cache_file)
            result = func(*args, **kwargs)
            save_object(result, cache_file)
            return result
        return wrapper
    return decorator

Alternatives to Pickle

# JSON - safe, portable, text-based
# (dumps() returns a str; only basic types such as dict/list/str/numbers/bool/None)
import json
json.dumps({"key": "value"})

# MessagePack - fast, compact binary
import msgpack  # pip install msgpack
msgpack.packb({"key": "value"})

# Protocol Buffers - schema-based, cross-language
# Requires .proto definition and compilation

# Cloudpickle - extended pickle for lambdas
# NOTE: the output is still pickle data, so loading it from an untrusted
# source carries the same risk as pickle.loads
import cloudpickle  # pip install cloudpickle
cloudpickle.dumps(lambda x: x * 2)

# Dill - extended pickle for more types
# NOTE: same trust caveat as pickle applies when loading
import dill  # pip install dill
dill.dumps(lambda x: x * 2)

When to Use Pickle

Good for:

  • Caching computed results locally
  • Saving/loading ML models (with trusted sources)
  • Inter-process communication (same machine)
  • Development and debugging

Avoid for:

  • Network protocols (use JSON, protobuf)
  • Long-term storage (schema changes break it)
  • Untrusted data (security risk)
  • Cross-language systems (Python-only)

Best Practices

# Always use highest protocol for speed/size
# (`obj` here is a placeholder for whatever object you are serializing)
pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)

# Use binary mode for files
with open("data.pkl", "wb") as f:
    pickle.dump(obj, f)

# Never unpickle untrusted data
# If you must, use RestrictedUnpickler
# (an Unpickler subclass whose find_class only resolves an allow-list)

# Document pickle format versions
# Data written with a newer protocol cannot be read by a Python version
# that predates that protocol, and loading also needs the pickled
# classes to still be importable under the same module and name

# Consider alternatives for production
# JSON for APIs, protobuf for cross-language

Pickle is convenient for internal use but not for untrusted data or long-term storage. Use JSON or Protocol Buffers when security or portability matters.

React to this post: