You know @dataclass basics. Here are the patterns that make them powerful.

Field Factories

Never use mutable defaults directly:

from dataclasses import dataclass, field
 
# BAD: a mutable default would be shared by every instance, so @dataclass
# rejects it and raises ValueError while the class is being defined.
@dataclass
class BadConfig:
    items: list = []  # ValueError: mutable default not allowed, use default_factory
 
# GOOD: default_factory is called for each new instance, so nothing is shared
@dataclass
class Config:
    """Configuration holder whose container fields default to fresh, empty objects."""

    items: list = field(default_factory=list)  # new empty list per instance
    metadata: dict = field(default_factory=dict)  # new empty dict per instance

Custom factories:

from dataclasses import dataclass, field
from datetime import datetime
 
def now() -> datetime:
    """Return the current local date and time as a naive datetime."""
    current_moment = datetime.now()
    return current_moment


@dataclass
class Event:
    """Named event stamped with the time it was created."""

    name: str
    # The factory runs on every construction, so each Event gets its own timestamp.
    created_at: datetime = field(default_factory=now)

__post_init__

Run code after __init__:

from dataclasses import dataclass, field


@dataclass
class Rectangle:
    """Rectangle whose area is derived from its sides after construction.

    Attributes:
        width: One side length.
        height: The other side length.
        area: width * height; set in __post_init__ and not accepted by __init__.
    """

    width: float
    height: float
    area: float = field(init=False)  # Computed, not in __init__

    def __post_init__(self):
        # Called by the generated __init__ once width and height are assigned.
        self.area = self.width * self.height


r = Rectangle(3, 4)
print(r.area)  # 12 (int inputs give an int; Rectangle(3.0, 4.0) would print 12.0)

Validation:

@dataclass
class User:
    """User record that rejects invalid values as soon as it is constructed."""

    name: str
    age: int

    def __post_init__(self):
        """Validate the freshly assigned fields.

        Raises:
            ValueError: if age is negative, or if name is empty or whitespace-only.
        """
        age_is_negative = self.age < 0
        if age_is_negative:
            raise ValueError("Age cannot be negative")

        trimmed_name = self.name.strip()
        if not trimmed_name:
            raise ValueError("Name cannot be empty")

InitVar: Init-Only Variables

Pass a value to __post_init__ without storing it as a field:

from dataclasses import dataclass, field, InitVar
 
@dataclass
class Database:
    """Connection target parsed out of a "host:port" string.

    The connection string is an init-only argument: it is handed to
    __post_init__ and never stored on the instance.
    """

    connection_string: InitVar[str]
    host: str = field(init=False)
    port: int = field(init=False)

    def __post_init__(self, connection_string: str):
        """Split the connection string and populate host and port."""
        pieces = connection_string.split(":")
        # Exactly two pieces are expected; unpacking raises ValueError otherwise.
        host_part, port_part = pieces
        self.host = host_part
        self.port = int(port_part)


db = Database("localhost:5432")
print(db.host, db.port)  # localhost 5432
# db.connection_string  # AttributeError - not stored

Frozen: Immutable Dataclasses

from dataclasses import dataclass
 
@dataclass(frozen=True)
class Point:
    """Immutable point; assigning to a field raises FrozenInstanceError."""

    x: float
    y: float

p = Point(1, 2)
# p.x = 3  # FrozenInstanceError

# Hashable (can use in sets/dict keys): frozen=True together with the
# default eq=True makes @dataclass generate __hash__ from the fields.
points = {Point(0, 0), Point(1, 1)}

Slots: Memory Efficiency

Python 3.10+:

from dataclasses import dataclass
 
@dataclass(slots=True)
class Particle:
    """Particle record declared with __slots__ instead of a per-instance __dict__."""

    x: float
    y: float
    velocity: float

# ~40% less memory per instance (rough figure; varies with Python version and field count)
# Slightly faster attribute access
# Cannot add arbitrary attributes (assigning an undeclared name raises AttributeError)

Field Options

from dataclasses import dataclass, field
 
@dataclass
class Record:
    """Showcase of the per-field switches accepted by field()."""

    # Normal field
    name: str

    # Default value
    count: int = 0

    # Not in __init__; starts as "" and can be assigned after construction
    computed: str = field(init=False, default="")

    # Not in repr
    secret: str = field(repr=False, default="hidden")

    # Not in comparison (ignored by the generated __eq__ and ordering methods)
    cache: dict = field(compare=False, default_factory=dict)

    # Not hashed: left out of a generated __hash__ (only relevant when one is
    # generated, e.g. with frozen=True) while still taking part in __eq__
    mutable_ref: list = field(hash=False, default_factory=list)

Inheritance

from dataclasses import dataclass
 
@dataclass
class Animal:
    """Base dataclass; its fields come first in a subclass's generated __init__."""

    name: str
    age: int

@dataclass
class Dog(Animal):
    """Animal with a breed; the generated __init__ takes name, age, breed."""

    breed: str

dog = Dog(name="Rex", age=5, breed="German Shepherd")

Gotcha: a child field without a default cannot follow a parent field that has one:

# BAD: TypeError is raised while Child is being defined
@dataclass
class Parent:
    name: str = "default"

@dataclass
class Child(Parent):
    age: int  # Non-default field after the inherited default field - Error!
 
# GOOD: give the child field a default (on 3.10+, kw_only=True is another way out)
@dataclass
class Parent:
    name: str = "default"

@dataclass
class Child(Parent):
    age: int = 0  # Give default

KW Only (3.10+)

Force keyword arguments:

from dataclasses import dataclass
 
@dataclass(kw_only=True)
class Config:
    """Settings object whose fields must all be passed by keyword."""

    host: str
    port: int
    debug: bool

# Must use keywords
Config(host="localhost", port=8080, debug=True)
# Config("localhost", 8080, True)  # TypeError: positional arguments are rejected

Per-field:

from dataclasses import dataclass, field
 
@dataclass
class Request:
    """Request with positional method/path and keyword-only optional parts."""

    method: str
    path: str
    headers: dict = field(kw_only=True, default_factory=dict)  # keyword-only; fresh dict per instance
    body: bytes = field(kw_only=True, default=b"")  # keyword-only

# method and path are passed positionally; headers must be named
Request("GET", "/api", headers={"Auth": "token"})

Comparison and Ordering

from dataclasses import dataclass
 
@dataclass(order=True)
class Version:
    """Version triple; order=True compares instances as (major, minor, patch) tuples."""

    major: int
    minor: int
    patch: int

v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
print(v1 < v2)  # True
print(sorted([v2, v1]))  # [Version(major=1, minor=0, patch=0), Version(major=2, minor=0, patch=0)]

Custom sort key:

from dataclasses import dataclass, field


@dataclass(order=True)
class Task:
    """Task that sorts with the highest priority first.

    order=True compares fields in declaration order, so the hidden
    sort_index field is declared first and decides the ordering. Tasks with
    equal priority fall back to comparing name.

    Attributes:
        sort_index: Negated priority; set in __post_init__, excluded from
            __init__ and from repr.
        priority: Larger values sort earlier.
        name: Task label.
    """

    sort_index: int = field(init=False, repr=False)
    priority: int
    name: str

    def __post_init__(self):
        self.sort_index = -self.priority  # Higher priority first

Replace (Functional Updates)

from dataclasses import dataclass, replace
 
@dataclass(frozen=True)
class User:
    """Immutable user record; changes are made by building a modified copy."""

    name: str
    email: str
    active: bool

user = User("Alice", "alice@example.com", True)
updated = replace(user, active=False)
# New instance with one field changed; `user` itself is left untouched

asdict and astuple

from dataclasses import dataclass, asdict, astuple
 
@dataclass
class Person:
    """Minimal record used to demonstrate asdict() and astuple()."""

    name: str
    age: int

p = Person("Alice", 30)
print(asdict(p))   # {'name': 'Alice', 'age': 30}
print(astuple(p))  # ('Alice', 30)

Pattern: Builder

from dataclasses import dataclass, field
 
@dataclass
class QueryBuilder:
    table: str
    columns: list = field(default_factory=lambda: ["*"])
    conditions: list = field(default_factory=list)
    limit: int | None = None
    
    def select(self, *cols) -> "QueryBuilder":
        return replace(self, columns=list(cols))
    
    def where(self, condition: str) -> "QueryBuilder":
        return replace(self, conditions=[*self.conditions, condition])
    
    def take(self, n: int) -> "QueryBuilder":
        return replace(self, limit=n)
    
    def build(self) -> str:
        sql = f"SELECT {', '.join(self.columns)} FROM {self.table}"
        if self.conditions:
            sql += f" WHERE {' AND '.join(self.conditions)}"
        if self.limit:
            sql += f" LIMIT {self.limit}"
        return sql
 
query = (QueryBuilder("users")
    .select("name", "email")
    .where("active = true")
    .take(10)
    .build())

Quick Reference

# Illustrative listing of the options with their default values. It is a
# reference, not runnable code: default and default_factory cannot both be
# passed to the same field() call.
@dataclass(
    init=True,      # Generate __init__
    repr=True,      # Generate __repr__
    eq=True,        # Generate __eq__
    order=False,    # Generate comparison methods (__lt__, __le__, __gt__, __ge__)
    frozen=False,   # Make immutable
    slots=False,    # Use __slots__ (3.10+)
    kw_only=False,  # Keyword-only args (3.10+)
    # Not shown: unsafe_hash=False, match_args=True (3.10+), weakref_slot=False (3.11+)
)
class MyClass:
    field: type = field(
        default=...,           # Default value
        default_factory=...,   # Zero-argument callable building the default (for mutable defaults)
        init=True,             # Include in __init__
        repr=True,             # Include in __repr__
        compare=True,          # Include in comparisons
        hash=None,             # Include in __hash__ (None means: follow compare)
        kw_only=False,         # Keyword-only (3.10+)
    )

Dataclasses eliminate boilerplate. These patterns eliminate the rest.

React to this post: