JSON is everywhere. Here's how to handle it properly in Python.

Basic Operations

import json

# Serialize a Python object into a JSON string.
data = {"name": "Alice", "age": 30}
json_str = json.dumps(data)
print(json_str)  # '{"name": "Alice", "age": 30}'

# Parse the JSON text back into Python objects.
parsed = json.loads(json_str)
print(parsed["name"])  # Alice

# Human-readable output with two-space indentation.
pretty = json.dumps(data, indent=2)
print(pretty)

File Operations

import json
from pathlib import Path

# Sample payload, defined here so the snippet runs standalone
# (the original relied on `data` from an earlier snippet).
data = {"name": "Alice", "age": 30}

# Write to file — explicit UTF-8 avoids platform-dependent default encodings
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2)

# Read from file
with open("data.json", encoding="utf-8") as f:
    loaded = json.load(f)

# Using pathlib: one-liners for whole-file read/write
Path("data.json").write_text(json.dumps(data, indent=2), encoding="utf-8")
loaded = json.loads(Path("data.json").read_text(encoding="utf-8"))

Formatting Options

import json

data = {"users": [{"name": "Alice"}, {"name": "Bob"}]}

# Default: single line, a space after every "," and ":".
compact = json.dumps(data)
# '{"users": [{"name": "Alice"}, {"name": "Bob"}]}'

# Two-space indentation for human readers.
pretty = json.dumps(data, indent=2)

# Tightest form: drop the separator spaces entirely.
minimal = json.dumps(data, separators=(",", ":"))
# '{"users":[{"name":"Alice"},{"name":"Bob"}]}'

# Deterministic key order — useful for diffing and hashing.
deterministic = json.dumps(data, sort_keys=True)

# Keep non-ASCII characters as-is.
raw_emoji = json.dumps({"emoji": "🎉"}, ensure_ascii=False)
# '{"emoji": "🎉"}'

# Escape everything outside ASCII (the default).
escaped_emoji = json.dumps({"emoji": "🎉"}, ensure_ascii=True)
# '{"emoji": "\\ud83c\\udf89"}'

Custom Encoders

import json
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID

class CustomEncoder(json.JSONEncoder):
    """JSONEncoder that serializes common non-JSON stdlib types."""

    def default(self, obj):
        # datetime and date share isoformat(); one check covers both.
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        # Decimal and UUID both serialize via their string form.
        if isinstance(obj, (Decimal, UUID)):
            return str(obj)
        if isinstance(obj, set):
            return list(obj)
        if isinstance(obj, bytes):
            return obj.decode("utf-8")
        # Anything else: defer to the base class, which raises TypeError.
        return super().default(obj)

data = {
    "created": datetime.now(),
    "price": Decimal("19.99"),
    "tags": {"python", "json"},
}

json.dumps(data, cls=CustomEncoder)

Custom Decoders

import json
from datetime import datetime

def decode_datetime(dct):
    """Object hook: convert ISO-8601 strings into datetime objects."""
    for key, value in dct.items():
        if not isinstance(value, str):
            continue
        try:
            dct[key] = datetime.fromisoformat(value)
        except ValueError:
            # Not a date string — leave the original value untouched.
            pass
    return dct

json_str = '{"created": "2024-01-15T10:30:00"}'
data = json.loads(json_str, object_hook=decode_datetime)
print(type(data["created"]))  # <class 'datetime.datetime'>

Dataclass Serialization

import json
from dataclasses import dataclass, asdict

@dataclass
class User:
    """Plain user record."""

    name: str
    email: str
    age: int

user = User(name="Alice", email="alice@example.com", age=30)

# Serialize: asdict() turns the dataclass into a JSON-friendly dict.
json.dumps(asdict(user))

# Deserialize: unpack the parsed dict straight into the constructor.
data = json.loads('{"name": "Bob", "email": "bob@example.com", "age": 25}')
user = User(**data)

Pydantic Integration

from pydantic import BaseModel

class User(BaseModel):
    """User record validated by pydantic."""

    name: str
    email: str
    age: int

user = User(name="Alice", email="alice@example.com", age=30)

# Serialize straight to a JSON string.
user.model_dump_json()

# Parse and validate JSON in one step.
User.model_validate_json('{"name": "Bob", "email": "bob@example.com", "age": 25}')

Streaming JSON Lines (JSONL)

import json

# Write JSONL
def write_jsonl(path, records):
    """Write *records* to *path*, one JSON document per line (JSONL)."""
    # Explicit UTF-8: the default open() encoding is platform-dependent.
    with open(path, "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")

# Read JSONL
def read_jsonl(path):
    """Lazily yield one parsed record per non-blank line of *path*."""
    with open(path, encoding="utf-8") as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

# Usage
records = [{"id": 1}, {"id": 2}, {"id": 3}]
write_jsonl("data.jsonl", records)

for record in read_jsonl("data.jsonl"):
    print(record)

Handling Large Files

import json
 
# Stream parsing with ijson (for very large files)
# pip install ijson
import ijson
 
def stream_large_json(path, prefix="item"):
    """Lazily yield items matching *prefix* from a huge JSON file via ijson."""
    # Binary mode: ijson consumes raw bytes. The file stays open for the
    # lifetime of the generator, exactly as in a for/yield loop.
    with open(path, "rb") as f:
        yield from ijson.items(f, prefix)
 
# Incremental encoder
def stream_json_array(items, file):
    """Write *items* to *file* as a JSON array without building it in memory."""
    file.write("[\n")
    for index, item in enumerate(items):
        # A comma before every element except the first.
        if index:
            file.write(",\n")
        file.write(json.dumps(item))
    file.write("\n]")

Error Handling

import json
 
def safe_parse(json_str, default=None):
    """Parse JSON with fallback."""
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"JSON error at line {e.lineno}, col {e.colno}: {e.msg}")
        return default
 
# Detailed error info
try:
    json.loads('{"broken": }')
except json.JSONDecodeError as e:
    print(f"Error: {e.msg}")
    print(f"Line: {e.lineno}, Column: {e.colno}")
    print(f"Document: {e.doc}")

Schema Validation

# Using jsonschema
# pip install jsonschema
from jsonschema import validate, ValidationError
 
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
        "email": {"type": "string", "format": "email"},
    },
    "required": ["name", "age"],
}

def validate_user(data):
    """Return True when *data* conforms to the user schema, else False."""
    try:
        validate(instance=data, schema=schema)
    except ValidationError as e:
        print(f"Validation error: {e.message}")
        return False
    return True

Common Patterns

import json
from pathlib import Path
 
# Config file pattern
def load_config(path: Path, defaults: dict = None) -> dict:
    """Load JSON config from *path*, layered on top of *defaults*.

    Returns a new dict; *defaults* is copied, never mutated, so callers
    can safely share one defaults dict across many calls.
    """
    # Copy here: the original `config = defaults or {}` aliased the
    # caller's dict, and update() then mutated it in place.
    config = dict(defaults) if defaults else {}
    if path.exists():
        config.update(json.loads(path.read_text()))
    return config
 
def save_config(path: Path, config: dict) -> None:
    """Save config atomically: write a sibling temp file, then rename.

    A crash mid-write leaves the previous file intact; Path.replace is
    an atomic rename on the same filesystem.
    """
    # Append ".tmp" instead of with_suffix(".tmp"): replacing the suffix
    # would collide for sibling files like "a.json" and "a.yaml".
    tmp = path.with_suffix(path.suffix + ".tmp")
    tmp.write_text(json.dumps(config, indent=2), encoding="utf-8")
    tmp.replace(path)
 
# Merge JSON files
def merge_json_files(*paths) -> dict:
    """Shallow-merge the JSON objects in *paths*; later files win on ties."""
    merged: dict = {}
    for p in paths:
        merged.update(json.loads(Path(p).read_text()))
    return merged
 
# Deep merge
def deep_merge(base: dict, override: dict) -> dict:
    """Recursively merge *override* into a copy of *base* and return it.

    Nested dicts are merged key-by-key; any other value in *override*
    simply wins. Neither input is mutated.
    """
    merged = dict(base)
    for key, incoming in override.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(incoming, dict):
            merged[key] = deep_merge(current, incoming)
        else:
            merged[key] = incoming
    return merged

API Response Pattern

import json
from typing import Any
 
def json_response(data: Any, status: str = "ok") -> str:
    """Serialize *data* in the standard envelope {"status": ..., "data": ...}."""
    envelope = {
        "status": status,
        "data": data,
    }
    # default=str stringifies anything json cannot encode natively.
    return json.dumps(envelope, default=str)
 
def json_error(message: str, code: str = "error") -> str:
    """Serialize the standard error envelope with a machine code and message."""
    body = {
        "status": "error",
        "error": {"code": code, "message": message},
    }
    return json.dumps(body)

Performance Tips

import json

# orjson: Rust-based drop-in, commonly benchmarked well ahead of stdlib json
# (third-party: pip install orjson).
import orjson

# Serialize — NOTE: orjson.dumps returns *bytes*, not str.
orjson.dumps(data)  # Returns bytes

# Deserialize — accepts bytes or str.
orjson.loads(json_bytes)

# ujson: another C-accelerated alternative (pip install ujson).
# NOTE(review): `data`, `json_bytes` and `json_str` refer to values defined
# in earlier snippets of this post; this block is not standalone.
import ujson
ujson.dumps(data)
ujson.loads(json_str)

Type-Safe Loading

import json
from typing import TypedDict
 
class UserData(TypedDict):
    """Shape of a user record as loaded from JSON."""

    name: str
    email: str
    age: int

def load_user(json_str: str) -> UserData:
    """Parse *json_str* and check that every required field is present.

    Raises ValueError naming the missing fields, if any.
    """
    data = json.loads(json_str)

    required = {"name", "email", "age"}
    missing = required - data.keys()
    if missing:
        raise ValueError(f"Missing fields: {missing}")

    return data

JSON handling is fundamental. Use the stdlib for simple cases, add validation for production, and reach for orjson when performance matters.

React to this post: