JSON is everywhere. Here's how to handle it properly in Python.
Basic Operations
import json

# Round-trip a dict through a JSON string.
data = {"name": "Alice", "age": 30}

# Serialize: Python object -> JSON text
json_str = json.dumps(data)
print(json_str)

# Deserialize: JSON text -> Python object
parsed = json.loads(json_str)
print(parsed["name"])

# Pretty printing
print(json.dumps(data, indent=2))
File Operations
import json
from pathlib import Path

# Persist `data` to disk, then load it back.
with open("data.json", "w") as f:
    json.dump(data, f, indent=2)

with open("data.json") as f:
    loaded = json.load(f)

# Same round trip with pathlib one-liners.
Path("data.json").write_text(json.dumps(data, indent=2))
loaded = json.loads(Path("data.json").read_text())
Formatting Options
import json

data = {"users": [{"name": "Alice"}, {"name": "Bob"}]}

# Default form: compact, one space after separators
json.dumps(data)
# Pretty-printed for humans
json.dumps(data, indent=2)
# Tightest form: strip the separator spaces
json.dumps(data, separators=(",", ":"))
# Deterministic key order (useful for diffs and hashing)
json.dumps(data, sort_keys=True)
# Keep non-ASCII characters literal instead of \uXXXX escapes
json.dumps({"emoji": "🎉"}, ensure_ascii=False)
json.dumps({"emoji": "🎉"}, ensure_ascii=True)
# '{"emoji": "\ud83c\udf89"}'
Custom Encoders
import json
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID


class CustomEncoder(json.JSONEncoder):
    """JSONEncoder that handles common stdlib types json can't serialize."""

    def default(self, obj):
        # NOTE: datetime is a subclass of date, so one combined check
        # covers both — each falls back to its own isoformat().
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, (Decimal, UUID)):
            # Stringify to avoid float precision loss / preserve UUID text form.
            return str(obj)
        if isinstance(obj, set):
            return list(obj)
        if isinstance(obj, bytes):
            return obj.decode("utf-8")
        # Anything else: let the base class raise TypeError.
        return super().default(obj)


data = {
    "created": datetime.now(),
    "price": Decimal("19.99"),
    "tags": {"python", "json"},
}
json.dumps(data, cls=CustomEncoder)
Custom Decoders
import json
from datetime import datetime


def decode_datetime(dct):
    """object_hook that converts ISO-8601 strings into datetime objects.

    Non-string values and strings that don't parse are left untouched.
    """
    for key, value in dct.items():
        if not isinstance(value, str):
            continue
        try:
            dct[key] = datetime.fromisoformat(value)
        except ValueError:
            # Not an ISO datetime — keep the original string.
            pass
    return dct


json_str = '{"created": "2024-01-15T10:30:00"}'
data = json.loads(json_str, object_hook=decode_datetime)
print(type(data["created"])) # datetime.datetime
Dataclass Serialization
import json
from dataclasses import dataclass, asdict


@dataclass
class User:
    name: str
    email: str
    age: int


user = User("Alice", "alice@example.com", 30)

# Dataclass -> dict -> JSON string
json.dumps(asdict(user))

# JSON string -> dict (unpack into the dataclass on the next line)
data = json.loads('{"name": "Bob", "email": "bob@example.com", "age": 25}')
user = User(**data)
Pydantic Integration
from pydantic import BaseModel


# Pydantic validates field types on construction (v2 API shown).
class User(BaseModel):
    name: str
    email: str
    age: int


user = User(name="Alice", email="alice@example.com", age=30)

# Model -> JSON string
user.model_dump_json()

# JSON string -> validated model
User.model_validate_json('{"name": "Bob", "email": "bob@example.com", "age": 25}')
Streaming JSON Lines (JSONL)
import json


def write_jsonl(path, records):
    """Write each record as one JSON document per line (JSONL)."""
    with open(path, "w") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


def read_jsonl(path):
    """Lazily yield one parsed record per non-blank line of a JSONL file."""
    with open(path) as f:
        for line in f:
            if line.strip():
                yield json.loads(line)
# Usage
records = [{"id": 1}, {"id": 2}, {"id": 3}]
write_jsonl("data.jsonl", records)
for record in read_jsonl("data.jsonl"):
print(record)Handling Large Files
import json
# Stream parsing with ijson (for very large files)
# pip install ijson
import ijson


def stream_large_json(path, prefix="item"):
    """Lazily yield items matching `prefix` without loading the whole file."""
    with open(path, "rb") as f:
        yield from ijson.items(f, prefix)
# Incremental encoder
def stream_json_array(items, file):
"""Write items as JSON array without loading all in memory."""
file.write("[\n")
first = True
for item in items:
if not first:
file.write(",\n")
file.write(json.dumps(item))
first = False
file.write("\n]")Error Handling
import json


def safe_parse(json_str, default=None):
    """Return the parsed document, or `default` when `json_str` is invalid."""
    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as e:
        # Report where parsing broke, then fall back.
        print(f"JSON error at line {e.lineno}, col {e.colno}: {e.msg}")
        return default
    return parsed
# Detailed error info
try:
json.loads('{"broken": }')
except json.JSONDecodeError as e:
print(f"Error: {e.msg}")
print(f"Line: {e.lineno}, Column: {e.colno}")
print(f"Document: {e.doc}")Schema Validation
# Using jsonschema
# pip install jsonschema
from jsonschema import validate, ValidationError
schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer", "minimum": 0},
"email": {"type": "string", "format": "email"},
},
"required": ["name", "age"],
}
def validate_user(data):
try:
validate(instance=data, schema=schema)
return True
except ValidationError as e:
print(f"Validation error: {e.message}")
return FalseCommon Patterns
import json
from pathlib import Path

# Config file pattern
def load_config(path: Path, defaults: dict = None) -> dict:
    """Load JSON config from `path`, layered on top of `defaults`.

    Missing file -> the defaults alone. The `defaults` mapping is copied,
    never mutated (the original `config = defaults or {}` aliased the
    caller's dict and then update()d it in place).
    """
    config = dict(defaults) if defaults else {}
    if path.exists():
        config.update(json.loads(path.read_text()))
    return config
def save_config(path: Path, config: dict) -> None:
    """Save config atomically: write a sibling temp file, then rename it over `path`."""
    tmp_path = path.with_suffix(".tmp")
    tmp_path.write_text(json.dumps(config, indent=2))
    # rename is atomic on POSIX, so readers never see a half-written file
    tmp_path.replace(path)
# Merge JSON files
def merge_json_files(*paths) -> dict:
    """Shallow-merge several JSON object files; later files win on key clashes."""
    merged = {}
    for p in paths:
        merged.update(json.loads(Path(p).read_text()))
    return merged
# Deep merge
def deep_merge(base: dict, override: dict) -> dict:
result = base.copy()
for key, value in override.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = deep_merge(result[key], value)
else:
result[key] = value
return resultAPI Response Pattern
import json
from typing import Any


def json_response(data: Any, status: str = "ok") -> str:
    """Standardized API response format."""
    payload = {
        "status": status,
        "data": data,
    }
    # default=str stringifies anything json can't encode natively
    return json.dumps(payload, default=str)
def json_error(message: str, code: str = "error") -> str:
"""Standardized error response."""
return json.dumps({
"status": "error",
"error": {
"code": code,
"message": message,
},
})Performance Tips
import json
# orjson is 10x faster (pip install orjson)
import orjson
# Serialize — note: orjson returns bytes, not str
orjson.dumps(data) # Returns bytes
# Deserialize — accepts bytes or str; `json_bytes` presumably defined earlier, verify
orjson.loads(json_bytes)
# ujson is also faster (pip install ujson) and keeps the str-based stdlib interface
import ujson
ujson.dumps(data)
ujson.loads(json_str)
Type-Safe Loading
import json
from typing import TypedDict
class UserData(TypedDict):
name: str
email: str
age: int
def load_user(json_str: str) -> UserData:
"""Load and validate user data."""
data = json.loads(json_str)
# Basic validation
required = {"name", "email", "age"}
if not required.issubset(data.keys()):
raise ValueError(f"Missing fields: {required - data.keys()}")
return dataJSON handling is fundamental. Use the stdlib for simple cases, add validation for production, and reach for orjson when performance matters.
React to this post: