JSON is everywhere. Here's how to handle it properly in Python.
Basic Operations
import json
# Parse JSON string
data = json.loads('{"name": "Owen", "age": 25}')
# Convert to JSON string
text = json.dumps({"name": "Owen", "age": 25})
# Read from file
with open("data.json") as f:
    data = json.load(f)
# Write to file
with open("data.json", "w") as f:
    json.dump(data, f)
Pretty Printing
# Readable output
print(json.dumps(data, indent=2))
# Sorted keys for consistency
print(json.dumps(data, indent=2, sort_keys=True))
# Output:
# {
# "age": 25,
# "name": "Owen"
# }
Handling Types
JSON only supports: strings, numbers, booleans, null, arrays, objects.
from datetime import datetime
from decimal import Decimal
# This fails
json.dumps({"date": datetime.now()}) # TypeError
# Custom encoder
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that additionally understands datetime and Decimal values."""

    def default(self, value):
        # json.dumps calls this only for objects the base encoder rejects.
        if isinstance(value, Decimal):
            # Decimals become plain floats (with float's usual precision limits).
            return float(value)
        if isinstance(value, datetime):
            # Datetimes are serialized as ISO-8601 strings.
            return value.isoformat()
        # Everything else: defer to the base class, which raises TypeError.
        return super().default(value)
json.dumps({"date": datetime.now()}, cls=CustomEncoder)
# '{"date": "2024-03-21T10:30:00"}'
Validation with Pydantic
For serious JSON work, use Pydantic:
from pydantic import BaseModel, EmailStr
from typing import Optional
class User(BaseModel):
    """Pydantic model: parsing validates types and raises ValidationError on bad input."""

    name: str
    email: EmailStr  # must be a well-formed e-mail address (needs the email-validator extra)
    age: Optional[int] = None  # optional field; missing in the JSON -> None
# Parse and validate
user = User.model_validate_json('{"name": "Owen", "email": "owen@example.com"}')
# Access fields
print(user.name) # "Owen"
# Serialize
print(user.model_dump_json())
# '{"name":"Owen","email":"owen@example.com","age":null}'
Pydantic validates types, handles optional fields, and provides clear errors.
Dataclasses
For simpler cases:
from dataclasses import dataclass, asdict
import json
@dataclass
class User:
    """Plain stdlib mirror of the JSON payload — simple, but performs no validation."""

    name: str
    email: str  # NOTE(review): unlike the Pydantic version, not checked to be an e-mail
    age: int | None = None  # missing in the JSON -> None
# Parse
data = json.loads('{"name": "Owen", "email": "owen@example.com"}')
user = User(**data)
# Serialize
json.dumps(asdict(user))
No validation, but simple and built-in.
Common Patterns
Safe parsing
from typing import Any


def parse_json(text: str) -> Any:
    """Parse *text* as JSON, returning None when it is not valid JSON.

    Valid JSON can decode to a dict, list, str, int, float, bool, or None,
    so the return type is ``Any`` rather than ``dict``.  Caveat: a None
    result is ambiguous — the literal string "null" also decodes to None.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Swallow only parse errors; anything else (e.g. TypeError for a
        # non-string argument) still propagates.
        return None
# Usage
data = parse_json(maybe_json)
if data is None:
    print("Invalid JSON")
Nested access
# Risky: raises KeyError
value = data["user"]["address"]["city"]
# Safe: returns None
def get_nested(data: dict, *keys, default=None):
    """Walk *keys* into nested dicts; return *default* on any missing step.

    A None value found at the end of the chain is also reported as *default*,
    so the caller cannot distinguish "key absent" from "value is null".
    """
    current = data
    for step in keys:
        # Guard clause: bail out as soon as we can no longer descend.
        if not isinstance(current, dict):
            return default
        current = current.get(step)
    return default if current is None else current
city = get_nested(data, "user", "address", "city", default="Unknown")
Merging JSON
# Shallow merge
merged = {**dict1, **dict2}
# Deep merge (recursive — note that dict1 | dict2 on Python 3.9+ is still a shallow merge)
import copy
def deep_merge(base: dict, updates: dict) -> dict:
result = copy.deepcopy(base)
for key, value in updates.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = deep_merge(result[key], value)
else:
result[key] = copy.deepcopy(value)
return resultPerformance Tips
Use orjson for speed:
import orjson
# 3-10x faster than stdlib
data = orjson.loads(json_bytes)
output = orjson.dumps(data)
Stream large files:
import ijson
# Parse huge files without loading into memory
with open("huge.json", "rb") as f:
    for item in ijson.items(f, "items.item"):
        process(item)
Error Handling
def load_json_file(path: str) -> dict:
try:
with open(path) as f:
return json.load(f)
except FileNotFoundError:
raise ValueError(f"File not found: {path}")
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in {path}: {e}")
except PermissionError:
raise ValueError(f"Cannot read file: {path}")My Recommendations
- Use Pydantic for API data — validation saves debugging time
- Use dataclasses for internal data — simple and fast
- Use orjson for performance — drop-in replacement
- Always handle errors — JSON from external sources can be anything
- Pretty print for debugging — indent=2 is your friend
JSON is simple. Keep your handling simple too.
React to this post: