Beyond json.loads() and json.dumps(), Python's json module supports custom encoders, streaming, and advanced patterns.
Custom Encoders
import json
from datetime import datetime, date
from decimal import Decimal
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that understands datetime/date, Decimal, set, and
    arbitrary objects exposing __dict__.

    Anything else falls through to the base class, which raises TypeError.
    """

    def default(self, obj):
        # datetime is a subclass of date and both use isoformat(), so a
        # single combined check gives the same result as two branches.
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)  # NOTE: float conversion may lose precision
        if isinstance(obj, set):
            return list(obj)
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return super().default(obj)
# Sample payload mixing types the stock json encoder cannot serialize.
data = {
    'timestamp': datetime.now(),
    'price': Decimal('19.99'),
    'tags': {'a', 'b', 'c'},
}
json.dumps(data, cls=CustomEncoder)

Custom Decoders
import json
from datetime import datetime
def custom_decoder(dct):
    """object_hook that promotes ISO-8601 strings to datetime objects.

    Non-string values and strings that do not parse as dates are left
    untouched; the (mutated) dict is returned.
    """
    for key, value in dct.items():
        if not isinstance(value, str):
            continue
        try:
            dct[key] = datetime.fromisoformat(value)
        except ValueError:
            pass  # not an ISO date -- keep the original string
    return dct
text = '{"created": "2026-03-21T10:30:00"}'
data = json.loads(text, object_hook=custom_decoder)
print(data['created'])  # datetime object

Dataclass Serialization
import json
from dataclasses import dataclass, asdict
from typing import List
@dataclass
class User:
    """User record used to demonstrate dataclass <-> JSON round-trips."""
    name: str
    email: str
    tags: List[str]


user = User("Alice", "alice@example.com", ["admin", "active"])

# Serialize: asdict() recursively converts the dataclass to plain dicts/lists
json_str = json.dumps(asdict(user))

# Deserialize back into a plain dict (rebuild the dataclass with User(**data))
data = json.loads(json_str)
user = User(**data)

Streaming Large Files
import json
# Write streaming NDJSON (newline-delimited)
def write_ndjson(items, filepath):
    """Write each item as one JSON document per line (newline-delimited JSON)."""
    with open(filepath, 'w') as out:
        out.writelines(json.dumps(item) + '\n' for item in items)
# Read streaming
def read_ndjson(filepath):
    """Lazily yield one parsed JSON value per line of an NDJSON file."""
    with open(filepath) as src:
        yield from map(json.loads, src)
# Process without loading the entire file into memory
for record in read_ndjson('large.ndjson'):
    process(record)

Incremental Parsing
import json
# Parse incomplete JSON streams
def iter_json_stream(stream):
    """Yield complete JSON values parsed incrementally from text chunks.

    *stream* is any iterable of strings (socket reads, file chunks, ...).
    Partial documents are buffered until enough data has arrived to decode
    them.  The original snippet used a bare module-level ``yield``, which
    is a SyntaxError -- the logic must live inside a generator function.
    """
    decoder = json.JSONDecoder()
    buffer = ''
    for chunk in stream:
        buffer += chunk
        while buffer:
            try:
                obj, idx = decoder.raw_decode(buffer)
            except json.JSONDecodeError:
                break  # incomplete document -- need more data
            yield obj
            buffer = buffer[idx:].lstrip()

# Pretty Printing
import json
data = {'users': [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}]}

# Human-readable, indented output
print(json.dumps(data, indent=2))
# Deterministic key order
print(json.dumps(data, indent=2, sort_keys=True))
# Most compact form: no space after ',' or ':'
print(json.dumps(data, separators=(',', ':')))
# Custom separators (comma+space, colon+space) combined with indent
print(json.dumps(data, indent=2, separators=(', ', ': ')))

Handling Special Values
import json
import math
# NaN and Infinity (not valid JSON)
data = {'value': float('nan')}
# NaN/Infinity are NOT valid JSON.  Contrary to the original comment,
# the stdlib DEFAULT is allow_nan=True, which emits the non-standard
# token NaN instead of raising.  Strict mode must be requested:
# json.dumps(data, allow_nan=False)  # ValueError
json.dumps(data, allow_nan=True)  # '{"value": NaN}' (same as the default)
# Better: replace with null
class SafeEncoder(json.JSONEncoder):
    """Encoder that serializes NaN/Infinity as null instead of emitting
    the invalid tokens ``NaN``/``Infinity``.

    NOTE: overriding ``default()`` (as the original version did) cannot
    work here -- ``default()`` is only invoked for objects json cannot
    already serialize, and floats always serialize.  Instead the input is
    sanitized before encoding.
    """

    def iterencode(self, o, _one_shot=False):
        # encode() funnels through iterencode(), so sanitizing here covers
        # both json.dumps(..., cls=SafeEncoder) and direct encoder use.
        return super().iterencode(self._sanitize(o), _one_shot)

    @classmethod
    def _sanitize(cls, o):
        """Recursively replace non-finite floats with None (-> null)."""
        if isinstance(o, float) and not math.isfinite(o):
            return None
        if isinstance(o, dict):
            return {k: cls._sanitize(v) for k, v in o.items()}
        if isinstance(o, (list, tuple)):
            return [cls._sanitize(v) for v in o]
        return o

# Ensure ASCII
import json
data = {'message': 'こんにちは'}
# Default behaviour escapes every non-ASCII character to \uXXXX
json.dumps(data)  # '{"message": "\\u3053\\u3093..."}'
# Preserve Unicode
json.dumps(data, ensure_ascii=False)  # '{"message": "こんにちは"}'

JSON Lines (JSONL)
import json
class JSONLWriter:
    """Context manager that writes one JSON document per line (JSON Lines)."""

    def __init__(self, filepath):
        self.filepath = filepath
        self.file = None  # opened lazily in __enter__

    def __enter__(self):
        self.file = open(self.filepath, 'w')
        return self

    def __exit__(self, *exc_info):
        self.file.close()

    def write(self, obj):
        # print() appends exactly one '\n', same as dumps(obj) + '\n'
        print(json.dumps(obj), file=self.file)
# Usage
with JSONLWriter('output.jsonl') as writer:
    for record in records:
        writer.write(record)

Schema Validation
# Using jsonschema library
# pip install jsonschema
from jsonschema import validate, ValidationError
# JSON Schema describing a user object: name/email required, age optional.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
        "email": {"type": "string", "format": "email"},
    },
    "required": ["name", "email"],
}
def validate_user(data):
    """Return True when *data* conforms to the user schema, else False.

    Validation failures are printed rather than raised.
    """
    try:
        validate(data, schema)
    except ValidationError as e:
        print(f"Invalid: {e.message}")
        return False
    return True

# Performance: orjson
# pip install orjson
import orjson
# 3-10x faster than stdlib json
# NOTE(review): json_bytes is not defined in this snippet -- assumed to be a
# JSON document as bytes/str.  Also note orjson.dumps returns bytes, not str.
data = orjson.loads(json_bytes)
output = orjson.dumps(data)
# Native datetime support
from datetime import datetime
# datetime serializes without a custom encoder (stdlib json raises TypeError)
orjson.dumps({'time': datetime.now()})
# Options
# option flags are integers that can be combined with bitwise OR
orjson.dumps(data, option=orjson.OPT_INDENT_2)
orjson.dumps(data, option=orjson.OPT_SORT_KEYS)

Merging JSON Objects
import json
def deep_merge(base, override):
    """Recursively merge *override* into a copy of *base*.

    Dict-valued keys present in both are merged; any other conflict is
    resolved in favour of *override*.  Neither input dict is mutated.
    """
    merged = dict(base)
    for key, value in override.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            merged[key] = deep_merge(current, value)
        else:
            merged[key] = value
    return merged
# Load the layered configs.  The original open(...).read() never closed the
# file handles; "with" guarantees cleanup, and json.load reads the stream.
with open('default.json') as f:
    config_default = json.load(f)
with open('user.json') as f:
    config_user = json.load(f)
config = deep_merge(config_default, config_user)

JSON Pointer
def get_by_pointer(obj, pointer):
    """Resolve a JSON Pointer (RFC 6901) against *obj* and return the value.

    '' refers to the whole document.  Raises ValueError for a non-empty
    pointer that does not start with '/', and KeyError/IndexError/ValueError
    for members that do not resolve.
    """
    if pointer == '':
        return obj
    if not pointer.startswith('/'):
        # RFC 6901: every non-empty pointer must begin with '/'.  The
        # original silently dropped the first token of such pointers.
        raise ValueError(f"invalid JSON Pointer: {pointer!r}")
    for token in pointer.split('/')[1:]:
        # Unescape in this order: '~1' -> '/', then '~0' -> '~' (RFC 6901 §4)
        token = token.replace('~1', '/').replace('~0', '~')
        if isinstance(obj, list):
            obj = obj[int(token)]
        else:
            obj = obj[token]
    return obj
data = {'users': [{'name': 'Alice'}]}
get_by_pointer(data, '/users/0/name')  # 'Alice'

Handling Circular References
import json
class CircularEncoder(json.JSONEncoder):
    """Encoder that serializes objects via __dict__, replacing circular
    references with the string '[Circular]'.

    The original version kept one encoder-wide "seen" set, so a second,
    perfectly legal reference to an already-serialized object was also
    reported as '[Circular]'.  Tracking only the ids on the *current*
    descent path breaks true cycles without flagging shared references.
    """

    def default(self, obj):
        if not hasattr(obj, '__dict__'):
            # Same TypeError the base class raises for unsupported types.
            return super().default(obj)
        return self._convert(obj, frozenset())

    @classmethod
    def _convert(cls, obj, active):
        # *active* holds ids of the containers on the current path only.
        if id(obj) in active:
            return '[Circular]'
        if isinstance(obj, (list, tuple)):
            return [cls._convert(v, active | {id(obj)}) for v in obj]
        if isinstance(obj, dict):
            return {k: cls._convert(v, active | {id(obj)}) for k, v in obj.items()}
        if hasattr(obj, '__dict__'):
            return {k: cls._convert(v, active | {id(obj)}) for k, v in vars(obj).items()}
        return obj  # primitives: let the base encoder handle (or reject) them

# Summary
Advanced JSON patterns:
- Custom encoders: Handle datetime, Decimal, custom classes
- Object hooks: Custom deserialization
- Streaming: NDJSON for large files
- Performance: Use orjson for speed-critical code
- Validation: jsonschema for type checking
- Pretty print: indent, sort_keys, separators
JSON is simple, but mastering these patterns handles real-world complexity.
React to this post: