JSON is everywhere in web development. Here's how to handle it effectively in Python.

Basic Usage

import json
 
# Parse JSON string
data = json.loads('{"name": "Alice", "age": 30}')
# {'name': 'Alice', 'age': 30}
 
# Convert to JSON string
json_str = json.dumps({"name": "Alice", "age": 30})
# '{"name": "Alice", "age": 30}'

File Operations

# Read from file
with open("data.json") as f:
    data = json.load(f)
 
# Write to file
with open("output.json", "w") as f:
    json.dump(data, f)

Pretty Printing

data = {"users": [{"name": "Alice"}, {"name": "Bob"}]}
 
# Indented output
json.dumps(data, indent=2)
# {
#   "users": [
#     {"name": "Alice"},
#     {"name": "Bob"}
#   ]
# }
 
# Sort keys
json.dumps(data, indent=2, sort_keys=True)
 
# Compact separators
json.dumps(data, separators=(",", ":"))  # No spaces

Type Mapping

Python          JSON
------          ----
dict            object
list, tuple     array
str             string
int, float      number
True            true
False           false
None            null

Handling Non-Serializable Types

By default, json.dumps fails on dates, sets, and custom objects:

from datetime import datetime
 
json.dumps({"date": datetime.now()})
# TypeError: Object of type datetime is not JSON serializable

Using default function

def json_serializer(obj):
    """Fallback serializer for json.dumps(default=...).

    Converts datetime objects to ISO-8601 strings and sets to lists;
    raises TypeError for anything else, matching json.dumps' behavior.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, set):
        return list(obj)
    # type(obj).__name__ matches the stdlib's wording ("datetime"),
    # not the repr form "<class 'datetime.datetime'>".
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
 
json.dumps(
    {"date": datetime.now(), "tags": {"a", "b"}},
    default=json_serializer
)

Custom Encoder Class

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that wraps datetimes and sets in tagged envelopes.

    Each non-serializable value becomes {"_type": ..., "value": ...}
    so a matching object_hook can reconstruct it on load.
    """

    def default(self, obj):
        # Tag the types we know how to round-trip; defer anything else
        # to the base class, which raises TypeError.
        if isinstance(obj, datetime):
            envelope = {"_type": "datetime", "value": obj.isoformat()}
        elif isinstance(obj, set):
            envelope = {"_type": "set", "value": list(obj)}
        else:
            return super().default(obj)
        return envelope
 
json.dumps(data, cls=CustomEncoder)

Custom Decoder

def object_hook(obj):
    """Reconstruct tagged values produced by a matching custom encoder.

    Objects shaped like {"_type": "datetime"|"set", "value": ...} are
    turned back into their Python types; everything else passes through.
    """
    tag = obj.get("_type")
    if tag == "datetime":
        return datetime.fromisoformat(obj["value"])
    if tag == "set":
        return set(obj["value"])
    # Unknown or untagged objects are returned unchanged.
    return obj
 
json.loads(json_str, object_hook=object_hook)

Error Handling

try:
    data = json.loads(user_input)
except json.JSONDecodeError as e:
    print(f"Invalid JSON: {e.msg} at line {e.lineno}, column {e.colno}")

Working with APIs

import json
import urllib.request
 
# Fetch and parse
with urllib.request.urlopen("https://api.example.com/data") as response:
    data = json.load(response)
 
# With requests library
import requests
response = requests.get("https://api.example.com/data")
data = response.json()  # Built-in parsing

Streaming Large Files

For large JSON files, use the third-party ijson library (pip install ijson) to stream items without loading the whole document into memory:

import ijson
 
# Stream array items
with open("large.json", "rb") as f:
    for item in ijson.items(f, "users.item"):
        process(item)

Or read line-delimited JSON (JSONL/NDJSON):

# Each line is a JSON object
with open("data.jsonl") as f:
    for line in f:
        obj = json.loads(line)
        process(obj)

Common Patterns

Config files

from pathlib import Path
 
def load_config(path: str = "config.json"):
    """Load a JSON config file; return an empty dict if it does not exist."""
    config_file = Path(path)
    if not config_file.exists():
        return {}
    return json.loads(config_file.read_text())
 
def save_config(config: dict, path: str = "config.json"):
    """Write *config* to *path* as pretty-printed (indent=2) JSON."""
    serialized = json.dumps(config, indent=2)
    Path(path).write_text(serialized)

Safe parsing

def safe_parse(json_str: str, default=None):
    """Parse a JSON string, returning *default* instead of raising on bad input."""
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass
    return default
 
data = safe_parse(user_input, default={})

Merge JSON files

def merge_json_files(*paths):
    """Merge the top-level keys of several JSON object files.

    Files are applied in argument order, so later files overwrite
    earlier ones on key conflicts.
    """
    merged = {}
    for filename in paths:
        with open(filename) as handle:
            contents = json.load(handle)
        merged.update(contents)
    return merged

Validate structure

def validate_user(data: dict) -> bool:
    """Return True when *data* contains every required user field."""
    # Extra keys are allowed; only the presence of these three matters.
    return all(field in data for field in ("name", "email", "age"))
 
users = json.loads(json_str)
valid_users = [u for u in users if validate_user(u)]

Performance Tips

# orjson is faster (pip install orjson)
import orjson
 
# Serialize
orjson.dumps(data)  # Returns bytes
 
# Parse
orjson.loads(json_bytes)
 
# ujson is another fast alternative
import ujson
ujson.dumps(data)
ujson.loads(json_str)

Quick Reference

import json
 
# Parse
json.loads(string)      # String to dict
json.load(file)         # File to dict
 
# Serialize
json.dumps(data)        # Dict to string
json.dump(data, file)   # Dict to file
 
# Options
json.dumps(data, 
    indent=2,           # Pretty print
    sort_keys=True,     # Alphabetical keys
    default=func,       # Handle custom types
    ensure_ascii=False, # Allow unicode
    cls=CustomEncoder   # Custom encoder class
)
 
# Parse options
json.loads(string,
    object_hook=func    # Transform objects
)

The json module covers most use cases. For performance-critical applications, consider orjson or ujson.

React to this post: