Base64 encodes binary data into ASCII text, making it safe for text-only protocols like JSON, email, and URLs.
Basic Encoding and Decoding
import base64
# Encode bytes to base64
data = b"Hello, World!"
encoded = base64.b64encode(data)
print(encoded) # b'SGVsbG8sIFdvcmxkIQ=='
# Decode back to bytes
decoded = base64.b64decode(encoded)
print(decoded) # b'Hello, World!'
# Work with strings
text = "Hello, World!"
encoded_str = base64.b64encode(text.encode()).decode()
print(encoded_str) # 'SGVsbG8sIFdvcmxkIQ=='
URL-Safe Base64
Standard base64 uses + and / which aren't URL-safe:
import base64
data = b"binary data with special chars"
# Standard (may include + and /)
standard = base64.b64encode(data)
# URL-safe (uses - and _ instead)
url_safe = base64.urlsafe_b64encode(data)
print(url_safe.decode())
# Decode URL-safe
decoded = base64.urlsafe_b64decode(url_safe)
Encoding Files
import base64
from pathlib import Path
def file_to_base64(path: str) -> str:
    """Return the contents of the file at ``path`` as base64 text.

    The whole file is read into memory, base64-encoded, and the encoded
    bytes are decoded to ``str`` before being returned.
    """
    raw = Path(path).read_bytes()
    b64_bytes = base64.b64encode(raw)
    return b64_bytes.decode()
def base64_to_file(data: str, path: str) -> None:
    """Decode the base64 string ``data`` and write the bytes to ``path``.

    The target file is created or overwritten with the decoded content.
    """
    target = Path(path)
    raw = base64.b64decode(data)
    target.write_bytes(raw)
# Usage
encoded = file_to_base64('image.png')
base64_to_file(encoded, 'copy.png')
Data URIs
Embed files directly in HTML/CSS:
import base64
from pathlib import Path
import mimetypes
def create_data_uri(path: str) -> str:
    """Build a base64 ``data:`` URI for the file at ``path``.

    The MIME type is guessed from the file name; when no type can be
    guessed, ``application/octet-stream`` is used instead.
    """
    payload = Path(path).read_bytes()
    guessed = mimetypes.guess_type(path)[0]
    if guessed:
        media_type = guessed
    else:
        media_type = 'application/octet-stream'
    b64_text = base64.b64encode(payload).decode()
    return f"data:{media_type};base64,{b64_text}"
# Usage
data_uri = create_data_uri('logo.png')
# data:image/png;base64,iVBORw0KGgo...
# In HTML
html = f'<img src="{data_uri}" />'
Parse Data URIs
import base64
import re
def parse_data_uri(uri: str) -> tuple[str, bytes]:
    """Parse a base64 ``data:`` URI into its MIME type and decoded bytes.

    Accepts URIs shaped like
    ``data:<mime>[;<param>=<value>...];base64,<payload>``. Media-type
    parameters such as ``;charset=utf-8`` are skipped, and the payload may
    be empty or wrapped across several lines.

    Args:
        uri: The data URI to parse.

    Returns:
        A ``(mime_type, data)`` tuple: the media type without its
        parameters, and the decoded payload bytes.

    Raises:
        ValueError: If ``uri`` is not a base64 data URI with an explicit
            media type, or if the payload cannot be base64-decoded.
    """
    # ``(?:;[^;,]*)*`` skips optional media-type parameters before the
    # ``;base64`` marker. ``(.*)`` with DOTALL keeps an empty payload valid
    # and stops a line break from silently truncating the payload.
    pattern = r'data:([^;]+)(?:;[^;,]*)*;base64,(.*)'
    match = re.match(pattern, uri, re.DOTALL)
    if not match:
        raise ValueError("Invalid data URI")
    mime_type = match.group(1)
    data = base64.b64decode(match.group(2))
    return mime_type, data
# Usage
mime_type, data = parse_data_uri(data_uri)
print(f"Type: {mime_type}, Size: {len(data)} bytes")
JSON with Binary Data
import base64
import json
def encode_binary_json(data: dict) -> str:
    """Serialize ``data`` to JSON, wrapping binary values as base64.

    Every ``bytes`` or ``bytearray`` value, at any nesting depth inside
    dicts, lists and tuples, is replaced by ``{'__base64__': <text>}``.
    Tuples are emitted as JSON arrays, which is what ``json.dumps`` does
    with them anyway.

    Args:
        data: The structure to serialize.

    Returns:
        The JSON document as a string.

    Raises:
        TypeError: If a value is neither JSON-serializable nor binary.
    """
    def encode_value(v):
        if isinstance(v, (bytes, bytearray)):
            return {'__base64__': base64.b64encode(v).decode()}
        if isinstance(v, dict):
            return {k: encode_value(val) for k, val in v.items()}
        # Tuples must be walked too; otherwise bytes nested inside one
        # reach json.dumps unconverted and raise TypeError.
        if isinstance(v, (list, tuple)):
            return [encode_value(item) for item in v]
        return v

    return json.dumps(encode_value(data))
def decode_binary_json(json_str: str) -> dict:
    """Parse JSON and turn ``{'__base64__': <text>}`` wrappers into bytes.

    A dict counts as a wrapper only when ``'__base64__'`` is its sole key
    and the value is a string. A dict that merely contains that key next
    to other keys is kept as a normal dict, so its other entries are not
    lost.

    Note: a dict whose only key is ``'__base64__'`` with a string value is
    always treated as a wrapper, even if it was not produced by the
    encoder.

    Args:
        json_str: The JSON document to parse.

    Returns:
        The parsed structure with wrappers replaced by ``bytes``.

    Raises:
        ValueError: If the JSON is malformed or a wrapper holds text that
            cannot be base64-decoded.
    """
    def decode_value(v):
        if isinstance(v, dict):
            marker = v.get('__base64__')
            if len(v) == 1 and isinstance(marker, str):
                return base64.b64decode(marker)
            return {k: decode_value(val) for k, val in v.items()}
        if isinstance(v, list):
            return [decode_value(item) for item in v]
        return v

    return decode_value(json.loads(json_str))
# Usage
data = {
'name': 'test',
'image': b'\x89PNG\r\n\x1a\n...'
}
json_str = encode_binary_json(data)
restored = decode_binary_json(json_str)
Base32 Encoding
More human-readable, used in TOTP/2FA:
import base64
data = b"secret key"
# Base32 (A-Z, 2-7, no confusing chars)
b32 = base64.b32encode(data)
print(b32) # b'ONSWG4TFOQQGWZLZ'
decoded = base64.b32decode(b32)
# Hex (Base16) encoding (for reading)
b32_hex = base64.b16encode(data)
print(b32_hex) # b'736563726574206B6579'
ASCII85 / Base85
Higher density encoding:
import base64
data = b"Hello, World!"
# Base85 (more efficient than base64)
b85 = base64.b85encode(data)
print(b85) # b'NM&qnZ!92JZ*pv8Ap'
# ASCII85 (Adobe variant)
a85 = base64.a85encode(data)
print(a85) # b'87cURD_*#4DfTZ)+T'
# Decode
decoded = base64.b85decode(b85)
Streaming Large Files
import base64
def encode_file_streaming(input_path: str, output_path: str,
                          chunk_size: int = 57) -> None:
    """Base64-encode a file chunk by chunk, one output line per chunk.

    Reads ``input_path`` in binary mode ``chunk_size`` bytes at a time and
    writes each encoded chunk, followed by a newline, to ``output_path``.
    The default of 57 bytes yields 76-character lines (57 * 4 / 3 = 76).
    """
    with open(input_path, 'rb') as src, open(output_path, 'w') as dst:
        # iter() with a b'' sentinel stops at the first empty read (EOF).
        for block in iter(lambda: src.read(chunk_size), b''):
            line = base64.b64encode(block).decode()
            dst.write(line + '\n')
def decode_file_streaming(input_path: str, output_path: str) -> None:
    """Decode a line-oriented base64 file one line at a time.

    Each line of ``input_path`` is stripped of surrounding whitespace,
    base64-decoded on its own, and the resulting bytes are appended to
    ``output_path`` (opened in binary write mode).
    """
    with open(input_path, 'r') as src, open(output_path, 'wb') as dst:
        dst.writelines(base64.b64decode(b64_line.strip()) for b64_line in src)


# Email Attachments (MIME)
import base64
from email.mime.base import MIMEBase
from email import encoders
def create_attachment(path: str) -> MIMEBase:
    """Create a base64-encoded email attachment from the file at ``path``.

    The file is read fully into memory and attached as
    ``application/octet-stream`` with a ``Content-Disposition: attachment``
    header that carries the file's base name.

    Args:
        path: Path of the file to attach.

    Returns:
        A ``MIMEBase`` part with the payload base64-encoded.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import os  # local import: this snippet's import block does not include os

    with open(path, 'rb') as f:
        data = f.read()
    # os.path.basename understands the platform's separators, unlike a
    # hard-coded split on '/'.
    filename = os.path.basename(path)
    attachment = MIMEBase('application', 'octet-stream')
    attachment.set_payload(data)
    encoders.encode_base64(attachment)
    # Passing filename as a header parameter lets the email package quote
    # or RFC 2231-encode it, so quotes and non-ASCII names stay well-formed.
    attachment.add_header(
        'Content-Disposition',
        'attachment',
        filename=filename,
    )
    return attachment


# Padding Handling
Base64 uses = padding to align to 4-character boundaries:
import base64
# Without padding
encoded_no_pad = base64.b64encode(b"a").decode().rstrip('=')
print(encoded_no_pad) # 'YQ'
# Add padding back for decoding
def add_padding(s: str) -> str:
    """Append ``=`` characters until ``len(s)`` is a multiple of four."""
    # -len(s) % 4 is the distance to the next multiple of 4 (0 if aligned).
    missing = -len(s) % 4
    return s + '=' * missing
decoded = base64.b64decode(add_padding(encoded_no_pad))
print(decoded) # b'a'
# Or append the missing '==' directly (the URL-safe decoder needs padding too)
decoded = base64.urlsafe_b64decode(encoded_no_pad + '==')
Validation
import base64
import re
def is_valid_base64(s: str) -> bool:
    """Return True if ``s`` is well-formed standard base64 text.

    A string passes when it contains only the standard alphabet
    (``A-Z``, ``a-z``, ``0-9``, ``+``, ``/``) followed by at most two ``=``
    padding characters, its length is a multiple of four, and it decodes
    without error. The empty string is accepted.

    Args:
        s: The candidate text.

    Returns:
        True when ``s`` is valid base64, False otherwise.
    """
    # fullmatch, not match + '$': '$' also matches just before a trailing
    # newline, which would let 'xxxx\n' through the character check.
    if not re.fullmatch(r'[A-Za-z0-9+/]*={0,2}', s):
        return False
    if len(s) % 4 != 0:
        return False
    try:
        base64.b64decode(s)
    except ValueError:
        # binascii.Error, raised on malformed input, subclasses ValueError.
        return False
    return True
print(is_valid_base64('SGVsbG8=')) # True
print(is_valid_base64('Invalid!')) # False
Size Considerations
import base64
# Base64 increases size by ~33%
original_size = 1000
encoded_size = len(base64.b64encode(b'x' * original_size))
print(f"Original: {original_size} bytes")
print(f"Encoded: {encoded_size} bytes")
print(f"Overhead: {(encoded_size / original_size - 1) * 100:.1f}%")
# Original: 1000 bytes
# Encoded: 1336 bytes
# Overhead: 33.6%
Base64 is everywhere—APIs, data URIs, tokens, email. Understanding its variants (standard, URL-safe, base32, base85) helps you choose the right encoding for each situation.
React to this post: