Python base64: Encoding Binary Data for Text Protocols

Base64 encodes binary data into ASCII text, making it safe for text-only protocols like JSON, email, and URLs.

Basic Encoding and Decoding

import base64
 
# bytes -> base64 (the result is also bytes)
data = b"Hello, World!"
encoded = base64.b64encode(data)
print(encoded)  # b'SGVsbG8sIFdvcmxkIQ=='

# base64 -> the original bytes
decoded = base64.b64decode(encoded)
print(decoded)  # b'Hello, World!'

# str input: encode the text to bytes first, then turn the base64 bytes
# back into a str for use in text protocols
text = "Hello, World!"
text_bytes = text.encode()
encoded_str = base64.b64encode(text_bytes).decode()
print(encoded_str)  # SGVsbG8sIFdvcmxkIQ==

URL-Safe Base64

Standard base64 uses the characters + and /, which have special meaning in URLs; the URL-safe variant replaces them with - and _:

import base64
 
data = b"binary data with special chars"

# Standard alphabet: the output may contain '+' and '/'
standard = base64.b64encode(data)

# URL-safe alphabet: '-' stands in for '+', '_' for '/'
url_safe = base64.urlsafe_b64encode(data)
url_safe_text = url_safe.decode()
print(url_safe_text)

# Reverse the URL-safe encoding
decoded = base64.urlsafe_b64decode(url_safe)

Encoding Files

import base64
from pathlib import Path
 
def file_to_base64(path: str) -> str:
    """Return the whole content of the file at *path* as base64 text."""
    raw = Path(path).read_bytes()
    encoded = base64.b64encode(raw)
    # b64encode returns bytes; decode them so the caller gets a str
    return encoded.decode()
 
def base64_to_file(data: str, path: str) -> None:
    """Decode the base64 text *data* and write the resulting bytes to *path*."""
    target = Path(path)
    payload = base64.b64decode(data)
    target.write_bytes(payload)
 
# Usage: copy image.png to copy.png by way of its base64 text form
source_name, copy_name = 'image.png', 'copy.png'
encoded = file_to_base64(source_name)
base64_to_file(data=encoded, path=copy_name)

Data URIs

Embed files directly in HTML/CSS:

import base64
from pathlib import Path
import mimetypes
 
def create_data_uri(path: str) -> str:
    """Build a ``data:`` URI that embeds the file at *path* as base64.

    The media type is guessed from the file name; when no guess is
    available, ``application/octet-stream`` is used.
    """
    raw = Path(path).read_bytes()
    guessed, _encoding = mimetypes.guess_type(path)
    mime_type = guessed or 'application/octet-stream'
    encoded = base64.b64encode(raw).decode()
    return f"data:{mime_type};base64,{encoded}"
 
# Usage: inline logo.png as a data URI
logo_name = 'logo.png'
data_uri = create_data_uri(path=logo_name)
# -> data:image/png;base64,iVBORw0KGgo...

# The URI can be used directly as an <img> source in HTML
html = f'<img src="{data_uri}" />'

Parse Data URIs

import base64
import re
 
def parse_data_uri(uri: str) -> tuple[str, bytes]:
    """Parse a base64 data URI into its media type and decoded payload.

    Accepts ``data:<type>[;param=value ...];base64,<payload>``. Media-type
    parameters such as ``charset=utf-8`` are skipped rather than rejected,
    and an empty payload decodes to ``b""``.

    Args:
        uri: The data URI to parse.

    Returns:
        A ``(mime_type, data)`` tuple. ``mime_type`` is the text between
        ``data:`` and the first ``;`` (parameters are not included).

    Raises:
        ValueError: If *uri* does not have the shape above, or if the payload
            is not decodable base64 (``binascii.Error`` is a ``ValueError``
            subclass).
    """
    # group 1: media type; the non-capturing group lazily skips optional
    # ";name=value" parameters so that ";base64," is found as early as possible.
    pattern = r'data:([^;]+)(?:;[^;,]*)*?;base64,(.*)'
    match = re.match(pattern, uri)

    if not match:
        raise ValueError("Invalid data URI")

    mime_type = match.group(1)
    data = base64.b64decode(match.group(2))

    return mime_type, data
 
# Usage: recover the media type and the raw bytes from a data URI
parsed = parse_data_uri(data_uri)
mime_type, data = parsed
print(f"Type: {mime_type}, Size: {len(data)} bytes")

JSON with Binary Data

import base64
import json
 
def encode_binary_json(data: dict) -> str:
    """Serialize *data* to JSON, wrapping binary values as base64 markers.

    Every ``bytes`` or ``bytearray`` value found anywhere inside nested dicts,
    lists and tuples is replaced by ``{"__base64__": "<base64 text>"}``.
    Tuples are emitted as JSON arrays, exactly as ``json.dumps`` would do.

    Args:
        data: The structure to serialize.

    Returns:
        The JSON text.

    Raises:
        TypeError: Propagated from ``json.dumps`` for values that are neither
            binary nor JSON serializable.
    """
    def encode_value(v):
        if isinstance(v, (bytes, bytearray)):
            return {'__base64__': base64.b64encode(v).decode()}
        if isinstance(v, dict):
            return {k: encode_value(val) for k, val in v.items()}
        # Tuples must be walked too: json.dumps would serialize the tuple
        # itself, but would fail on any bytes nested inside it.
        if isinstance(v, (list, tuple)):
            return [encode_value(item) for item in v]
        return v

    return json.dumps(encode_value(data))
 
def decode_binary_json(json_str: str) -> dict:
    """Parse JSON text and turn base64 marker objects back into ``bytes``.

    A marker is an object whose ONLY key is ``"__base64__"`` and whose value
    is a string. Objects that merely contain that key next to others are
    ordinary data and are kept (recursively decoded) instead of being
    collapsed, so none of their keys are lost.

    Args:
        json_str: The JSON text to parse.

    Returns:
        The parsed structure with every marker replaced by its decoded bytes.

    Raises:
        ValueError: If *json_str* is not valid JSON (``json.JSONDecodeError``)
            or a marker holds undecodable base64 (``binascii.Error``); both
            are ``ValueError`` subclasses.
    """
    def decode_value(v):
        if isinstance(v, dict):
            marker = v.get('__base64__')
            if len(v) == 1 and isinstance(marker, str):
                return base64.b64decode(marker)
            return {k: decode_value(val) for k, val in v.items()}
        if isinstance(v, list):
            return [decode_value(item) for item in v]
        return v

    return decode_value(json.loads(json_str))
 
# Usage: a dict mixing plain and binary values survives a JSON round trip
data = {}
data['name'] = 'test'
data['image'] = b'\x89PNG\r\n\x1a\n...'

json_str = encode_binary_json(data)
restored = decode_binary_json(json_str)

Base32 Encoding

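Base32 is easier for people to read aloud and type than base64 (single-case letters, no look-alike digits), which is why TOTP/2FA secrets use it: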

import base64
 
data = b"secret key"

# Base32: the alphabet is A-Z plus 2-7 (the look-alike digits 0, 1 and 8 are left out)
b32 = base64.b32encode(data)
print(b32)  # b'ONSWG4TFOQQGWZLZ'

decoded = base64.b32decode(b32)

# Base16 is plain uppercase hex, two characters per byte (easiest to read).
# Note: despite the variable's name, this value is base16, not base32.
b32_hex = base64.b16encode(data)
print(b32_hex)  # b'736563726574206B6579'

ASCII85 / Base85

Base85 is denser than base64: it packs every 4 bytes into 5 characters (about 25% overhead instead of about 33%):

import base64
 
data = b"Hello, World!"

# Base85 (5 output characters per 4 input bytes, denser than base64)
b85 = base64.b85encode(data)
print(b85)  # b'NM&qnZ!92JZ*pv8Ap'

# Ascii85 (the alphabet used by Adobe; pass adobe=True to add the <~ ~> framing)
a85 = base64.a85encode(data)
print(a85)  # b'87cURD_*#4DfTZ)+T'

# Decode
decoded = base64.b85decode(b85)

Streaming Large Files

import base64
 
def encode_file_streaming(input_path: str, output_path: str,
                          chunk_size: int = 57) -> None:
    """Base64-encode a file chunk by chunk, one output line per chunk.

    Only ``chunk_size`` bytes are held in memory at a time. Each chunk is
    encoded on its own and written followed by a newline.

    The default chunk_size=57 gives 76-char lines (57*4/3=76).
    """
    with open(input_path, 'rb') as src:
        with open(output_path, 'w') as dst:
            # iter() with a b'' sentinel stops at the first empty read (EOF)
            for block in iter(lambda: src.read(chunk_size), b''):
                line = base64.b64encode(block).decode()
                dst.write(line + '\n')
 
def decode_file_streaming(input_path: str, output_path: str) -> None:
    """Decode a line-oriented base64 file one line at a time.

    Every line is stripped of surrounding whitespace and decoded on its own,
    so the whole file never has to be held in memory.
    """
    with open(input_path, 'r') as src:
        with open(output_path, 'wb') as dst:
            for raw_line in src:
                text = raw_line.strip()
                dst.write(base64.b64decode(text))

Email Attachments (MIME)

import base64
from email.mime.base import MIMEBase
from email import encoders
 
def create_attachment(path: str) -> MIMEBase:
    """Build a base64-encoded ``application/octet-stream`` MIME part from a file.

    Args:
        path: Path of the file to attach. Its final component becomes the
            attachment's file name.

    Returns:
        A ``MIMEBase`` part carrying the file content, with
        ``Content-Transfer-Encoding: base64`` and a ``Content-Disposition:
        attachment`` header naming the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import os  # local import: the snippet's import block does not provide os

    with open(path, 'rb') as f:
        data = f.read()

    # basename() understands the platform's path separators; splitting on '/'
    # would keep the whole path as the "file name" for Windows-style paths.
    filename = os.path.basename(path)

    attachment = MIMEBase('application', 'octet-stream')
    attachment.set_payload(data)
    encoders.encode_base64(attachment)
    # Passing the name as a header parameter lets the email package quote and
    # escape it, instead of splicing it into the header text by hand.
    attachment.add_header('Content-Disposition', 'attachment', filename=filename)

    return attachment

Padding Handling

Base64 uses = padding to align to 4-character boundaries:

import base64
 
# Encode, then drop the trailing '=' padding
padded = base64.b64encode(b"a").decode()
encoded_no_pad = padded.rstrip('=')
print(encoded_no_pad)  # YQ
 
# Add padding back for decoding
def add_padding(s: str) -> str:
    """Return *s* extended with '=' so that its length is a multiple of 4."""
    # -len(s) % 4 is the distance to the next multiple of 4 (0 when aligned)
    missing = -len(s) % 4
    return s + '=' * missing
 
decoded = base64.b64decode(add_padding(encoded_no_pad))
print(decoded)  # b'a'

# Shortcut: always append '=='. Note that urlsafe_b64decode does NOT restore
# missing padding by itself; this relies on the decoder's default
# (non-validating) mode tolerating surplus '=' characters.
decoded = base64.urlsafe_b64decode(encoded_no_pad + '==')

Validation

import base64
import re
 
def is_valid_base64(s: str) -> bool:
    """Return True if *s* is well-formed, padded, standard-alphabet base64.

    The string must consist only of ``A-Z a-z 0-9 + /`` followed by at most
    two ``=``, have a length that is a multiple of 4, and decode cleanly.
    The empty string is accepted (it encodes zero bytes).

    Args:
        s: The candidate string.

    Returns:
        True when *s* passes all checks, otherwise False.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which is not part of the base64 alphabet.
    if not re.fullmatch(r'[A-Za-z0-9+/]*={0,2}', s):
        return False

    # Padded base64 always comes in 4-character groups
    if len(s) % 4 != 0:
        return False

    try:
        # validate=True makes the decoder reject stray characters rather
        # than silently discarding them
        base64.b64decode(s, validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        return False
    return True
 
# Prints True for the first candidate and False for the second
for candidate in ('SGVsbG8=', 'Invalid!'):
    print(is_valid_base64(candidate))

Size Considerations

import base64
 
# Every 3 input bytes become 4 output characters, i.e. roughly +33%
original_size = 1000
payload = b'x' * original_size
encoded_payload = base64.b64encode(payload)
encoded_size = len(encoded_payload)

print(f"Original: {original_size} bytes")
print(f"Encoded: {encoded_size} bytes")
print(f"Overhead: {(encoded_size / original_size - 1) * 100:.1f}%")
# Output:
#   Original: 1000 bytes
#   Encoded: 1336 bytes
#   Overhead: 33.6%

Base64 is everywhere—APIs, data URIs, tokens, email. Understanding its variants (standard, URL-safe, base32, base85) helps you choose the right encoding for each situation.

React to this post:

#Basic Encoding and Decoding

#URL-Safe Base64

#Encoding Files

#Data URIs

#Parse Data URIs

#JSON with Binary Data

#Base32 Encoding

#ASCII85 / Base85

#Streaming Large Files

#Email Attachments (MIME)

#Padding Handling

#Validation

#Size Considerations

Need help shipping fast?