The struct module converts between Python values and C-style binary data. Essential for parsing file formats, network protocols, and interfacing with C code.
Basic Packing and Unpacking
import struct
# Pack: Python values → bytes
data = struct.pack('i', 42)
# b'*\x00\x00\x00' (4 bytes; native byte order, shown here on a little-endian machine)
# Unpack: bytes → Python values
value, = struct.unpack('i', data)
# 42
Format Characters
| Char | C Type | Python Type | Size |
|---|---|---|---|
| b | signed char | int | 1 |
| B | unsigned char | int | 1 |
| h | short | int | 2 |
| H | unsigned short | int | 2 |
| i | int | int | 4 |
| I | unsigned int | int | 4 |
| q | long long | int | 8 |
| Q | unsigned long long | int | 8 |
| f | float | float | 4 |
| d | double | float | 8 |
| s | char[] | bytes | - |
| ? | _Bool | bool | 1 |
Multiple Values
# Pack multiple values
data = struct.pack('ihf', 1, 2, 3.14)
# Unpack them
a, b, c = struct.unpack('ihf', data)
print(a, b, c) # 1 2 3.140000104904175
Byte Order
# Little-endian (Intel)
struct.pack('<i', 1) # b'\x01\x00\x00\x00'
# Big-endian (network)
struct.pack('>i', 1) # b'\x00\x00\x00\x01'
# Native byte order with standard sizes and no padding (the default, '@', also uses native sizes and alignment)
struct.pack('=i', 1)
# Network order (big-endian, standard sizes)
struct.pack('!i', 1)
Always specify byte order when dealing with file formats or network data.
Strings and Bytes
# Fixed-length string (10 bytes)
data = struct.pack('10s', b'hello')
# b'hello\x00\x00\x00\x00\x00'
# Unpack
text, = struct.unpack('10s', data)
# b'hello\x00\x00\x00\x00\x00'
# Strip null bytes
text = text.rstrip(b'\x00')
Struct Objects
For repeated operations, create a Struct object:
# Create once, use many times
point_struct = struct.Struct('<ff') # Two floats
# Pack
data = point_struct.pack(1.0, 2.0)
# Unpack
x, y = point_struct.unpack(data)
# Size of packed data
print(point_struct.size) # 8
Parsing File Headers
Example: parsing a BMP file header:
def parse_bmp_header(data: bytes) -> dict:
    """Parse the 14-byte BMP file header.

    Args:
        data: Buffer that starts with the BMP file header. Only the first
            14 bytes are read; anything after them is ignored.

    Returns:
        A dict with the keys 'magic', 'file_size', 'reserved1',
        'reserved2' and 'offset'.

    Raises:
        struct.error: If ``data`` holds fewer than 14 bytes.
    """
    # '<' means little-endian with no padding, so the layout is exactly
    # 2s + I + H + H + I = 2 + 4 + 2 + 2 + 4 = 14 bytes.
    magic, file_size, reserved1, reserved2, offset = struct.unpack(
        '<2sIHHI', data[:14]
    )
    return {
        'magic': magic,          # b'BM'
        'file_size': file_size,  # Total file size
        'reserved1': reserved1,  # Usually 0
        'reserved2': reserved2,  # Usually 0
        'offset': offset,        # Pixel data offset
    }
with open('image.bmp', 'rb') as f:
    header = parse_bmp_header(f.read(14))
Network Protocols
def parse_ip_header(data: bytes) -> dict:
    """Parse the fixed 20-byte part of an IPv4 header (simplified).

    Args:
        data: Buffer that starts with an IPv4 header. Only the first
            20 bytes are read; options and payload are ignored.

    Returns:
        A dict with the keys 'version', 'ihl', 'tos', 'total_length',
        'ttl', 'protocol', 'src_ip' and 'dst_ip'. The two addresses are
        dotted-decimal strings.

    Raises:
        struct.error: If ``data`` holds fewer than 20 bytes.
    """
    # '!' selects network (big-endian) byte order with standard sizes.
    # Fields that are unpacked but not returned carry a leading underscore.
    (
        version_ihl,
        tos,
        total_length,
        _identification,
        _flags_fragment,
        ttl,
        protocol,
        _checksum,
        src,
        dst,
    ) = struct.unpack('!BBHHHBBH4s4s', data[:20])
    return {
        # The first byte packs two 4-bit values: version (high), IHL (low).
        'version': version_ihl >> 4,
        'ihl': version_ihl & 0xF,
        'tos': tos,
        'total_length': total_length,
        'ttl': ttl,
        'protocol': protocol,
        # Iterating over bytes yields ints, one per address octet.
        'src_ip': '.'.join(map(str, src)),
        'dst_ip': '.'.join(map(str, dst)),
    }


# Iterating Over Binary Data
def read_records(data: bytes, format_str: str):
    """Iterate over fixed-size packed records in ``data``.

    Args:
        data: Buffer holding zero or more records packed back to back.
        format_str: ``struct`` format string describing one record.

    Yields:
        One tuple of unpacked values per record, in buffer order.

    Raises:
        struct.error: If ``format_str`` is invalid, or when iteration
            reaches a trailing chunk shorter than one record (the complete
            records before it are yielded first).
        ValueError: If ``format_str`` describes a zero-byte record.
    """
    # Compile the format once instead of re-resolving it for every record.
    record = struct.Struct(format_str)
    if record.size == 0:
        # A zero step would otherwise fail inside range() with an
        # unrelated-looking message.
        raise ValueError('format_str must describe at least one byte per record')
    for start in range(0, len(data), record.size):
        yield record.unpack(data[start:start + record.size])
# Usage
data = b'...' # Binary data with repeated records
for x, y, z in read_records(data, '<fff'):
    print(x, y, z)
Pack Into / Unpack From
Write directly to a buffer:
# Pre-allocated buffer
buffer = bytearray(100)
# Pack into buffer at offset
struct.pack_into('<ii', buffer, 0, 10, 20)
struct.pack_into('<ii', buffer, 8, 30, 40)
# Unpack from buffer at offset
a, b = struct.unpack_from('<ii', buffer, 0)
c, d = struct.unpack_from('<ii', buffer, 8)
Padding and Alignment
# Native alignment (may have padding)
struct.pack('ci', b'x', 1)
# Might be 8 bytes with padding
# No padding
struct.pack('<ci', b'x', 1)
# Always 5 bytes
Use explicit byte order to avoid platform-dependent padding.
Quick Reference
import struct
# Pack values to bytes
struct.pack(format, v1, v2, ...)
# Unpack bytes to tuple
struct.unpack(format, buffer)
# Size of format in bytes
struct.calcsize(format)
# Reusable Struct object
s = struct.Struct(format)
s.pack(v1, v2)
s.unpack(buffer)
s.size
# Pack/unpack with buffer offset
struct.pack_into(format, buffer, offset, v1, ...)
struct.unpack_from(format, buffer, offset)
Common Patterns
| Task | Format |
|---|---|
| Network int (4 bytes) | !I |
| Little-endian short | <H |
| Two floats | <ff |
| 20-byte string | 20s |
| Bool + int + float | <?if |
struct is the bridge between Python and the binary world. Master it to parse any file format or protocol.
React to this post: