The struct module converts between Python values and C-style binary data. Essential for binary protocols, file formats, and interop with C code.
## Basic Packing and Unpacking
import struct
# Pack Python values to bytes
data = struct.pack('ihf', 42, 1000, 3.14)
print(data) # b'*\x00\x00\x00\xe8\x03\x00\x00\xc3\xf5H@' (little-endian host; 2 pad bytes align the float)
print(len(data)) # 12 bytes
# Unpack bytes to Python values
values = struct.unpack('ihf', data)
print(values) # (42, 1000, 3.140000104904175)

## Format Characters
import struct
# Integer types
struct.pack('b', -128) # signed char (1 byte)
struct.pack('B', 255) # unsigned char (1 byte)
struct.pack('h', -32768) # short (2 bytes)
struct.pack('H', 65535) # unsigned short (2 bytes)
struct.pack('i', -2147483648) # int (4 bytes)
struct.pack('I', 4294967295) # unsigned int (4 bytes)
struct.pack('q', -9223372036854775808) # long long (8 bytes)
struct.pack('Q', 18446744073709551615) # unsigned long long (8 bytes)
# Floating point
struct.pack('f', 3.14) # float (4 bytes)
struct.pack('d', 3.14159265358979) # double (8 bytes)
# Other
struct.pack('?', True) # bool (1 byte)
struct.pack('c', b'A') # char (1 byte)
struct.pack('5s', b'hello') # string (5 bytes)
struct.pack('x') # pad byte

## Byte Order
import struct
value = 0x12345678
# Native byte order (system dependent)
struct.pack('I', value)
# Little-endian (x86, ARM)
struct.pack('<I', value) # b'xV4\x12'
# Big-endian (network, some file formats)
struct.pack('>I', value) # b'\x124Vx'
# Network order (big-endian)
struct.pack('!I', value) # b'\x124Vx'

## Reading Binary Files
import struct
from pathlib import Path
def read_bmp_header(path: str) -> dict:
    """Read the 14-byte BMP file header.

    Only the header bytes are read from disk, so the cost does not grow
    with the size of the image.

    Args:
        path: Location of the BMP file.

    Returns:
        A dict with the keys ``signature`` (a ``bytes`` object, ``b'BM'``
        for a regular bitmap), ``file_size``, ``reserved1``, ``reserved2``
        and ``data_offset``.

    Raises:
        OSError: If the file cannot be opened.
        struct.error: If the file holds fewer than 14 bytes.
    """
    # Little-endian, standard sizes, no padding: 2 + 4 + 2 + 2 + 4 = 14 bytes.
    header_struct = struct.Struct('<2sIHHI')
    with Path(path).open('rb') as f:
        data = f.read(header_struct.size)
    header = header_struct.unpack(data)
    return {
        'signature': header[0],  # b'BM'
        'file_size': header[1],
        'reserved1': header[2],
        'reserved2': header[3],
        'data_offset': header[4],
    }
# Read DIB header
def read_dib_header(path: str) -> dict:
    """Read the 40-byte DIB header that follows the BMP file header.

    Only the 40 header bytes at offset 14 are read from disk instead of
    loading the whole image.

    Args:
        path: Location of the BMP file.

    Returns:
        A dict with the keys ``header_size``, ``width``, ``height``,
        ``planes``, ``bits_per_pixel``, ``compression`` and ``image_size``.
        The four trailing header fields are parsed but not returned.

    Raises:
        OSError: If the file cannot be opened.
        struct.error: If fewer than 40 bytes are available at offset 14.
    """
    # Little-endian, standard sizes: 3*4 + 2*2 + 6*4 = 40 bytes.
    dib_struct = struct.Struct('<IiiHHIIiiII')
    with Path(path).open('rb') as f:
        f.seek(14)  # skip the 14-byte file header
        data = f.read(dib_struct.size)
    dib = dib_struct.unpack(data)
    return {
        'header_size': dib[0],
        'width': dib[1],
        'height': dib[2],
        'planes': dib[3],
        'bits_per_pixel': dib[4],
        'compression': dib[5],
        'image_size': dib[6],
    }


## Writing Binary Files
import struct
def write_wav_header(f, sample_rate: int, channels: int,
                     bits_per_sample: int, data_size: int):
    """Write a 44-byte PCM WAV header to *f* in a single ``write`` call.

    Args:
        f: Writable binary file-like object.
        sample_rate: Samples per second.
        channels: Number of audio channels.
        bits_per_sample: Bit depth of one sample.
        data_size: Number of audio payload bytes that will follow the header.
    """
    bits_per_frame = channels * bits_per_sample
    fields = (
        # RIFF container: the size field excludes the 8 bytes of tag + size.
        b'RIFF', 36 + data_size, b'WAVE',
        # 'fmt ' subchunk: 16 bytes long, audio format 1 (PCM).
        b'fmt ', 16, 1,
        channels,
        sample_rate,
        sample_rate * bits_per_frame // 8,  # byte rate
        bits_per_frame // 8,                # block align
        bits_per_sample,
        # 'data' subchunk header.
        b'data', data_size,
    )
    f.write(struct.pack('<4sI4s4sIHHIIHH4sI', *fields))
# Create WAV file
with open('output.wav', 'wb') as f:
    # 8-bit PCM samples are unsigned, so silence is the midpoint 0x80;
    # 0x00 would be a constant full-scale negative offset.
    audio_data = b'\x80' * 44100 # 1 second of silence (44100 Hz, mono, 8-bit)
    write_wav_header(f, 44100, 1, 8, len(audio_data))
    f.write(audio_data)


## Struct Objects for Efficiency
import struct
# Pre-compile format for repeated use
header_struct = struct.Struct('<IBBHI')
print(header_struct.size) # 12 bytes
# Pack and unpack
data = header_struct.pack(100, 1, 2, 300, 4)
values = header_struct.unpack(data)
# Unpack from buffer at offset
buffer = b'\x00' * 10 + data
values = header_struct.unpack_from(buffer, offset=10)
# Pack into existing buffer
buffer = bytearray(20)
header_struct.pack_into(buffer, 5, 100, 1, 2, 300, 4)

## Variable-Length Data
import struct
def pack_string(s: str) -> bytes:
    """Serialize *s* as UTF-8 preceded by its byte length.

    The length prefix is a 4-byte little-endian unsigned integer counting
    encoded bytes, not characters.
    """
    payload = s.encode('utf-8')
    size = len(payload)
    return struct.pack(f'<I{size}s', size, payload)
def unpack_string(data: bytes, offset: int = 0) -> tuple[str, int]:
    """Decode one length-prefixed UTF-8 string starting at *offset*.

    Returns:
        The decoded text and the offset of the first byte after it, so
        consecutive strings can be read by feeding the offset back in.

    Raises:
        struct.error: If *data* is too short for the prefix or the payload.
    """
    (size,) = struct.unpack_from('<I', data, offset)
    body_start = offset + 4  # skip the 4-byte length prefix
    (raw,) = struct.unpack_from(f'{size}s', data, body_start)
    return raw.decode('utf-8'), body_start + size
# Usage
packed = pack_string("Hello, World!")
text, _ = unpack_string(packed)
print(text) # Hello, World!

## Network Protocol Parsing
import struct
from dataclasses import dataclass
@dataclass
class IPHeader:
    """Decoded fields of the fixed 20-byte part of an IPv4 header."""

    version: int          # high 4 bits of the first byte
    ihl: int              # low 4 bits of the first byte
    tos: int
    total_length: int
    identification: int
    flags: int            # top 3 bits of the flags/fragment word
    fragment_offset: int  # low 13 bits of the flags/fragment word
    ttl: int
    protocol: int
    checksum: int
    src_addr: str         # dotted-decimal text
    dst_addr: str         # dotted-decimal text


def parse_ip_header(data: bytes) -> IPHeader:
    """Decode the first 20 bytes of *data* as an IPv4 header.

    Bytes beyond the first 20 are ignored.

    Raises:
        struct.error: If *data* holds fewer than 20 bytes.
    """
    (first_byte, tos, total_length, identification, flags_and_offset,
     ttl, protocol, checksum, src_raw, dst_raw) = struct.unpack(
        '!BBHHHBBH4s4s', data[:20])

    def dotted(raw: bytes) -> str:
        # Iterating over bytes yields ints, one per address octet.
        return '.'.join(map(str, raw))

    return IPHeader(
        version=first_byte >> 4,
        ihl=first_byte & 0x0F,
        tos=tos,
        total_length=total_length,
        identification=identification,
        flags=flags_and_offset >> 13,
        fragment_offset=flags_and_offset & 0x1FFF,
        ttl=ttl,
        protocol=protocol,
        checksum=checksum,
        src_addr=dotted(src_raw),
        dst_addr=dotted(dst_raw),
    )


## Padding and Alignment
import struct
# Native alignment may add padding
native = struct.pack('ci', b'A', 1000)
print(len(native)) # 8 (3 bytes padding)
# An explicit byte order disables automatic padding; add pad bytes ('x') by hand if needed
packed = struct.pack('<cxxxI', b'A', 1000)
print(len(packed)) # 8 (explicit padding)
# Calculate struct size
print(struct.calcsize('ci')) # 8 (with padding)
print(struct.calcsize('<cI')) # 5 (no padding)

## Iterating Over Records
import struct
def read_records(path: str, format_str: str):
    """Yield fixed-size records from a binary file, one tuple per record.

    Args:
        path: File to read.
        format_str: ``struct`` format describing a single record.

    Yields:
        The unpacked fields of each complete record. A trailing partial
        record is ignored.
    """
    codec = struct.Struct(format_str)
    record_size = codec.size
    with open(path, 'rb') as stream:
        # Stop at the first short read: end of file or a truncated record.
        while len(chunk := stream.read(record_size)) >= record_size:
            yield codec.unpack(chunk)
# Read log records
for timestamp, level, code in read_records('log.bin', '<QBI'):
print(f"{timestamp}: Level {level}, Code {code}")

## C Struct Interop
import struct
# Match C struct:
#     struct Point {
#         double x;
#         double y;
#         int32_t id;
#     };
# Native mode ('@') follows this platform's C byte order, sizes and alignment.
# struct never pads the end of a record by itself, so the trailing '0d'
# aligns the end to a double, reproducing the tail padding the C compiler
# adds (sizeof(struct Point) is 24 on typical 64-bit ABIs, not 20).
# 'i' is the native int, which is 4 bytes on mainstream platforms.
POINT_FORMAT = '@ddi0d'


def to_c_point(x: float, y: float, id: int) -> bytes:
    """Serialize a point using the in-memory layout of ``struct Point``."""
    return struct.pack(POINT_FORMAT, x, y, id)


def from_c_point(data: bytes) -> tuple:
    """Decode ``struct Point`` bytes into an ``(x, y, id)`` tuple.

    Raises:
        struct.error: If *data* is not exactly one record long.
    """
    return struct.unpack(POINT_FORMAT, data)
# Send to C library
point_bytes = to_c_point(1.5, 2.5, 42)
# Receive from C library
x, y, point_id = from_c_point(point_bytes)

## Format String Reference
| Character | Type | Size |
|---|---|---|
| x | pad byte | 1 |
| c | char | 1 |
| b/B | signed/unsigned char | 1 |
| ? | bool | 1 |
| h/H | short/unsigned short | 2 |
| i/I | int/unsigned int | 4 |
| l/L | long/unsigned long | 4 |
| q/Q | long long/unsigned | 8 |
| f | float | 4 |
| d | double | 8 |
| s | char[] | n |
| p | pascal string | n |
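Byte order prefixes: @ (native byte order, sizes, and alignment), = (native byte order, standard sizes, no padding), < (little-endian), > (big-endian), ! (network/big-endian). The sizes in the table are the standard sizes used with =, <, > and !; in native mode (@ or no prefix) sizes are platform-dependent — for example, l/L is 8 bytes on most 64-bit Unix systems.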
The struct module bridges Python and binary data. Master it to work with file formats, network protocols, and C libraries.
React to this post: