hashlib provides cryptographic hash functions. Here's how to use them correctly.

Basic Usage

import hashlib
 
# Create hash
h = hashlib.sha256()
h.update(b"Hello, World!")
print(h.hexdigest())
# dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f
 
# One-liner
hashlib.sha256(b"Hello, World!").hexdigest()

Available Algorithms

# Normally available (md5 may be missing or blocked on FIPS-compliant builds)
hashlib.md5(data)
hashlib.sha1(data)
hashlib.sha256(data)
hashlib.sha384(data)
hashlib.sha512(data)
 
# Check available algorithms
print(hashlib.algorithms_available)
print(hashlib.algorithms_guaranteed)
 
# Use by name
hashlib.new("sha256", data)

Hash Methods

h = hashlib.sha256()
 
# Add data incrementally
h.update(b"Hello, ")
h.update(b"World!")
 
# Get results
h.hexdigest()  # Hex string
h.digest()     # Raw bytes
h.digest_size  # Size in bytes
h.block_size   # Internal block size
h.name         # Algorithm name
 
# Copy hash state
h2 = h.copy()

Hashing Strings

# Strings must be encoded to bytes
text = "Hello, World!"
h = hashlib.sha256(text.encode("utf-8"))
print(h.hexdigest())

Hashing Files

def hash_file(path, algorithm="sha256", *, chunk_size=8192):
    """Hash a file without loading it into memory all at once.

    Args:
        path: Location of the file to hash; opened in binary mode.
        algorithm: Any algorithm name accepted by ``hashlib.new``.
        chunk_size: Number of bytes read per iteration. Must be positive.

    Returns:
        The hexadecimal digest of the file's contents.

    Raises:
        ValueError: If ``chunk_size`` is less than 1, or if ``algorithm``
            is not a name ``hashlib.new`` recognises.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately (yielding the hash of nothing) and a
    # negative size reads the entire file, so reject both up front.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    h = hashlib.new(algorithm)
    with open(path, "rb") as f:
        # An empty bytes object signals end of file and ends the loop.
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()
 
# Usage
file_hash = hash_file("document.pdf")

Common Hash Algorithms

MD5 (Not for security!)

# Only for checksums, NOT security
hashlib.md5(b"data").hexdigest()
# 8d777f385d3dfec8815d20f7496026dc

⚠️ MD5 is cryptographically broken. Don't use it for passwords or security.

SHA-256

hashlib.sha256(b"data").hexdigest()
# 3a6eb0790f39ac87c94f3856b2dd2c5d110e6811602261a9a923d3bb23adc8b7

SHA-512

hashlib.sha512(b"data").hexdigest()
# 77c7ce9a5d86bb386d443bb96390faa120633158699c8844c30b13ab0bf92760...

HMAC (Keyed Hashing)

For message authentication:

import hmac
 
key = b"secret-key"
message = b"Hello, World!"
 
# Create HMAC
h = hmac.new(key, message, hashlib.sha256)
print(h.hexdigest())
 
# Verify (timing-safe comparison)
expected = h.hexdigest()
received = "..."
if hmac.compare_digest(expected, received):
    print("Valid")

Password Hashing

⚠️ Don't use plain hashes for passwords!

# ❌ Wrong - unsalted and fast, so vulnerable to rainbow tables and brute force
hashlib.sha256(password.encode()).hexdigest()
 
# ✓ Use bcrypt or argon2
import bcrypt
 
# Hash password
password = b"user_password"
salt = bcrypt.gensalt()
hashed = bcrypt.hashpw(password, salt)
 
# Verify
if bcrypt.checkpw(password, hashed):
    print("Password correct")

Using PBKDF2

import hashlib
import os
 
def hash_password(password: str) -> bytes:
    """Derive a storable password record using PBKDF2-HMAC-SHA256.

    A fresh 16-byte random salt is generated on every call, so hashing the
    same password twice yields different records.

    Args:
        password: The plaintext password; encoded with the default
            ``str.encode()`` encoding before derivation.

    Returns:
        The 16-byte salt immediately followed by the derived key.
    """
    # NOTE(review): 100000 iterations is on the low side of current
    # guidance for PBKDF2-HMAC-SHA256; raising it means verify_password
    # must change in step, since the count is not stored in the record.
    random_salt = os.urandom(16)
    secret = password.encode()
    derived = hashlib.pbkdf2_hmac("sha256", secret, random_salt, 100000)
    return random_salt + derived
 
def verify_password(password: str, stored: bytes) -> bool:
    """Check a plaintext password against a stored salt-plus-key record.

    Args:
        password: The candidate plaintext password.
        stored: A record whose first 16 bytes are the salt and whose
            remaining bytes are the PBKDF2-HMAC-SHA256 derived key.

    Returns:
        True when re-deriving the key from ``password`` and the stored salt
        reproduces the stored key, False otherwise.
    """
    salt, expected_key = stored[:16], stored[16:]
    candidate_key = hashlib.pbkdf2_hmac(
        "sha256", password.encode(), salt, 100000
    )
    # compare_digest is used instead of == so the comparison does not leak
    # timing information about where the keys differ.
    return hmac.compare_digest(expected_key, candidate_key)

Practical Examples

File integrity check

def verify_download(path, expected_hash):
    """Report whether a file's SHA-256 digest equals the expected one.

    The expected digest is lower-cased before comparison, so an
    upper-case hex string is accepted.

    Args:
        path: Location of the downloaded file.
        expected_hash: The published SHA-256 digest as a hex string.

    Returns:
        True if the computed digest equals the lower-cased expected digest.
    """
    computed_digest = hash_file(path, "sha256")
    wanted_digest = expected_hash.lower()
    return computed_digest == wanted_digest
 
if verify_download("file.zip", "abc123..."):
    print("File is valid")

Cache key generation

def cache_key(*args, **kwargs):
    """Build a short, deterministic cache key from call arguments.

    The arguments are serialised to JSON with sorted dictionary keys, so
    the order in which keyword arguments are passed does not affect the
    result. Arguments must therefore be JSON-serialisable.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the
        serialised arguments.
    """
    import json

    serialised = json.dumps([args, kwargs], sort_keys=True)
    full_digest = hashlib.sha256(serialised.encode()).hexdigest()
    return full_digest[:16]
 
key = cache_key("user", 123, include_profile=True)

Content-based ID

def content_id(data: bytes) -> str:
    """Derive a short identifier from the bytes themselves.

    Identical input always produces the identical identifier.

    Args:
        data: The content to identify.

    Returns:
        The first 12 hex characters of the SHA-256 digest of ``data``.
    """
    hasher = hashlib.sha256()
    hasher.update(data)
    hex_digest = hasher.hexdigest()
    return hex_digest[:12]
 
# Same content = same ID
id1 = content_id(b"Hello")
id2 = content_id(b"Hello")
assert id1 == id2

Checksum comparison

def files_identical(path1, path2):
    """Compare two files by the digests of their contents.

    Args:
        path1: Location of the first file.
        path2: Location of the second file.

    Returns:
        True when both files hash to the same digest, False otherwise.
    """
    first_digest = hash_file(path1)
    second_digest = hash_file(path2)
    return first_digest == second_digest

SHAKE (Variable-Length)

# SHAKE produces variable-length output
h = hashlib.shake_256(b"data")
print(h.hexdigest(32))  # 32 bytes of output
print(h.hexdigest(64))  # 64 bytes of output

Quick Reference

import hashlib
 
# Basic hashing
hashlib.sha256(b"data").hexdigest()
 
# Incremental
h = hashlib.sha256()
h.update(b"data")
h.hexdigest()
 
# File hashing
def hash_file(path, algorithm="sha256"):
    """Return the hex digest of a file, reading it in 8192-byte chunks.

    Args:
        path: Location of the file to hash; opened in binary mode.
        algorithm: Any algorithm name accepted by ``hashlib.new``.
            Defaults to SHA-256, so ``hash_file(path)`` keeps working.

    Returns:
        The hexadecimal digest of the file's contents.
    """
    h = hashlib.new(algorithm)
    with open(path, "rb") as f:
        # An empty bytes object signals end of file and ends the loop.
        while chunk := f.read(8192):
            h.update(chunk)
    return h.hexdigest()
 
# HMAC
import hmac
hmac.new(key, msg, hashlib.sha256).hexdigest()
 
# Secure comparison
hmac.compare_digest(a, b)
 
# Password hashing (use bcrypt!)
import bcrypt
hashed = bcrypt.hashpw(password, bcrypt.gensalt())

Use SHA-256 for general hashing. Use bcrypt or argon2 for passwords. Never use MD5 or SHA-1 for security.

React to this post: