I recently shipped a feature with an API key hardcoded in the source. It got caught in code review, but that moment of embarrassment taught me to take secrets seriously. Here's everything I've learned about handling sensitive data in Python.
The secrets Module: Why random Isn't Enough
My first mistake was using random for generating tokens. Turns out, random is predictable—it's designed for simulations, not security.
import random
import secrets
# DON'T: random is predictable
bad_token = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=32))
# DO: secrets uses cryptographically secure randomness
good_token = secrets.token_hex(32) # 64 hex characters
print(good_token) # 'a3f8b2c1d4e5f6a7b8c9d0e1f2a3b4c5...'
The random module uses a Mersenne Twister algorithm. If an attacker sees enough outputs, they can predict future values. The secrets module uses os.urandom(), which draws from the operating system's cryptographic random number generator.
Secure Token Generation
The secrets module gives you several ways to generate tokens:
import secrets
# Hex tokens (good for URLs, database IDs)
token = secrets.token_hex(16) # 32 hex chars
print(token) # '4a7b3c9d1e5f2a8b6c0d4e7f9a2b5c8d'
# URL-safe tokens (good for password reset links)
url_token = secrets.token_urlsafe(16) # ~22 chars
print(url_token) # 'dGhpcyBpcyBhIHRlc3Q'
# Raw bytes (for encryption keys)
key = secrets.token_bytes(32) # 32 bytes = 256 bits
# Random integers
code = secrets.randbelow(1000000) # 0 to 999999
print(f"{code:06d}") # Zero-padded 6-digit code
# Secure choice from a list
alphabet = 'ABCDEFGHJKLMNPQRSTUVWXYZ23456789' # No confusing chars
otp = ''.join(secrets.choice(alphabet) for _ in range(6))
print(otp) # 'K7M3P9'
Generating Password Reset Tokens
import secrets
from datetime import datetime, timedelta
def generate_reset_token():
    """Return a fresh URL-safe token for a password reset link.

    The token comes from ``secrets.token_urlsafe`` and is built from
    32 random bytes.
    """
    reset_token = secrets.token_urlsafe(32)
    return reset_token
def create_reset_link(user_id: int) -> dict:
"""Create a time-limited password reset link."""
token = generate_reset_token()
expires = datetime.utcnow() + timedelta(hours=1)
# Store in database: user_id, token_hash, expires
# Return the raw token to email to user
return {
"token": token,
"expires": expires,
"link": f"https://example.com/reset?token={token}"
}
Password Hashing: Never Store Plaintext
This is non-negotiable. Passwords must be hashed, and not with MD5 or SHA-256. Use a proper password hashing algorithm.
Using bcrypt
# pip install bcrypt
import bcrypt
def hash_password(password: str) -> bytes:
    """Return the bcrypt hash of ``password`` for storage.

    A new salt is generated on every call and the password is UTF-8
    encoded before hashing.
    """
    # Higher round count = slower hash = more expensive to brute-force.
    fresh_salt = bcrypt.gensalt(rounds=12)
    encoded_password = password.encode('utf-8')
    return bcrypt.hashpw(encoded_password, fresh_salt)
def verify_password(password: str, hashed: bytes) -> bool:
    """Report whether ``password`` matches the stored bcrypt ``hashed`` value."""
    candidate = password.encode('utf-8')
    return bcrypt.checkpw(candidate, hashed)
# Usage
hashed = hash_password("my_secure_password")
print(hashed) # b'$2b$12$...'
# Later, during login
if verify_password("my_secure_password", hashed):
print("Login successful")
else:
print("Invalid password")
Using Argon2 (Recommended)
Argon2 won the Password Hashing Competition in 2015. It's the modern choice.
# pip install argon2-cffi
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError
ph = PasswordHasher()
def hash_password(password: str) -> str:
    """Return the Argon2 hash of ``password`` produced by the shared hasher ``ph``."""
    encoded_hash = ph.hash(password)
    return encoded_hash
def verify_password(password: str, hashed: str) -> bool:
    """Check ``password`` against an Argon2 ``hashed`` value.

    Returns ``True`` when verification succeeds and ``False`` when the
    hasher raises ``VerifyMismatchError``; any other exception propagates.
    """
    try:
        ph.verify(hashed, password)
    except VerifyMismatchError:
        return False
    else:
        return True
# Usage
hashed = hash_password("my_secure_password")
print(hashed) # '$argon2id$v=19$m=65536,t=3,p=4$...'
# Argon2 also tells you when to rehash (if settings changed)
def verify_and_rehash(password: str, hashed: str) -> tuple[bool, str | None]:
"""Verify password and rehash if needed."""
try:
ph.verify(hashed, password)
if ph.check_needs_rehash(hashed):
return True, ph.hash(password)
return True, None
except VerifyMismatchError:
return False, None
Why Not SHA-256?
import hashlib
# DON'T: Fast hashes are bad for passwords
bad_hash = hashlib.sha256(b"password").hexdigest()
# SHA-256 can compute billions of hashes per second
# An attacker with a GPU can brute-force common passwords quickly
# bcrypt/Argon2 are intentionally slow:
# - bcrypt: ~100ms per hash
# - Argon2: configurable memory and time cost
Environment Variables for Secrets
Secrets don't belong in code. Put them in environment variables.
import os
# Reading secrets from environment
DATABASE_URL = os.environ.get("DATABASE_URL")
API_KEY = os.environ.get("API_KEY")
SECRET_KEY = os.environ.get("SECRET_KEY")
# With validation
def get_required_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is not set at all. A variable set to
            an empty string is returned as-is.
    """
    if name not in os.environ:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return os.environ[name]
DATABASE_URL = get_required_env("DATABASE_URL")
Using python-dotenv for Development
# pip install python-dotenv
from dotenv import load_dotenv
import os
# Load .env file (for local development)
load_dotenv()
# Now environment variables from .env are available
API_KEY = os.environ.get("API_KEY")
Your .env file:
# .env (add to .gitignore!)
DATABASE_URL=postgresql://user:pass@localhost/db
API_KEY=sk-1234567890abcdef
SECRET_KEY=your-256-bit-secret-key
And your .gitignore:
# .gitignore
.env
.env.local
.env.*.local
*.pem
*.key
secrets/
Pydantic Settings for Validation
# pip install pydantic-settings
from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings, with secret values wrapped in ``SecretStr``."""

    database_url: str
    api_key: SecretStr  # Masked in logs/repr
    secret_key: SecretStr
    debug: bool = False

    # pydantic-settings builds on pydantic v2, where the nested ``class Config``
    # is deprecated; settings options are declared through ``model_config``.
    model_config = SettingsConfigDict(env_file=".env")
settings = Settings()
# SecretStr prevents accidental logging
print(settings.api_key) # ********** (repr shows SecretStr('**********'))
print(settings.api_key.get_secret_value()) # actual value
Avoiding Hardcoded Credentials
I've seen (and written) all of these mistakes:
# DON'T: Hardcoded credentials
API_KEY = "sk-1234567890abcdef" # Will end up in git history
# DON'T: Credentials in config files committed to git
config = {
"api_key": "sk-1234567890abcdef",
"db_password": "hunter2"
}
# DON'T: Credentials in comments
# API key for production: sk-1234567890abcdef
# DON'T: Credentials in URLs
DATABASE_URL = "postgresql://admin:password123@prod-db.example.com/mydb"
Better Patterns
import os
from functools import lru_cache
@lru_cache
def get_settings():
    """Read the API key and database URL from the environment, once.

    ``lru_cache`` memoizes the result, so repeated calls return the same
    dict. A missing variable raises ``KeyError``.
    """
    env_names = ("API_KEY", "DATABASE_URL")
    return {env_name.lower(): os.environ[env_name] for env_name in env_names}
# Or use a secrets manager
def get_secret_from_manager(name: str) -> str:
"""Fetch secret from AWS Secrets Manager, Vault, etc."""
# In production, use boto3 for AWS, hvac for Vault, etc.
import boto3
client = boto3.client('secretsmanager')
response = client.get_secret_value(SecretId=name)
return response['SecretString']
Secure Comparison: Timing Attacks
When comparing secrets, use constant-time comparison to prevent timing attacks:
import secrets
import hmac
# DON'T: Regular comparison leaks timing information
def insecure_compare(a: str, b: str) -> bool:
    """Compare two strings with plain equality (the pattern to avoid for secrets)."""
    # Ordinary equality returns faster if the first character differs.
    strings_equal = a == b
    return strings_equal
# DO: Constant-time comparison
def secure_compare(a: str, b: str) -> bool:
    """Compare two strings in constant time.

    ``secrets.compare_digest`` accepts ``str`` arguments only when they are
    pure ASCII and raises ``TypeError`` otherwise. Both values are therefore
    UTF-8 encoded and compared as bytes, so non-ASCII secrets (or a hostile
    non-ASCII input) yield a normal ``True``/``False`` instead of an error.

    Args:
        a: First string, e.g. the value supplied by the caller.
        b: Second string, e.g. the expected secret.

    Returns:
        ``True`` if the two strings are equal, ``False`` otherwise.
    """
    return secrets.compare_digest(a.encode("utf-8"), b.encode("utf-8"))
# Also works with bytes
def verify_signature(received: bytes, expected: bytes) -> bool:
    """Report whether ``received`` equals ``expected`` via ``hmac.compare_digest``."""
    signatures_match = hmac.compare_digest(received, expected)
    return signatures_match
# Example: Verifying webhook signatures
def verify_webhook(payload: bytes, signature: str, secret: str) -> bool:
expected = hmac.new(
secret.encode(),
payload,
digestmod='sha256'
).hexdigest()
return secrets.compare_digest(signature, expected)
Why Timing Attacks Matter
Regular string comparison returns False as soon as it finds a difference. An attacker can measure response times to guess characters one by one:
# Regular comparison timing (simplified)
def regular_compare(a, b):
    """Simplified model of ordinary comparison, showing where it exits early.

    Returns ``False`` on a length mismatch or at the first differing
    position, and ``True`` only when every position matches.
    """
    if len(a) != len(b):
        return False
    for left, right in zip(a, b):
        if left != right:
            return False  # Returns early!
    return True
# Timing: "aXXXXXXX" vs "bXXXXXXX" → fails immediately
# Timing: "sXXXXXXX" vs "secret12" → takes longer (first char matches)
# Attacker can deduce each character by measuring response time
Common Security Mistakes
Here are mistakes I've made or seen others make:
1. Logging Sensitive Data
import logging
# DON'T
logging.info(f"Authenticating with API key: {api_key}")
logging.debug(f"User password: {password}")
# DO: Mask or omit sensitive data
logging.info(f"Authenticating with API key: {api_key[:4]}...")
logging.debug("User authentication attempt")
2. Exposing Secrets in Error Messages
# DON'T
raise ValueError(f"Invalid API key: {api_key}")
# DO
raise ValueError("Invalid API key")
3. Secrets in URL Parameters
# DON'T: Shows up in logs, browser history, referrer headers
response = requests.get(f"https://api.example.com?api_key={api_key}")
# DO: Use headers
response = requests.get(
"https://api.example.com",
headers={"Authorization": f"Bearer {api_key}"}
)
4. Not Rotating Secrets
# Build rotation into your design
class APIClient:
def __init__(self, get_api_key):
self._get_api_key = get_api_key # Function, not value
@property
def api_key(self):
return self._get_api_key() # Fresh value each time
5. Trusting Client-Side Validation
# DON'T: Assume client validated the token
def process_payment(token):
    """Charge ``token`` with no server-side check (the anti-pattern shown here)."""
    # "Token was validated on the frontend"
    charge(token)  # WRONG
# DO: Always validate on the server
def process_payment(token):
if not verify_token(token):
raise ValueError("Invalid token")
charge(token)
6. Weak Password Requirements
# DON'T: Just check length
def validate_password(password):
    """Accept any password of eight or more characters; length is the only check."""
    minimum_length = 8
    too_short = len(password) < minimum_length
    return not too_short
# DO: Check against common passwords, require complexity
def validate_password(password: str) -> tuple[bool, str]:
if len(password) < 12:
return False, "Password must be at least 12 characters"
# Check against common passwords
common = {"password", "123456789", "qwerty123", ...}
if password.lower() in common:
return False, "Password is too common"
return True, ""
Security Checklist
Before shipping, verify:
- No secrets in code or git history
- Passwords hashed with bcrypt or Argon2
- Tokens generated with the secrets module
- Secrets loaded from environment variables
- .env files in .gitignore
- Constant-time comparison for secret verification
- Sensitive data excluded from logs
- HTTPS everywhere
- Secrets rotatable without code changes
Quick Reference
import secrets
from argon2 import PasswordHasher
import os
# Generate secure token
token = secrets.token_urlsafe(32)
# Hash password
ph = PasswordHasher()
hashed = ph.hash(password)
is_valid = ph.verify(hashed, password)
# Get secret from environment
api_key = os.environ["API_KEY"]
# Constant-time comparison
is_match = secrets.compare_digest(received, expected)
Security isn't something you add at the end. Build it in from the start, and your future self (and your users) will thank you.