The shlex module handles shell-style tokenization. Use it whenever you need to parse or construct shell commands safely.
Splitting Shell Commands
import shlex
# Split like a shell would
cmd = 'grep -r "hello world" /path/to/dir'
tokens = shlex.split(cmd)
print(tokens)
# ['grep', '-r', 'hello world', '/path/to/dir']
# Handles escapes and quotes.
# Inside single quotes a backslash is literal (POSIX rules), so a single quote
# is embedded by closing the quotes, escaping the quote, and reopening them:
# 'it'\''s working'  ->  it's working
cmd = "echo 'it'\\''s working' \"with spaces\""
tokens = shlex.split(cmd)
print(tokens)
# ['echo', "it's working", 'with spaces']

Safe Subprocess Calls
import shlex
import subprocess
# DANGEROUS: shell=True with unsanitized input
user_input = "file.txt; rm -rf /"
# subprocess.run(f"cat {user_input}", shell=True) # Don't!
# SAFE: split into list, no shell
cmd = f"cat {shlex.quote(user_input)}"
subprocess.run(shlex.split(cmd))
# Or construct list directly
subprocess.run(["cat", user_input])

Quoting for Shell Safety
import shlex
# Quote string for safe shell use
filename = "my file; rm -rf /"
safe = shlex.quote(filename)
print(safe)
# "'my file; rm -rf /'"
# Build safe commands
def safe_command(cmd_template: str, *args, **kwargs) -> str:
    """Fill a ``str.format`` template with shell-quoted argument values.

    Every value is converted with ``str()`` and passed through
    ``shlex.quote`` before substitution, so each placeholder expands to
    exactly one shell word.

    Args:
        cmd_template: Format string such as ``"grep {pattern} {file}"``.
            Do not wrap placeholders in quotes of their own; the values
            arrive already quoted.
        *args: Values for positional placeholders (``{}`` / ``{0}``).
        **kwargs: Values for named placeholders.

    Returns:
        The command string with all placeholders replaced.

    Raises:
        KeyError, IndexError: If the template references a placeholder
            for which no value was supplied (raised by ``str.format``).
    """
    quoted_args = [shlex.quote(str(value)) for value in args]
    quoted_kwargs = {name: shlex.quote(str(value)) for name, value in kwargs.items()}
    return cmd_template.format(*quoted_args, **quoted_kwargs)
cmd = safe_command("grep {pattern} {file}",
pattern="hello world",
file="path/to/file.txt")
print(cmd)
# "grep 'hello world' path/to/file.txt"
# (shlex.quote leaves strings made only of safe characters unquoted)

Parsing Configuration Files
import shlex
# shlex handles shell-style config files
config_content = '''
# Database settings
DB_HOST = localhost
DB_PORT = 5432
DB_NAME = "my database" # with spaces
'''
def parse_config(content: str) -> dict:
    """Parse shell-style ``KEY = value`` configuration text.

    Blank lines, comment lines, and lines without ``=`` are ignored.
    Values are tokenized with ``shlex`` so quoting is honoured and a
    trailing ``# comment`` is dropped, while a ``#`` inside quotes is kept
    as part of the value.

    Args:
        content: Full text of the configuration.

    Returns:
        Mapping of key to value. A value that tokenizes to exactly one
        word is stored as a string; any other token count (zero or
        several) is stored as a list of strings. If the value cannot be
        tokenized (e.g. an unclosed quote) the raw text before any ``#``
        is stored instead.
    """
    config = {}
    for raw_line in content.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        # Split on the first '=' only; the value may itself contain '='.
        key, sep, value = line.partition('=')
        key = key.strip()
        # A '#' before the '=' means the '=' is inside a comment.
        if not sep or '#' in key:
            continue
        try:
            # comments=True lets shlex strip trailing comments while
            # respecting quotes, unlike a blind split on '#'.
            tokens = shlex.split(value, comments=True)
        except ValueError:
            config[key] = value.split('#')[0].strip()
            continue
        config[key] = tokens[0] if len(tokens) == 1 else tokens
    return config
print(parse_config(config_content))
# {'DB_HOST': 'localhost', 'DB_PORT': '5432', 'DB_NAME': 'my database'}

Lexer for Custom Parsing
import shlex
# Create lexer for fine-grained control
lexer = shlex.shlex('command --flag="value with spaces"', posix=True)
lexer.whitespace_split = True
lexer.commenters = ''
tokens = list(lexer)
print(tokens)
# ['command', '--flag=value with spaces']

Handling POSIX vs Non-POSIX
import shlex
cmd = "echo 'hello'"
# POSIX mode (default): strips quotes
tokens = shlex.split(cmd, posix=True)
print(tokens) # ['echo', 'hello']
# Non-POSIX: preserves quotes
tokens = shlex.split(cmd, posix=False)
print(tokens) # ['echo', "'hello'"]

Stream Processing
Process large files without loading into memory:
import shlex
def parse_command_file(path: str):
    """Yield the tokenized command found on each line of a file.

    The file is read lazily, one line at a time, so it is never loaded
    into memory as a whole. Each line is tokenized independently with
    ``shlex.split(..., comments=True)``: quotes and escapes are honoured
    and ``#`` comments are dropped. Newlines are whitespace to the lexer
    and are never returned as tokens, so command boundaries have to come
    from the line iteration itself.

    Args:
        path: Path of the text file to read, one command per line.

    Yields:
        A list of string tokens for every line that contains at least
        one token; blank and comment-only lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a line cannot be tokenized, e.g. a quote opened on
            a line is not closed on that same line.
    """
    with open(path) as f:
        for line in f:
            tokens = shlex.split(line, comments=True)
            if tokens:
                yield tokens
for cmd in parse_command_file('commands.sh'):
    print(cmd)

Custom Lexer Configuration
import shlex
text = 'key1=value1, key2="quoted value", key3=value3'
lexer = shlex.shlex(text, posix=True)
lexer.wordchars += '=' # Include = in words
lexer.whitespace += ',' # Treat , as whitespace
lexer.whitespace_split = True
tokens = list(lexer)
print(tokens)
# ['key1=value1', 'key2=quoted value', 'key3=value3']

Join Command List
Reverse of split—create safe command string:
import shlex
def join_command(args: list) -> str:
    """Join an argument list into a single shell-safe command string.

    Each argument is quoted as needed and the results are separated by
    single spaces, so ``shlex.split`` on the returned string gives back
    the original list.

    Args:
        args: Command arguments as strings.

    Returns:
        The quoted command line; an empty string for an empty list.
    """
    # shlex.join is the stdlib inverse of shlex.split.
    return shlex.join(args)
args = ['grep', '-r', 'hello world', '/path/with spaces/']
cmd = join_command(args)
print(cmd)
# "grep -r 'hello world' '/path/with spaces/'"
# Roundtrip
assert shlex.split(cmd) == args

Error Handling
import shlex
def safe_split(cmd: str) -> list | None:
"""Split command, return None on parse error."""
try:
return shlex.split(cmd)
except ValueError as e:
print(f"Parse error: {e}")
return None
# Unclosed quote
safe_split('echo "hello') # Parse error: No closing quotation
# Valid
safe_split('echo "hello"') # ['echo', 'hello']

Environment Variable Expansion
import shlex
import os
def expand_vars(cmd: str) -> str:
    """Return *cmd* with environment-variable references expanded.

    Delegates to ``os.path.expandvars``: references such as ``$NAME`` and
    ``${NAME}`` are replaced by the variable's value, and references to
    undefined variables are left unchanged.

    The substitution is purely textual and the inserted values are not
    quoted. If the result is tokenized afterwards, a value containing
    whitespace or quote characters will change the token boundaries.

    Args:
        cmd: Command string possibly containing variable references.

    Returns:
        The command string after expansion.
    """
    return os.path.expandvars(cmd)
# With shlex
os.environ['HOME'] = '/home/user'
cmd = 'ls $HOME/documents'
expanded = expand_vars(cmd)
tokens = shlex.split(expanded)
print(tokens)
# ['ls', '/home/user/documents']

Command Building Pattern
import shlex
from typing import List
class CommandBuilder:
"""Build shell commands safely."""
def __init__(self, program: str):
self.parts = [program]
def arg(self, value: str) -> 'CommandBuilder':
self.parts.append(value)
return self
def flag(self, name: str, value: str = None) -> 'CommandBuilder':
self.parts.append(name)
if value is not None:
self.parts.append(value)
return self
def to_list(self) -> List[str]:
return self.parts.copy()
def to_string(self) -> str:
return ' '.join(shlex.quote(p) for p in self.parts)
# Usage
cmd = (CommandBuilder('rsync')
.flag('-avz')
.flag('--exclude', '*.tmp')
.arg('/source/path/')
.arg('user@host:/dest/'))
print(cmd.to_list())
# ['rsync', '-avz', '--exclude', '*.tmp', '/source/path/', 'user@host:/dest/']
print(cmd.to_string())
# "rsync -avz --exclude '*.tmp' /source/path/ user@host:/dest/"

Parsing Quoted Strings in Data
import shlex
import csv
from io import StringIO
# Parse CSV-like data with shell quoting
data = '''
name,command
"backup job","rsync -av '/data/my files/' /backup/"
"cleanup","rm -f '*.tmp'"
'''
def parse_commands(data: str) -> dict:
    """Parse CSV text whose ``command`` column holds shell-quoted commands.

    Surrounding whitespace is stripped from *data* first, so the header
    row may be preceded by blank lines. The CSV layer removes the outer
    field quoting; ``shlex.split`` then tokenizes each command.

    Args:
        data: CSV text with a header row containing ``name`` and
            ``command`` columns.

    Returns:
        Mapping of each row's ``name`` to its tokenized ``command``. If a
        name occurs more than once, the last row wins.

    Raises:
        KeyError: If a required column is missing from the header.
        ValueError: If a command cannot be tokenized (e.g. unclosed quote).
    """
    reader = csv.DictReader(StringIO(data.strip()))
    return {row['name']: shlex.split(row['command']) for row in reader}
commands = parse_commands(data)
print(commands)
# {'backup job': ['rsync', '-av', '/data/my files/', '/backup/'],
# 'cleanup': ['rm', '-f', '*.tmp']}

The shlex module is essential for safely handling shell commands. Always use shlex.quote() when building commands with user input, and shlex.split() instead of naive string splitting.
React to this post: