The shlex module handles shell-style tokenization. Use it whenever you need to parse or construct shell commands safely.

Splitting Shell Commands

import shlex
 
# Split like a shell would: whitespace separates tokens, while the
# double-quoted phrase stays a single token and loses its quotes
cmd = 'grep -r "hello world" /path/to/dir'
tokens = shlex.split(cmd)
print(tokens)
# ['grep', '-r', 'hello world', '/path/to/dir']
 
# Handles escapes and quotes.
# In POSIX mode a backslash is literal inside single quotes, so an
# apostrophe cannot be escaped there ('it\'s' leaves a quote unclosed and
# raises ValueError). Put the apostrophe inside double quotes instead;
# outside of quotes a backslash escapes the following space.
cmd = "echo \"it's working\" with\\ spaces"
tokens = shlex.split(cmd)
print(tokens)
# ['echo', "it's working", 'with spaces']

Safe Subprocess Calls

import shlex
import subprocess
 
# DANGEROUS: shell=True with unsanitized input
user_input = "file.txt; rm -rf /"
# subprocess.run(f"cat {user_input}", shell=True)  # Don't!
 
# SAFE: split into list, no shell
# quote() wraps the input in single quotes, so split() returns it as ONE
# argument: ['cat', 'file.txt; rm -rf /'] -- the ';' never reaches a shell
cmd = f"cat {shlex.quote(user_input)}"
subprocess.run(shlex.split(cmd))
 
# Or construct list directly
# (no quoting needed: each list item is passed to the program as-is)
subprocess.run(["cat", user_input])

Quoting for Shell Safety

import shlex
 
# Quote string for safe shell use
filename = "my file; rm -rf /"
safe = shlex.quote(filename)
print(safe)
# 'my file; rm -rf /'   (the single quotes are part of the returned string)
 
# Build safe commands
def safe_command(cmd_template: str, **kwargs) -> str:
    """Fill a command template with shell-quoted keyword values.

    Every keyword value is converted with ``str()`` and passed through
    ``shlex.quote()`` before it replaces the ``{name}`` placeholder of the
    same name in ``cmd_template``.

    Args:
        cmd_template: A ``str.format`` template such as ``"grep {pattern} {file}"``.
        **kwargs: Values for the named placeholders in the template.

    Returns:
        The template with each placeholder replaced by its quoted value.
    """
    escaped = {}
    for name, raw in kwargs.items():
        escaped[name] = shlex.quote(str(raw))
    return cmd_template.format(**escaped)
 
cmd = safe_command("grep {pattern} {file}", 
                   pattern="hello world", 
                   file="path/to/file.txt")
print(cmd)
# grep 'hello world' path/to/file.txt
# (shlex.quote() leaves strings made only of safe characters unquoted)

Parsing Configuration Files

import shlex
 
# shlex handles shell-style config files
# Sample input: KEY = value lines, a full-line comment, and a quoted value
# followed by a trailing comment
config_content = '''
# Database settings
DB_HOST = localhost
DB_PORT = 5432
DB_NAME = "my database"  # with spaces
'''
 
def parse_config(content: str) -> dict:
    """Parse shell-style ``KEY = value`` configuration text.

    Blank lines, comment lines and lines without ``=`` are ignored. The
    value is tokenized with ``shlex``, so quotes group words and a ``#``
    inside quotes is kept, while an unquoted ``#`` starts a comment.

    Args:
        content: The configuration text, one setting per line.

    Returns:
        A dict mapping each key to a single string when the value is one
        token, otherwise to the list of tokens (an empty list for an empty
        value). If the value has unbalanced quotes, the raw text before
        any ``#`` is stored instead.
    """
    config = {}
    for line in content.splitlines():
        # Split on the first '='
        key, sep, value = line.partition('=')
        # Skip lines with no '=' and lines whose first '=' is inside a comment
        if not sep or '#' in key:
            continue
        key = key.strip()

        try:
            # comments=True drops a trailing "# ..." comment but, unlike a
            # plain split on '#', leaves a '#' inside quotes untouched
            tokens = shlex.split(value, comments=True)
        except ValueError:
            # Unbalanced quotes: keep the raw value without its comment
            config[key] = value.split('#', 1)[0].strip()
        else:
            config[key] = tokens[0] if len(tokens) == 1 else tokens

    return config
 
# Values are returned as strings (DB_PORT is '5432'); quotes are removed
print(parse_config(config_content))
# {'DB_HOST': 'localhost', 'DB_PORT': '5432', 'DB_NAME': 'my database'}

Lexer for Custom Parsing

import shlex
 
# Create lexer for fine-grained control
lexer = shlex.shlex('command --flag="value with spaces"', posix=True)
# Split on whitespace only, so --flag=... stays one token
lexer.whitespace_split = True
# No comment characters: '#' is treated like any other character
lexer.commenters = ''
 
tokens = list(lexer)
print(tokens)
# ['command', '--flag=value with spaces']

Handling POSIX vs Non-POSIX

import shlex
 
# The same command tokenized in both modes to show the difference
cmd = "echo 'hello'"
 
# POSIX mode (default): strips quotes
tokens = shlex.split(cmd, posix=True)
print(tokens)  # ['echo', 'hello']
 
# Non-POSIX: preserves quotes
tokens = shlex.split(cmd, posix=False)
print(tokens)  # ['echo', "'hello'"]

Stream Processing

Process large files without loading into memory:

import shlex
 
def parse_command_file(path: str):
    """Yield the token list of each command line in a file.

    The file is read lazily, one line at a time. Blank lines and lines
    holding only a ``#`` comment are skipped; a trailing ``#`` comment is
    removed from a command line.

    A single lexer over the whole file cannot be used to find command
    boundaries: shlex treats the newline as ordinary whitespace and never
    returns it as a token, so every token would land in one command.

    Args:
        path: Path of the file containing one shell command per line.

    Yields:
        The tokens of each non-empty command line, as a list of strings.

    Raises:
        ValueError: If a line has unbalanced quotes (a quoted string may
            not span several lines).
    """
    with open(path) as f:
        for line in f:
            tokens = shlex.split(line, comments=True)
            if tokens:
                yield tokens
 
# Print each item yielded for the file 'commands.sh'
for cmd in parse_command_file('commands.sh'):
    print(cmd)

Custom Lexer Configuration

import shlex
 
# Tokenize comma-separated key=value pairs; POSIX mode removes the quotes
text = 'key1=value1, key2="quoted value", key3=value3'
 
lexer = shlex.shlex(text, posix=True)
lexer.wordchars += '='          # Include = in words
lexer.whitespace += ','         # Treat , as whitespace
lexer.whitespace_split = True   # Split tokens on whitespace only
 
tokens = list(lexer)
print(tokens)
# ['key1=value1', 'key2=quoted value', 'key3=value3']

Join Command List

The reverse of split: build a safely quoted command string from a list of arguments (Python 3.8+ also provides shlex.join() for this):

import shlex
 
def join_command(args: list) -> str:
    """Return a shell command string built from an argument list.

    Each argument is shell-quoted where necessary and the results are
    separated by single spaces, so ``shlex.split()`` on the returned
    string gives back the original arguments.
    """
    return shlex.join(args)
 
args = ['grep', '-r', 'hello world', '/path/with spaces/']
cmd = join_command(args)
print(cmd)
# "grep -r 'hello world' '/path/with spaces/'"
# (only the arguments containing spaces needed quoting)
 
# Roundtrip
# Splitting the joined string gives back the original argument list
assert shlex.split(cmd) == args

Error Handling

import shlex
 
def safe_split(cmd: str) -> list | None:
    """Split command, return None on parse error."""
    try:
        return shlex.split(cmd)
    except ValueError as e:
        print(f"Parse error: {e}")
        return None
 
# Unclosed quote
safe_split('echo "hello')  # prints "Parse error: No closing quotation", returns None
 
# Valid
safe_split('echo "hello"')  # returns ['echo', 'hello']

Environment Variable Expansion

import shlex
import os
 
def expand_vars(cmd: str) -> str:
    """Return ``cmd`` with environment variable references substituted.

    This is a thin wrapper around ``os.path.expandvars``; the command
    string is not tokenized or quoted here.
    """
    expanded_cmd = os.path.expandvars(cmd)
    return expanded_cmd
 
# With shlex
os.environ['HOME'] = '/home/user'
cmd = 'ls $HOME/documents'
 
# NOTE: variables are expanded BEFORE tokenizing, so an expanded value that
# contains spaces or quote characters is re-split by shlex.split() below
expanded = expand_vars(cmd)
tokens = shlex.split(expanded)
print(tokens)
# ['ls', '/home/user/documents']

Command Building Pattern

import shlex
from typing import List
 
class CommandBuilder:
    """Build shell commands safely."""
    
    def __init__(self, program: str):
        self.parts = [program]
    
    def arg(self, value: str) -> 'CommandBuilder':
        self.parts.append(value)
        return self
    
    def flag(self, name: str, value: str = None) -> 'CommandBuilder':
        self.parts.append(name)
        if value is not None:
            self.parts.append(value)
        return self
    
    def to_list(self) -> List[str]:
        return self.parts.copy()
    
    def to_string(self) -> str:
        return ' '.join(shlex.quote(p) for p in self.parts)
 
# Usage
cmd = (CommandBuilder('rsync')
       .flag('-avz')
       .flag('--exclude', '*.tmp')
       .arg('/source/path/')
       .arg('user@host:/dest/'))
 
# List form: arguments exactly as given, with no quoting applied
print(cmd.to_list())
# ['rsync', '-avz', '--exclude', '*.tmp', '/source/path/', 'user@host:/dest/']
 
# String form: only '*.tmp' is quoted, because '*' is special to the shell
print(cmd.to_string())
# "rsync -avz --exclude '*.tmp' /source/path/ user@host:/dest/"

Parsing Quoted Strings in Data

import shlex
import csv
from io import StringIO
 
# Parse CSV-like data with shell quoting
# Each command field is CSV-quoted with double quotes; inside it, single
# quotes are shell quoting for the command's own arguments
data = '''
name,command
"backup job","rsync -av '/data/my files/' /backup/"
"cleanup","rm -f '*.tmp'"
'''
 
def parse_commands(data: str) -> dict:
    """Map each job name in CSV text to its tokenized command.

    ``data`` is CSV text with a header row containing ``name`` and
    ``command`` columns; surrounding whitespace is stripped before
    parsing. Each ``command`` value is split with ``shlex.split``.

    Returns:
        A dict of ``name`` -> list of command tokens.
    """
    commands = {}
    for record in csv.DictReader(StringIO(data.strip())):
        # Read the key first, matching the original key-then-value order
        job_name = record['name']
        commands[job_name] = shlex.split(record['command'])
    return commands
 
# csv removes the double quotes around each field, then shlex.split removes
# the single quotes inside the command
commands = parse_commands(data)
print(commands)
# {'backup job': ['rsync', '-av', '/data/my files/', '/backup/'],
#  'cleanup': ['rm', '-f', '*.tmp']}

The shlex module is essential for safely handling shell commands. Always use shlex.quote() when building commands with user input, and shlex.split() instead of naive string splitting.

React to this post: