Regex is powerful but cryptic. Here's how to use it effectively in Python.
Basic Usage
import re
text = "Contact us at hello@example.com"
# Search for pattern
match = re.search(r"\w+@\w+\.\w+", text)
if match:
print(match.group()) # hello@example.com
Key Functions
# search - find first match anywhere
re.search(pattern, text)
# match - match at start only
re.match(pattern, text)
# findall - find all matches
re.findall(pattern, text)
# finditer - iterator of match objects
re.finditer(pattern, text)
# sub - replace matches
re.sub(pattern, replacement, text)
# split - split by pattern
re.split(pattern, text)
Common Patterns
# Digits
r"\d" # Single digit
r"\d+" # One or more digits
r"\d{3}" # Exactly 3 digits
r"\d{2,4}" # 2 to 4 digits
# Word characters
r"\w" # Letter, digit, underscore
r"\w+" # Word
# Whitespace
r"\s" # Space, tab, newline
r"\s+" # One or more whitespace
# Any character
r"." # Any char except newline
r".*" # Any number of any chars
Character Classes
r"[aeiou]" # Any vowel
r"[a-z]" # Lowercase letter
r"[A-Z]" # Uppercase letter
r"[0-9]" # Digit
r"[a-zA-Z0-9]" # Alphanumeric
r"[^0-9]" # NOT a digit
Anchors
r"^start" # Start of string
r"end$" # End of string
r"\bword\b" # Word boundary
Groups
text = "John Smith, 25 years old"
# Capturing groups
match = re.search(r"(\w+) (\w+), (\d+)", text)
if match:
print(match.group(0)) # Full match
print(match.group(1)) # John
print(match.group(2)) # Smith
print(match.group(3)) # 25
print(match.groups()) # ('John', 'Smith', '25')
Named Groups
pattern = r"(?P<first>\w+) (?P<last>\w+), (?P<age>\d+)"
match = re.search(pattern, text)
if match:
print(match.group("first")) # John
print(match.groupdict()) # {'first': 'John', ...}
Substitution
text = "Hello World"
# Simple replace
re.sub(r"World", "Python", text) # Hello Python
# With groups
text = "John Smith"
re.sub(r"(\w+) (\w+)", r"\2, \1", text) # Smith, John
# With function
def upper(match):
return match.group(0).upper()
re.sub(r"\w+", upper, "hello world") # HELLO WORLD
Flags
# Case insensitive
re.search(r"hello", "HELLO", re.IGNORECASE)
re.search(r"hello", "HELLO", re.I)
# Multiline - ^ and $ match line boundaries
re.findall(r"^\w+", text, re.MULTILINE)
# Dotall - . matches newlines too
re.search(r"start.*end", text, re.DOTALL)
# Verbose - allow comments and whitespace
pattern = re.compile(r"""
\d{3} # Area code
- # Separator
\d{4} # Number
""", re.VERBOSE)
Compile for Reuse
# Compile once, use many times
email_pattern = re.compile(r"[\w.-]+@[\w.-]+\.\w+")
emails = email_pattern.findall(text)
is_valid = email_pattern.match(user_input)
Common Patterns
Email
r"[\w.-]+@[\w.-]+\.\w+"
URL
r"https?://[\w./%-]+"
Phone Number
r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
IP Address
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
Date (YYYY-MM-DD)
r"\d{4}-\d{2}-\d{2}"
Greedy vs Non-Greedy
text = "<div>content</div>"
# Greedy (default) - matches as much as possible
re.search(r"<.*>", text).group() # <div>content</div>
# Non-greedy - matches as little as possible
re.search(r"<.*?>", text).group() # <div>
Add ? after a quantifier to make it non-greedy.
Lookahead and Lookbehind
# Positive lookahead - followed by
r"\d+(?= dollars)" # Matches "50" in "50 dollars"
# Negative lookahead - NOT followed by
r"\d+(?! dollars)" # Matches digits not followed by " dollars" (note: via backtracking it still matches "5" in "50 dollars")
# Positive lookbehind - preceded by
r"(?<=\$)\d+" # Matches "50" in "$50"
# Negative lookbehind - NOT preceded by
r"(?<!\$)\d+" # Matches digits not preceded by "$"
Best Practices
Use raw strings:
# Good
r"\d+\.\d+"
# Bad - need to escape backslashes
"\\d+\\.\\d+"
Compile patterns you reuse:
pattern = re.compile(r"\d+")
for text in texts:
pattern.findall(text)
Test your patterns:
# Use regex101.com to test patterns
# Write unit tests for complex patterns
Don't overuse regex:
# For simple checks, string methods are clearer
if "@" in email: # Better than regex for simple check
When Not to Use Regex
- Parsing HTML/XML (use BeautifulSoup)
- Complex nested structures
- When string methods suffice
- When readability matters more than brevity
Regex is powerful but can be cryptic. Use it when it's the right tool.
React to this post: