Regex is powerful but cryptic. Here's how to use it effectively in Python.

Basic Usage

import re
 
text = "Contact us at hello@example.com"
 
# Search for pattern
match = re.search(r"\w+@\w+\.\w+", text)
if match:
    print(match.group())  # hello@example.com

Key Functions

# search - find first match anywhere
re.search(pattern, text)
 
# match - match at start only
re.match(pattern, text)
 
# findall - find all matches
re.findall(pattern, text)
 
# finditer - iterator of match objects
re.finditer(pattern, text)
 
# sub - replace matches
re.sub(pattern, replacement, text)
 
# split - split by pattern
re.split(pattern, text)

Common Patterns

# Digits
r"\d"       # Single digit
r"\d+"      # One or more digits
r"\d{3}"    # Exactly 3 digits
r"\d{2,4}"  # 2 to 4 digits
 
# Word characters
r"\w"       # Letter, digit, underscore
r"\w+"      # Word
 
# Whitespace
r"\s"       # Space, tab, newline
r"\s+"      # One or more whitespace
 
# Any character
r"."        # Any char except newline
r".*"       # Zero or more of any char (except newline)

Character Classes

r"[aeiou]"      # Any vowel
r"[a-z]"        # Lowercase letter
r"[A-Z]"        # Uppercase letter
r"[0-9]"        # Digit
r"[a-zA-Z0-9]"  # Alphanumeric
r"[^0-9]"       # NOT a digit

Anchors

r"^start"   # Start of string
r"end$"     # End of string (or just before a trailing newline)
r"\bword\b" # Word boundary

Groups

text = "John Smith, 25 years old"
 
# Capturing groups
match = re.search(r"(\w+) (\w+), (\d+)", text)
if match:
    print(match.group(0))  # Full match
    print(match.group(1))  # John
    print(match.group(2))  # Smith
    print(match.group(3))  # 25
    print(match.groups())  # ('John', 'Smith', '25')

Named Groups

pattern = r"(?P<first>\w+) (?P<last>\w+), (?P<age>\d+)"
match = re.search(pattern, text)
if match:
    print(match.group("first"))  # John
    print(match.groupdict())     # {'first': 'John', ...}

Substitution

text = "Hello World"
 
# Simple replace
re.sub(r"World", "Python", text)  # Hello Python
 
# With groups
text = "John Smith"
re.sub(r"(\w+) (\w+)", r"\2, \1", text)  # Smith, John
 
# With function
def upper(match):
    return match.group(0).upper()
 
re.sub(r"\w+", upper, "hello world")  # HELLO WORLD

Flags

# Case insensitive
re.search(r"hello", "HELLO", re.IGNORECASE)
re.search(r"hello", "HELLO", re.I)
 
# Multiline - ^ and $ match line boundaries
re.findall(r"^\w+", text, re.MULTILINE)
 
# Dotall - . matches newlines too
re.search(r"start.*end", text, re.DOTALL)
 
# Verbose - allow comments and whitespace
pattern = re.compile(r"""
    \d{3}   # Area code
    -       # Separator
    \d{4}   # Number
""", re.VERBOSE)

Compile for Reuse

# Compile once, use many times
email_pattern = re.compile(r"[\w.-]+@[\w.-]+\.\w+")
 
emails = email_pattern.findall(text)
is_valid = email_pattern.match(user_input)

Common Real-World Patterns

Email

r"[\w.-]+@[\w.-]+\.\w+"

URL

r"https?://[\w./%-]+"

Phone Number

r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"

IP Address

r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"

Date (YYYY-MM-DD)

r"\d{4}-\d{2}-\d{2}"

Greedy vs Non-Greedy

text = "<div>content</div>"
 
# Greedy (default) - matches as much as possible
re.search(r"<.*>", text).group()   # <div>content</div>
 
# Non-greedy - matches as little as possible
re.search(r"<.*?>", text).group()  # <div>

Add ? after a quantifier (*?, +?, ??, {m,n}?) to make it non-greedy.

Lookahead and Lookbehind

# Positive lookahead - followed by
r"\d+(?= dollars)"  # Matches "50" in "50 dollars"
 
# Negative lookahead - NOT followed by
r"\d+(?! dollars)"  # Matches digits not followed by " dollars"
                    # (beware backtracking: still matches "5" in "50 dollars")
 
# Positive lookbehind - preceded by
r"(?<=\$)\d+"       # Matches "50" in "$50"
 
# Negative lookbehind - NOT preceded by
r"(?<!\$)\d+"       # Matches digits not preceded by "$"
                    # (beware: still matches "0" in "$50", since "0" follows "5")

Best Practices

Use raw strings:

# Good
r"\d+\.\d+"
 
# Bad - need to escape backslashes
"\\d+\\.\\d+"

Compile patterns you reuse:

pattern = re.compile(r"\d+")
for text in texts:
    pattern.findall(text)

Test your patterns:

# Use regex101.com to test patterns
# Write unit tests for complex patterns

Don't overuse regex:

# For simple checks, string methods are clearer
if "@" in email:  # Better than regex for simple check

When Not to Use Regex

  • Parsing HTML/XML (use BeautifulSoup)
  • Complex nested structures
  • When string methods suffice
  • When readability matters more than brevity

Regex is powerful but can be cryptic. Use it when it's the right tool.

React to this post: