Generators and iterators are foundational to Python. Understanding them unlocks memory-efficient data processing and elegant code patterns. Here's everything you need to know.
The Iteration Protocol
Python's iteration is built on two methods: __iter__ and __next__.
class Counter:
    """A simple iterator that counts up to a maximum."""

    def __init__(self, max_value):
        # Inclusive upper bound for the count.
        self.max_value = max_value
        self.current = 0

    def __iter__(self):
        # An iterator returns itself.
        return self

    def __next__(self):
        if self.current >= self.max_value:
            raise StopIteration
        self.current += 1
        return self.current
# Use it in a for loop
for num in Counter(5):
print(num) # 1, 2, 3, 4, 5

How it works:
- for calls __iter__() to get an iterator
- It repeatedly calls __next__() to get values
- StopIteration signals the end
You can also iterate manually:
counter = Counter(3)
iterator = iter(counter) # Calls __iter__
print(next(iterator)) # 1
print(next(iterator)) # 2
print(next(iterator)) # 3
print(next(iterator)) # Raises StopIteration

Iterables vs Iterators
Iterable: Has __iter__, returns an iterator (lists, strings, dicts)
Iterator: Has both __iter__ and __next__, tracks position
# A list is iterable but not an iterator
my_list = [1, 2, 3]
print(hasattr(my_list, '__iter__')) # True
print(hasattr(my_list, '__next__')) # False
# Get an iterator from the list
list_iter = iter(my_list)
print(hasattr(list_iter, '__next__')) # True

Generator Functions
Writing iterator classes is verbose. Generator functions are the Pythonic solution—they create iterators automatically.
def counter(max_value):
    """Same behavior as the Counter class, but simpler."""
    current = 1
    while current <= max_value:
        # yield pauses here; execution resumes on the next next() call.
        yield current
        current += 1
for num in counter(5):
print(num) # 1, 2, 3, 4, 5

The yield keyword:
- Pauses the function and returns a value
- Resumes where it left off on the next call
- Function state is preserved between yields
How Generators Work
def simple_gen():
    """Yield 1, 2, 3, printing between yields to show where execution pauses."""
    print("Starting")
    yield 1
    print("After first yield")
    yield 2
    print("After second yield")
    yield 3
    print("Ending")
gen = simple_gen() # Nothing happens yet
print(next(gen)) # "Starting" then 1
print(next(gen)) # "After first yield" then 2
print(next(gen)) # "After second yield" then 3
print(next(gen)) # "Ending" then StopIteration

The function is "frozen" at each yield, resuming exactly where it stopped.
Returning from Generators
def limited_gen():
    """Yield 1 and 2, then return a value carried on StopIteration.value."""
    yield 1
    yield 2
    return "Done!" # Sets StopIteration.value
gen = limited_gen()
print(next(gen)) # 1
print(next(gen)) # 2
try:
next(gen)
except StopIteration as e:
print(e.value) # "Done!"

Generator Expressions
Like list comprehensions, but lazy:
# List comprehension - creates full list in memory
squares_list = [x**2 for x in range(1000000)]
# Generator expression - creates values on demand
squares_gen = (x**2 for x in range(1000000))

Generator expressions use parentheses instead of brackets:
# Syntax: (expression for item in iterable if condition)
evens = (x for x in range(100) if x % 2 == 0)
# Iterate through them
for num in evens:
print(num) # 0, 2, 4, 6, ...

You can drop the parentheses inside function calls:
# Both work
sum((x**2 for x in range(100)))
sum(x**2 for x in range(100)) # Cleaner

Memory Efficiency
This is why generators matter. They process one item at a time instead of loading everything into memory.
import sys
# List: stores all values
list_comp = [x for x in range(1000000)]
print(sys.getsizeof(list_comp)) # ~8,000,000 bytes
# Generator: stores only the state
gen_exp = (x for x in range(1000000))
print(sys.getsizeof(gen_exp)) # ~200 bytes

Same computation, vastly different memory footprint.
Processing Large Files
def read_large_file(filepath):
    """Read a file line by line without loading it all."""
    with open(filepath) as f:
        # File objects iterate lazily, one line at a time.
        for line in f:
            yield line.strip()


def process_logs(filepath):
    """Process log entries one at a time."""
    for line in read_large_file(filepath):
        if "ERROR" in line:
            yield line
# Process a 10GB log file with minimal memory
for error in process_logs("/var/log/huge.log"):
print(error)

The file is read lazily—only one line is in memory at a time.
Chaining Generators
Build processing pipelines:
def read_lines(filepath):
    """Yield raw lines from filepath lazily."""
    with open(filepath) as f:
        for line in f:
            yield line


def strip_lines(lines):
    """Strip surrounding whitespace from each line."""
    for line in lines:
        yield line.strip()


def filter_comments(lines):
    """Drop lines that start with '#'."""
    for line in lines:
        if not line.startswith("#"):
            yield line


def parse_csv(lines):
    """Split each line on commas."""
    for line in lines:
        yield line.split(",")
# Chain them together
pipeline = parse_csv(
filter_comments(
strip_lines(
read_lines("data.csv")
)
)
)
for row in pipeline:
print(row)

Each step processes one item before passing it along.
Lazy Evaluation
Generators compute values only when requested. This enables patterns that would be impossible with lists.
Infinite Sequences
def count(start=0, step=1):
    """Count forever."""
    n = start
    while True:
        yield n
        n += step


def fibonacci():
    """Generate Fibonacci numbers forever."""
    a, b = 0, 1
    while True:
        yield a
        # Tuple assignment advances both values in one step.
        a, b = b, a + b
# Take first 10 Fibonacci numbers
from itertools import islice
first_10 = list(islice(fibonacci(), 10))
# [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

These generators never exhaust—you pull what you need.
Computing on Demand
def expensive_computation(n):
    """Simulate expensive work."""
    import time
    time.sleep(0.1)
    return n ** 2
# With a list: waits for ALL computations
results = [expensive_computation(x) for x in range(100)] # 10 seconds
# With a generator: computes as needed
def lazy_results(n):
    """Yield expensive_computation(x) for x in range(n), computed on demand."""
    for x in range(n):
        yield expensive_computation(x)
# Only compute what you use
gen = lazy_results(100)
first_three = [next(gen), next(gen), next(gen)] # 0.3 seconds

send, throw, and close
Generators are actually coroutines—they can receive values and handle exceptions.
send() - Send Values Into a Generator
def accumulator():
    """Accumulate values sent to the generator."""
    total = 0
    while True:
        # `yield total` hands back the running total and pauses;
        # send(value) resumes here with `value` as the yield's result.
        value = yield total
        if value is not None:
            total += value
acc = accumulator()
next(acc) # Prime the generator, returns 0
print(acc.send(10)) # 10
print(acc.send(5)) # 15
print(acc.send(20)) # 35

send(value) resumes the generator with value as the result of yield.
throw() - Inject Exceptions
def careful_gen():
    """Yield 1..3, but recover gracefully from a ValueError thrown in at a yield."""
    try:
        yield 1
        yield 2
        yield 3
    except ValueError:
        yield "Caught ValueError!"
        yield "Continuing..."
gen = careful_gen()
print(next(gen)) # 1
print(gen.throw(ValueError)) # "Caught ValueError!"
print(next(gen)) # "Continuing..."

close() - Stop a Generator
def resource_gen():
    """Yield 1..3, running cleanup whether exhausted or closed early."""
    try:
        yield 1
        yield 2
        yield 3
    finally:
        # Runs on exhaustion, close(), or garbage collection.
        print("Cleaning up!")
gen = resource_gen()
print(next(gen)) # 1
gen.close() # "Cleaning up!"

close() raises GeneratorExit at the yield point. Use try/finally for cleanup.
yield from
Delegate to another generator:
def inner():
    """Sub-generator delegated to by outer()."""
    yield 1
    yield 2


def outer():
    """Yield 'a', everything from inner(), then 'b'."""
    yield 'a'
    yield from inner() # Delegate to inner
    yield 'b'
list(outer()) # ['a', 1, 2, 'b']

This is cleaner than:
def outer():
yield 'a'
for x in inner():
yield x
yield 'b'Useful for flattening nested structures:
def flatten(nested):
    """Flatten nested lists."""
    for item in nested:
        if isinstance(item, list):
            # Recurse into sublists, re-yielding their items.
            yield from flatten(item)
        else:
            yield item
nested = [1, [2, 3, [4, 5]], 6]
list(flatten(nested)) # [1, 2, 3, 4, 5, 6]

itertools Highlights
The itertools module provides powerful tools for working with iterators.
Infinite Iterators
from itertools import count, cycle, repeat
# Count from 10 by 2s
for n in count(10, 2):
if n > 20:
break
print(n) # 10, 12, 14, 16, 18, 20
# Cycle through items forever
colors = cycle(['red', 'green', 'blue'])
for _ in range(6):
print(next(colors)) # red, green, blue, red, green, blue
# Repeat a value
list(repeat('hello', 3)) # ['hello', 'hello', 'hello']

Slicing Iterators
from itertools import islice
# Get a slice of any iterator
gen = (x**2 for x in range(100))
list(islice(gen, 5, 10)) # [25, 36, 49, 64, 81]
# First n items
from itertools import islice
first_five = list(islice(fibonacci(), 5))

Combining Iterators
from itertools import chain, zip_longest
# Chain multiple iterators
combined = chain([1, 2], [3, 4], [5, 6])
list(combined) # [1, 2, 3, 4, 5, 6]
# Zip with fill value
list(zip_longest([1, 2], [3, 4, 5], fillvalue=0))
# [(1, 3), (2, 4), (0, 5)]

Filtering
from itertools import filterfalse, takewhile, dropwhile
# Filter items where predicate is False
list(filterfalse(lambda x: x % 2, range(10))) # [0, 2, 4, 6, 8]
# Take while predicate is True
list(takewhile(lambda x: x < 5, [1, 3, 5, 2, 1])) # [1, 3]
# Drop while predicate is True
list(dropwhile(lambda x: x < 5, [1, 3, 5, 2, 1])) # [5, 2, 1]

Grouping
from itertools import groupby
data = [
('fruit', 'apple'),
('fruit', 'banana'),
('veggie', 'carrot'),
('veggie', 'broccoli'),
]
for key, group in groupby(data, key=lambda x: x[0]):
print(f"{key}: {list(group)}")
# fruit: [('fruit', 'apple'), ('fruit', 'banana')]
# veggie: [('veggie', 'carrot'), ('veggie', 'broccoli')]

Permutations and Combinations
from itertools import permutations, combinations, product
# All orderings
list(permutations([1, 2, 3], 2))
# [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]
# Unique combinations
list(combinations([1, 2, 3], 2))
# [(1, 2), (1, 3), (2, 3)]
# Cartesian product
list(product([1, 2], ['a', 'b']))
# [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]

Practical Examples
Batching Items
from itertools import islice
def batched(iterable, n):
    """Yield batches of n items."""
    it = iter(iterable)
    # islice takes up to n items; an empty batch means the iterator is exhausted.
    while batch := list(islice(it, n)):
        yield batch
# Process in chunks
for batch in batched(range(10), 3):
print(batch)
# [0, 1, 2]
# [3, 4, 5]
# [6, 7, 8]
# [9]

Sliding Window
from collections import deque
def sliding_window(iterable, n):
    """Yield sliding windows of size n.

    NOTE(review): this snippet also needs `from itertools import islice`,
    which its section only imports for `deque` — confirm the import line.
    """
    it = iter(iterable)
    # Seed the window with the first n items; maxlen drops the oldest on append.
    window = deque(islice(it, n), maxlen=n)
    if len(window) == n:
        yield tuple(window)
    for item in it:
        window.append(item)
        yield tuple(window)
list(sliding_window([1, 2, 3, 4, 5], 3))
# [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

Processing CSV Files
import csv
def process_large_csv(filepath):
    """Process a CSV file row by row."""
    with open(filepath) as f:
        # DictReader streams rows; nothing is loaded up front.
        reader = csv.DictReader(f)
        for row in reader:
            # Transform each row
            yield {
                'name': row['name'].upper(),
                'value': float(row['value']) * 1.1,
            }
# Works on gigabyte files
for record in process_large_csv('huge_data.csv'):
if record['value'] > 100:
print(record)

Database Streaming
def fetch_users_in_batches(cursor, batch_size=1000):
    """Fetch database rows in memory-efficient batches."""
    while True:
        rows = cursor.fetchmany(batch_size)
        # fetchmany returns an empty sequence when the result set is drained.
        if not rows:
            break
        yield from rows
# Process millions of rows without loading them all
for user in fetch_users_in_batches(db_cursor):
process_user(user)

Tree Traversal
def traverse_tree(node):
    """Pre-order tree traversal."""
    # Visit the node itself first, then recurse into each child.
    yield node
    for child in node.children:
        yield from traverse_tree(child)
# Lazy tree traversal
for node in traverse_tree(root):
if node.name == 'target':
break

Generator Gotchas
Single Use
Generators are exhausted after one iteration:
gen = (x for x in range(3))
print(list(gen)) # [0, 1, 2]
print(list(gen)) # [] - exhausted!
# If you need multiple iterations, use a list or a function
def make_gen():
return (x for x in range(3))Lazy Side Effects
def side_effect_gen():
    """Yield 0..2, printing per item to show the side effects run lazily."""
    for i in range(3):
        print(f"Processing {i}")
        yield i
# Nothing printed yet!
gen = side_effect_gen()
# Now it prints
list(gen) # "Processing 0", "Processing 1", "Processing 2"

Variable Capture
# Bug: all lambdas capture the same variable
funcs = [lambda: i for i in range(3)]
[f() for f in funcs] # [2, 2, 2] - all see final value!
# Fix: use default argument
funcs = [lambda i=i: i for i in range(3)]
[f() for f in funcs] # [0, 1, 2]

Quick Reference
# Generator function
def gen():
yield value
# Generator expression
(expr for x in iterable if cond)
# Iteration protocol
__iter__() -> returns iterator
__next__() -> returns next value or raises StopIteration
# Generator methods
gen.send(value) # Send value into generator
gen.throw(exc) # Throw exception at yield
gen.close() # Stop generator
# yield from
yield from iterable # Delegate to sub-generator
# Useful itertools
islice(it, stop) # Slice an iterator
chain(it1, it2) # Concatenate iterators
cycle(it) # Repeat forever
takewhile(pred, it) # Take while True
groupby(it, key) # Group by key function

Generators are one of Python's most powerful features. They enable memory-efficient processing, elegant pipelines, and lazy evaluation patterns. Once you understand them, you'll find uses everywhere.