# Generators & Iterators
Checking access...
Generators produce values on demand — they don’t store the entire sequence in memory.
## Generator Functions
def count_up_to(n):
    """Generator that yields numbers from 1 to n.

    Values are produced lazily, one per ``next()`` call (or loop step).
    If ``n`` is less than 1 the generator finishes without yielding.

    Args:
        n: Inclusive upper bound of the count.

    Yields:
        The integers 1, 2, ..., n in ascending order.
    """
    i = 1
    while i <= n:
        yield i
        i += 1
# Using the generator
counter = count_up_to(5)
print(next(counter))  # 1
print(next(counter))  # 2
print(next(counter))  # 3

# Iterate over remaining
# (the generator keeps its position, so the loop resumes after the 3 above)
for num in counter:
    print(num)  # 4, 5

# Convert to list (defeats the memory benefit)
numbers = list(count_up_to(10))

## Generator Expressions
# Generator expression — like list comprehension but lazy
squares = (x ** 2 for x in range(10))

print(next(squares))  # 0
print(next(squares))  # 1

# More memory efficient than list comprehension
sum(x ** 2 for x in range(1000000))  # No intermediate list

## Yield vs Return
def lazy_sequence():
    """Generator — can yield multiple values.

    Yields:
        1, then 2, then 3, each produced only when the caller asks for it.
    """
    yield 1
    yield 2
    yield 3
def regular_sequence():
    """Function — returns once.

    Returns:
        A new list ``[1, 2, 3]`` built in full before the call returns.
    """
    return [1, 2, 3]
# Generator produces values on demand
gen = lazy_sequence()
for value in gen:
    print(value)

# After iteration, generator is exhausted
print(list(gen))  # []

# Function returns all at once
# (the returned list can be iterated again, unlike the exhausted generator)
result = regular_sequence()
for value in result:
    print(value)

## Generator Use Cases
### Reading Large Files
def read_large_file(filepath):
    """Read file line by line without loading entire file.

    The file is opened when iteration starts and stays open until the
    generator is exhausted or closed, because the ``with`` block lives
    inside the generator body.

    Args:
        filepath: Path of the text file to read.

    Yields:
        Each line with leading and trailing whitespace (including the
        newline) removed. Blank lines are yielded as empty strings.
    """
    with open(filepath) as f:
        for line in f:
            yield line.strip()
# Memory efficient — one line in memory at a time
# NOTE(review): process() is not defined in the visible part of this file;
# presumably a placeholder for whatever per-line handling the caller needs.
for line in read_large_file("huge_file.txt"):
    process(line)

### Infinite Sequences
def fibonacci():
    """Infinite Fibonacci sequence generator.

    The generator never finishes on its own; the caller decides how many
    values to take (for example with ``next()`` or ``itertools.islice``).

    Yields:
        0, 1, 1, 2, 3, 5, ... without end.
    """
    a, b = 0, 1
    while True:
        yield a
        # Advance the pair in one step so `a + b` uses the old values.
        a, b = b, a + b
# Take what you need
fib = fibonacci()
first_10 = [next(fib) for _ in range(10)]
print(first_10)  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

# Get first Fibonacci > 1000
fib = fibonacci()
for num in fib:
    if num > 1000:
        print(num)  # 1597
        break  # required: the generator is infinite

### Pipeline Pattern
def read_lines(filename):
    """Yield the lines of ``filename`` one at a time, unmodified.

    Line endings are kept; nothing is stripped here.

    Args:
        filename: Path of the text file to read.

    Yields:
        Each line exactly as the file iterator produces it.
    """
    with open(filename) as f:
        for line in f:
            yield line
def filter_lines(lines, pattern):
    """Yield only the lines in which ``pattern`` is found.

    Matching uses regular-expression *search* semantics, so the pattern may
    match anywhere in the line.

    Args:
        lines: Iterable of strings to filter.
        pattern: Regular expression (string or compiled pattern).

    Yields:
        Each line from ``lines`` that contains a match, unchanged.
    """
    import re

    # Compile once up front rather than handing the raw pattern to
    # re.search() for every line.
    matches = re.compile(pattern).search
    for line in lines:
        if matches(line):
            yield line
def strip_lines(lines):
    """Yield each line with leading and trailing whitespace removed.

    Args:
        lines: Iterable of strings.

    Yields:
        ``line.strip()`` for every line, in input order.
    """
    for line in lines:
        yield line.strip()
def capitalize_lines(lines):
    """Yield each line converted to upper case.

    Despite the name, this applies ``str.upper`` (every character), not
    ``str.capitalize`` (first character only).

    Args:
        lines: Iterable of strings.

    Yields:
        ``line.upper()`` for every line, in input order.
    """
    for line in lines:
        yield line.upper()
# Pipeline — each step is lazy
# (building the chain does no work; lines are pulled through one at a time
# once the for loop below starts iterating)
pipeline = capitalize_lines(
    strip_lines(
        filter_lines(
            read_lines("data.txt"),
            "ERROR",
        )
    )
)

for line in pipeline:
    print(line)

## yield from — Delegating to Sub-generators
def gen1():
    """Yield 1, then 2."""
    yield 1
    yield 2
def gen2():
    """Yield 3, then 4."""
    yield 3
    yield 4
def combined():
    """Yield everything from ``gen1()``, then everything from ``gen2()``.

    ``yield from`` delegates to each sub-generator in turn, so the values
    reach the caller without an explicit inner loop.
    """
    yield from gen1()
    yield from gen2()


print(list(combined()))  # [1, 2, 3, 4]
# Flatten nested iterables
def flatten(nested):
    """Yield the leaf items of arbitrarily nested lists and tuples.

    Only ``list`` and ``tuple`` instances are descended into; every other
    item (including strings and other iterables) is yielded as is.

    Args:
        nested: Iterable whose items may themselves be lists or tuples.

    Yields:
        The non-list, non-tuple items in depth-first, left-to-right order.
    """
    for item in nested:
        if isinstance(item, (list, tuple)):
            # Recurse and forward the sub-generator's values directly.
            yield from flatten(item)
        else:
            yield item
nested = [1, [2, [3, 4], 5], 6]
print(list(flatten(nested)))  # [1, 2, 3, 4, 5, 6]

## Custom Iterators
class CountDown:
    """Custom iterator implementing the iterator protocol.

    Counts down from ``start`` to 1. The object is its own iterator, so it
    can be consumed only once.
    """

    def __init__(self, start):
        """Store the first value to produce."""
        self.current = start

    def __iter__(self):
        return self  # Iterators return themselves

    def __next__(self):
        """Return the current value and step down by one.

        Raises:
            StopIteration: once the counter has reached 0 or below.
        """
        if self.current <= 0:
            raise StopIteration
        value = self.current
        self.current -= 1
        return value
# The for loop calls iter() and then next() until StopIteration is raised.
for n in CountDown(5):
    print(n)  # 5, 4, 3, 2, 1
# You can also make any class iterable
class Range:
    """Iterable (not iterator) — returns new iterator each time.

    Because ``__iter__`` builds a fresh ``RangeIterator`` on every call, the
    same ``Range`` object can be looped over repeatedly.
    """

    def __init__(self, start, end):
        """Store the bounds passed on to each new iterator."""
        self.start = start
        self.end = end

    def __iter__(self):
        """Return a new, independent iterator over this range."""
        return RangeIterator(self.start, self.end)
class RangeIterator:
    """Iterator that counts from ``start`` up to, but not including, ``end``."""

    def __init__(self, start, end):
        """Store the first value to produce and the exclusive upper bound."""
        self.current = start
        self.end = end

    def __iter__(self):
        return self

    def __next__(self):
        """Return the current value and step up by one.

        Raises:
            StopIteration: once the current value has reached ``end``.
        """
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value

## itertools with Generators
from itertools import islice, takewhile, dropwhile
# Take first n items from infinite generator
fib = fibonacci()
first_20 = list(islice(fib, 20))

# Take while condition is true
# (takewhile stops at the first failing value, so this ends even though
# the generator itself is infinite)
fib = fibonacci()
under_100 = list(takewhile(lambda x: x < 100, fib))

# Skip while condition is true
# (once the predicate fails, everything after it is kept — including the
# trailing 1 and 2)
data = [1, 2, 3, 4, 5, 1, 2]
result = list(dropwhile(lambda x: x < 4, data))
# [4, 5, 1, 2]

## Generator.send() and .close()
def echo():
    """Generator that receives values and prints them.

    Each bare ``yield`` pauses the generator and hands ``None`` back to the
    caller; the value passed to ``send()`` becomes the result of that
    ``yield`` expression. The generator must be advanced to its first
    ``yield`` with ``next()`` before ``send()`` can deliver a value.

    Yields:
        ``None`` each time it pauses to wait for the next value.
    """
    while True:
        received = yield
        print(f"Received: {received}")
gen = echo()
next(gen)  # Prime the generator
gen.send("hello")  # Received: hello
gen.send("world")  # Received: world
gen.close()  # GeneratorExit raised inside generator

## Key Takeaways
- Generators use `yield` instead of `return` — they produce values lazily
- Generator expressions: `(x for x in iterable)` — similar to list comprehensions but lazy
- Generators are memory-efficient for large/infinite sequences
- `yield from` delegates to sub-generators (useful for flattening)
- Pipeline pattern: chain generators for memory-efficient data processing
- Custom iterators implement `__iter__` and `__next__`
- `itertools.islice` takes slices of infinite generators
- Generators can receive values with `.send()` and be terminated with `.close()`