60 MINS advanced
17. Operator Toolkit
Module 17: Tooling
Debugging, Logging, Testing, and Profiling Automation Systems
Production automation systems break in ways that development environments never reveal. A scraper that works perfectly for 100 URLs starts failing silently at 10,000. A bot that handles normal messages breaks on edge-case inputs. A pipeline that runs fine on your laptop crashes in Docker. This module covers the professional tooling practices that turn debugging from frustration into methodology.
Structured Logging with Loguru
from loguru import logger
import sys
from pathlib import Path
from functools import wraps
import time
import uuid
# Configure loguru for production.
# Three sinks are registered: a human-readable console sink, a daily rotated
# file sink, and an error-only file sink intended for alerting.
logger.remove()  # remove default handler

# Console handler — human readable
logger.add(
    sys.stdout,
    # Separator is a real em dash (the previous literal was mis-decoded), and
    # the name/line spacing now matches the file handler's `{name}:...:{line}`.
    format='{time:HH:mm:ss} | {level: <8} | {name}:{line} — {message}',
    level='DEBUG',
    colorize=True,
)

# File handler — structured for analysis
logger.add(
    'logs/automation_{time:YYYY-MM-DD}.log',
    format='{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {name}:{function}:{line} | {message}',
    rotation='00:00',  # rotate at midnight
    retention='30 days',
    compression='gz',  # compress old logs
    level='INFO',
)

# Error-only handler for alerting
logger.add(
    'logs/errors.log',
    level='ERROR',
    rotation='10 MB',
)
def with_logging(operation: str):
    """Build a decorator that logs a call's start, outcome, and duration.

    Every call gets a short random correlation id. That id and ``operation``
    are bound onto the logger with ``logger.bind`` before anything is logged.

    Args:
        operation: Label used in the "Starting", "Completed" and "Failed"
            log messages.

    Returns:
        A decorator. The decorated callable returns whatever the wrapped
        callable returns; any ``Exception`` it raises is logged with
        ``log.exception`` and then re-raised unchanged.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            correlation_id = str(uuid.uuid4())[:8]
            log = logger.bind(operation=operation, correlation_id=correlation_id)
            log.info(f'Starting {operation}')
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed (or go negative) when the wall clock is adjusted.
            start = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                duration = time.perf_counter() - start
                log.exception(f'Failed {operation} after {duration:.3f}s: {type(e).__name__}: {e}')
                raise
            else:
                # Logged outside the try body so that an error raised while
                # logging success is not reported as an operation failure.
                duration = time.perf_counter() - start
                log.success(f'Completed {operation} in {duration:.3f}s')
                return result
        return wrapper
    return decorator
@with_logging('scrape_product_page')
def scrape_product(url: str) -> dict:
    """Placeholder scraper for a single product page.

    The scraping logic is not implemented here; an empty dict is returned.
    """
    # scraping logic
    product = {}
    return product


# 🧪 Testing Automation Code
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

import pytest
from bs4 import BeautifulSoup

# Test HTML fixtures
# NOTE(review): the original fixture literal contained only whitespace, so
# soup.find() in test_parse_product returned None and the test could not pass.
# The markup below is reconstructed from the assertions in test_parse_product.
# The outer `product-card` class comes from that test; the inner tag and class
# names are assumptions — confirm them against ProductScraper.parse_product.
SAMPLE_PRODUCT_HTML = '''
<div class="product-card">
    <a class="product-link" href="/products/test-product">
        <h2 class="product-name">Test Product Name</h2>
    </a>
    <span class="price">$29.99</span>
</div>
'''


# Test data validators — pure functions, easy to test
def test_clean_price():
    """clean_price strips currency symbols and separators; junk becomes None."""
    from capstone.processor import DataValidator

    assert DataValidator.clean_price('$29.99') == 29.99
    assert DataValidator.clean_price('1,299.00') == 1299.00
    # Pound sign restored; the previous literal was a mis-decoded '£'.
    assert DataValidator.clean_price('£45') == 45.0
    assert DataValidator.clean_price('') is None
    assert DataValidator.clean_price('N/A') is None


def test_parse_product():
    """parse_product extracts name, price and absolute URL from a product card."""
    from capstone.scraper import ProductScraper

    soup = BeautifulSoup(SAMPLE_PRODUCT_HTML, 'lxml')
    card = soup.find('div', class_='product-card')
    # Fail with a clear message if the fixture itself is broken.
    assert card is not None, 'fixture is missing the product-card element'

    scraper = ProductScraper(base_url='https://example.com')
    product = scraper.parse_product(card)

    assert product is not None
    assert product.name == 'Test Product Name'
    assert product.price == 29.99
    assert product.url == 'https://example.com/products/test-product'


# Mock HTTP requests in tests
@patch('requests.Session.get')
def test_scraper_handles_404(mock_get):
    """get_page returns None, and does not raise, when the server answers 404."""
    from requests import HTTPError
    from capstone.scraper import EcommerceScraper

    mock_response = Mock()
    mock_response.status_code = 404
    mock_response.raise_for_status.side_effect = HTTPError('404 Not Found')
    mock_get.return_value = mock_response

    scraper = EcommerceScraper()
    result = scraper.get_page('https://example.com/nonexistent')

    assert result is None  # should handle gracefully, not raise


# Integration test with Playwright (marks as slow)
@pytest.mark.slow
def test_browser_loads_page():
    """A real headless Chromium can load the sandbox site and read its title."""
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            # Explicit timeout in milliseconds, so a hung network fails the
            # test after a bounded wait.
            page.goto('https://books.toscrape.com', timeout=30_000)
            assert page.title() == 'All products | Books to Scrape - Sandbox'
        finally:
            # Close the browser even when the assertion above fails.
            browser.close()
Profiling and Performance
import cProfile
import pstats
import io
from memory_profiler import profile as memory_profile
import time
from functools import wraps
def profile_function(func):
    """Decorator that profiles each call to ``func`` with cProfile.

    After a successful call, the 20 entries with the highest cumulative time
    are printed to stdout and the wrapped function's result is returned.
    If ``func`` raises, the exception propagates and no report is printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        pr = cProfile.Profile()
        pr.enable()
        try:
            result = func(*args, **kwargs)
        finally:
            # Always stop the profiler, even when func raises. A profiler left
            # enabled keeps instrumenting the process, and on Python 3.12+ it
            # makes the next cProfile.enable() fail.
            pr.disable()
        s = io.StringIO()
        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
        ps.print_stats(20)  # top 20 functions
        print(s.getvalue())
        return result
    return wrapper
# Memory profiling for large scrapers
@memory_profile
def scrape_large_dataset(urls: list):
    """Collect one (placeholder) record per URL and return them all as a list.

    Kept as an explicit loop so the per-line work stays visible to the
    memory_profile decorator applied above.
    """
    collected = []
    for url in urls:
        # Placeholder for the real per-URL scrape; currently an empty record.
        record = {}
        collected.append(record)
    # Watch this list in the profile output: it grows with every URL.
    return collected
# Timing decorator for specific operations
def timeit(func):
    """Decorator that logs, at DEBUG level, how long each call to ``func`` took.

    The elapsed time is reported in milliseconds. Nothing is logged when
    ``func`` raises; the exception simply propagates.
    """
    @wraps(func)
    def timed(*args, **kwargs):
        t0 = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        logger.debug(f'{func.__name__} took {elapsed*1000:.2f}ms')
        return outcome

    return timed


# Common Production Failure Modes and Fixes
- Memory Leak: Accumulating results in a list without writing to disk. Fix: batch write every N records and clear the list.
- Connection Pool Exhaustion: Creating a new requests.Session() per request instead of sharing one session. Fix: use a single shared session per worker.
- Silent Data Loss: Using bare except clauses that catch exceptions and do nothing. Fix: always log exceptions with full traceback.
- Infinite Wait: Network request with no timeout. Fix: always set timeout= on every requests call and every Playwright page.goto call.
- Stale Session Cookies: Loading saved cookies from last week. Fix: validate cookies on load and refresh if expired.
- Race Condition: Two workers processing the same URL. Fix: use atomic Redis operations (zpopmax) for task distribution.
Knowledge Check
Ready to test your understanding of 17. Operator Toolkit?