75 MINS intermediate
6. Task Automation Systems
Module 06: Task Automation
CLI Tools, Email Automation, and System Tasks
Beyond web scraping, automation engineers build tools that orchestrate work at the operating system level — sending emails, processing files in bulk, building CLI utilities, monitoring systems, and integrating with cloud storage. These are the unglamorous workhorses that save enterprises thousands of hours annually.
🖥️ Building Professional CLI Tools
A CLI tool with proper arguments, help text, and error handling is dramatically more useful than a script with hardcoded values. typer is the modern standard for Python CLI development:
import typer
from pathlib import Path
from typing import Optional
from rich.console import Console
from rich.progress import track
from rich.table import Table
# Root Typer application; every function decorated with @app.command()
# below is registered on it as a subcommand.
app = typer.Typer(help='VoidX Automation CLI — Operator-grade task runner')
# Shared Rich console the commands use for styled terminal output.
console = Console()
@app.command()
def scrape(
    url: str = typer.Argument(..., help='Target URL to scrape'),
    output: Path = typer.Option('./output', '--output', '-o', help='Output directory'),
    pages: int = typer.Option(1, '--pages', '-p', help='Number of pages to scrape'),
    delay: float = typer.Option(1.5, '--delay', '-d', help='Delay between requests (seconds)'),
    format: str = typer.Option('csv', '--format', '-f', help='Output format: csv, json, sqlite'),
    verbose: bool = typer.Option(False, '--verbose', '-v'),
):
    """Scrape one or more pages of a URL and print a summary table."""
    # NOTE: `format` shadows the builtin, but the parameter name is part of
    # the command's interface and is therefore left unchanged. `delay` and
    # `verbose` are accepted but not yet consumed by the placeholder loop.
    output.mkdir(parents=True, exist_ok=True)

    console.print(f'[cyan]Scraping:[/cyan] {url}')
    console.print(f'[cyan]Pages:[/cyan] {pages} | [cyan]Delay:[/cyan] {delay}s | [cyan]Format:[/cyan] {format}')

    # A URL that already carries a query string must be extended with '&';
    # blindly appending '?page=N' would produce an invalid second '?'.
    separator = '&' if '?' in url else '?'

    results = []
    for page in track(range(1, pages + 1), description='Scraping...'):
        # scraping logic here
        results.append({'page': page, 'url': f'{url}{separator}page={page}'})

    # Render one row per page that was visited.
    table = Table(title='Scrape Results')
    table.add_column('Page', style='cyan')
    table.add_column('URL')
    for r in results:
        table.add_row(str(r['page']), r['url'])
    console.print(table)
@app.command()
def monitor(
    url: str = typer.Argument(..., help='URL to monitor for changes'),
    interval: int = typer.Option(300, help='Check interval in seconds'),
    notify: Optional[str] = typer.Option(None, help='Email to notify on change'),
):
    # Announces the monitoring target and polling interval on the console.
    # Comments are used instead of a docstring on purpose: Typer would
    # surface a docstring as the command's help text.
    # `notify` is accepted but not used by this body.
    banner = f'[green]Monitoring:[/green] {url} every {interval}s'
    console.print(banner)
if __name__ == '__main__':
    app()

📧 Professional Email Automation
import os
import smtplib
import ssl
from dataclasses import dataclass, field
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pathlib import Path
from typing import List, Optional
@dataclass
class EmailConfig:
    """SMTP connection settings and sender credentials.

    ``sender_email`` and ``sender_password`` default to the ``EMAIL_ADDRESS``
    and ``EMAIL_PASSWORD`` environment variables. The lookup happens when an
    instance is created without those arguments, not when the class is
    defined, so importing this module never fails on a missing variable and
    explicitly supplied credentials never touch the environment.

    Raises:
        KeyError: if a credential is omitted and its environment variable
            is not set.
    """

    smtp_host: str = 'smtp.gmail.com'
    smtp_port: int = 465
    # default_factory defers the environment lookup to instantiation time.
    sender_email: str = field(default_factory=lambda: os.environ['EMAIL_ADDRESS'])
    # repr=False keeps the secret out of logs and tracebacks that print the config.
    sender_password: str = field(
        default_factory=lambda: os.environ['EMAIL_PASSWORD'],
        repr=False,
    )
class EmailAutomation:
    """Send HTML emails, optionally with file attachments, via SMTP over SSL."""

    def __init__(self, config: Optional['EmailConfig'] = None):
        """Store the SMTP settings, building a default ``EmailConfig`` if none is given."""
        self.config = EmailConfig() if config is None else config

    def send(
        self,
        to: List[str],
        subject: str,
        html_body: str,
        attachments: Optional[List[Path]] = None,
        cc: Optional[List[str]] = None
    ) -> bool:
        """Build and deliver one HTML email.

        Args:
            to: Primary recipient addresses.
            subject: Subject line.
            html_body: Message body, sent as ``text/html``.
            attachments: Optional files to attach; each is read fully into
                memory. ``str`` paths are accepted as well as ``Path``.
            cc: Optional carbon-copy addresses; they also receive the mail.

        Returns:
            True if the SMTP conversation completed, False if connecting,
            logging in or sending raised (the error is printed).

        Raises:
            OSError: if an attachment cannot be read; this happens before
                any connection is opened and is not converted to False.
        """
        cc_list = list(cc or [])

        # 'alternative' tells mail clients to render only ONE of the parts,
        # so attachments placed in such a container may be hidden. Use
        # 'mixed' whenever there are attachments.
        msg = MIMEMultipart('mixed' if attachments else 'alternative')
        msg['From'] = self.config.sender_email
        msg['To'] = ', '.join(to)
        msg['Subject'] = subject
        if cc_list:
            msg['Cc'] = ', '.join(cc_list)
        msg.attach(MIMEText(html_body, 'html'))

        for file_path in attachments or []:
            file_path = Path(file_path)
            part = MIMEApplication(file_path.read_bytes(), Name=file_path.name)
            # add_header handles quoting/encoding of the filename parameter,
            # which a hand-built f-string does not (quotes, non-ASCII names).
            part.add_header('Content-Disposition', 'attachment', filename=file_path.name)
            msg.attach(part)

        context = ssl.create_default_context()
        try:
            with smtplib.SMTP_SSL(self.config.smtp_host, self.config.smtp_port, context=context) as server:
                server.login(self.config.sender_email, self.config.sender_password)
                # The envelope recipients must include Cc addresses; the
                # headers alone do not route the message.
                all_recipients = [*to, *cc_list]
                server.sendmail(self.config.sender_email, all_recipients, msg.as_string())
        except Exception as e:
            # Delivery is best-effort: failures are reported through the
            # boolean return value rather than propagated to the caller.
            print(f'Email failed: {e}')
            return False
        return True
# Usage
mailer = EmailAutomation()
# NOTE(review): the original literal was never closed and its markup had been
# stripped to plain text and a markdown table. Because the body is sent as
# text/html, the HTML below is reconstructed from that text — confirm it
# matches the intended layout.
daily_report_html = '''
<h2>Daily Scrape Report</h2>
<p>Today's automation run completed successfully.</p>
<table>
  <tr><th>Metric</th><th>Value</th></tr>
  <tr><td>Items Scraped</td><td>1,247</td></tr>
  <tr><td>Errors</td><td>3</td></tr>
  <tr><td>Duration</td><td>4m 32s</td></tr>
</table>
'''
# Deliver it with mailer.send(to=[...], subject=..., html_body=daily_report_html).
🗂️ Bulk File Processing
import asyncio
import aiofiles
from pathlib import Path
from typing import Callable, List
from concurrent.futures import ProcessPoolExecutor
import csv
import json
# CPU-bound: use ProcessPoolExecutor
def process_csv_file(file_path: Path) -> dict:
    """Read one CSV file and return its rows, each tagged as processed.

    Args:
        file_path: Path of a UTF-8 CSV file with a header row. A ``str``
            path is accepted as well.

    Returns:
        A dict with ``'file'`` (the file's base name), ``'rows'`` (number of
        data rows) and ``'data'`` (a list of row dicts, each with an added
        ``'processed': True`` entry).
    """
    file_path = Path(file_path)
    results = []
    # newline='' is what the csv module requires: it lets the reader do its
    # own line-ending handling, so line breaks embedded in quoted fields are
    # preserved exactly instead of being rewritten by universal newlines.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # Heavy data transformation here
            row['processed'] = True
            results.append(row)
    return {'file': file_path.name, 'rows': len(results), 'data': results}
async def bulk_process_files(
    directory: Path,
    pattern: str = '*.csv',
    max_workers: int = 4,
    output: Path = Path('merged_output.json'),
):
    """Process every matching file in worker processes and merge the rows.

    Each file matched by ``pattern`` inside ``directory`` is handed to
    ``process_csv_file`` in a ``ProcessPoolExecutor``; the rows of all files
    are concatenated and written to ``output`` as indented JSON.

    Because a process pool may re-import the main module when it starts
    workers (spawn start method), scripts should invoke this coroutine from
    under an ``if __name__ == '__main__':`` guard.

    Args:
        directory: Directory to search (non-recursive unless the pattern
            itself recurses).
        pattern: Glob pattern selecting the input files.
        max_workers: Number of worker processes.
        output: Destination JSON file; defaults to ``merged_output.json``
            in the current working directory.

    Returns:
        The merged list of row dicts.
    """
    # Sorted so the merged row order is reproducible; glob order is arbitrary.
    files = sorted(directory.glob(pattern))
    print(f'Processing {len(files)} files with {max_workers} workers')

    # Inside a coroutine the running loop is the right handle;
    # get_event_loop() is the legacy spelling.
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        tasks = [loop.run_in_executor(executor, process_csv_file, f) for f in files]
        # Await inside the `with` so the pool is still alive while work runs.
        results = await asyncio.gather(*tasks)

    # Aggregate all results
    all_rows = [row for result in results for row in result['data']]

    # Explicit encoding keeps the output independent of the platform default.
    async with aiofiles.open(output, 'w', encoding='utf-8') as f:
        await f.write(json.dumps(all_rows, indent=2))
    print(f'Merged {len(all_rows)} total rows into {output}')
    return all_rows
asyncio.run(bulk_process_files(Path('./data/'), pattern='*.csv', max_workers=4))

Knowledge Check
Ready to test your understanding of 6. Task Automation Systems?