Automation & Bots | VoidX Academy

6. Task Automation Systems

Module 06: Task Automation

CLI Tools, Email Automation, and System Tasks

Beyond web scraping, automation engineers build tools that orchestrate work at the operating system level — sending emails, processing files in bulk, building CLI utilities, monitoring systems, and integrating with cloud storage. These are the unglamorous workhorses that save enterprises thousands of hours annually.

🖥️ Building Professional CLI Tools

A CLI tool with proper arguments, help text, and error handling is dramatically more useful than a script with hardcoded values. typer is the modern standard for Python CLI development:

import typer
from pathlib import Path
from typing import Optional
from rich.console import Console
from rich.progress import track
from rich.table import Table

# Root Typer application; the functions decorated with @app.command() below
# are registered on it as subcommands.
app = typer.Typer(help='VoidX Automation CLI — Operator-grade task runner')
# Shared Rich console that the commands use for styled terminal output.
console = Console()

@app.command()
def scrape(
    url: str = typer.Argument(..., help='Target URL to scrape'),
    output: Path = typer.Option('./output', '--output', '-o', help='Output directory'),
    pages: int = typer.Option(1, '--pages', '-p', help='Number of pages to scrape'),
    delay: float = typer.Option(1.5, '--delay', '-d', help='Delay between requests (seconds)'),
    format: str = typer.Option('csv', '--format', '-f', help='Output format: csv, json, sqlite'),
    verbose: bool = typer.Option(False, '--verbose', '-v')
):
    """Scrape one or more pages of a URL and print a summary table."""
    # NOTE: Typer uses the docstring above as this command's --help text,
    # so it is deliberately kept to a single user-facing sentence.
    #
    # `delay` and `verbose` are accepted but not consumed by the placeholder
    # loop below; `format` is only echoed back. They are kept so the command
    # line interface stays stable once real scraping logic is added.
    output.mkdir(parents=True, exist_ok=True)
    console.print(f'[cyan]Scraping:[/cyan] {url}')
    console.print(f'[cyan]Pages:[/cyan] {pages} | [cyan]Delay:[/cyan] {delay}s | [cyan]Format:[/cyan] {format}')

    # Split off any fragment and pick the right separator once, so the page
    # parameter is appended correctly even when the URL already carries a
    # query string ("...?q=x" -> "...?q=x&page=1") or a "#fragment".
    base, hash_mark, fragment = url.partition('#')
    if base.endswith(('?', '&')):
        separator = ''
    elif '?' in base:
        separator = '&'
    else:
        separator = '?'

    results = []
    for page in track(range(1, pages + 1), description='Scraping...'):
        # scraping logic here
        page_url = f'{base}{separator}page={page}{hash_mark}{fragment}'
        results.append({'page': page, 'url': page_url})

    table = Table(title='Scrape Results')
    table.add_column('Page', style='cyan')
    table.add_column('URL')
    for r in results:
        table.add_row(str(r['page']), r['url'])
    console.print(table)

@app.command()
def monitor(
    url: str = typer.Argument(..., help='URL to monitor for changes'),
    interval: int = typer.Option(300, help='Check interval in seconds'),
    notify: Optional[str] = typer.Option(None, help='Email to notify on change')
):
    # Placeholder command: it only announces which URL would be watched and
    # how often. `notify` is accepted on the command line but not used yet.
    # (Documented with comments rather than a docstring, because Typer would
    # surface a docstring as the command's help text.)
    status_line = f'[green]Monitoring:[/green] {url} every {interval}s'
    console.print(status_line)

# Start the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    app()

📧 Professional Email Automation

import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from pathlib import Path
from typing import List, Optional
from dataclasses import dataclass
import os

@dataclass
class EmailConfig:
    """SMTP connection settings and sender credentials.

    ``sender_email`` and ``sender_password`` fall back to the
    ``EMAIL_ADDRESS`` and ``EMAIL_PASSWORD`` environment variables when they
    are not passed explicitly. The lookup happens when an instance is
    created, not when the class is defined, so importing this module never
    fails on a missing variable and later environment changes are honoured.

    Raises:
        KeyError: if a credential is omitted and its environment variable
            is not set.
    """
    smtp_host: str = 'smtp.gmail.com'
    smtp_port: int = 465
    # None means "read from the environment in __post_init__".
    sender_email: Optional[str] = None
    sender_password: Optional[str] = None

    def __post_init__(self):
        # Resolve credentials per instance; explicit arguments always win.
        if self.sender_email is None:
            self.sender_email = os.environ['EMAIL_ADDRESS']
        if self.sender_password is None:
            self.sender_password = os.environ['EMAIL_PASSWORD']

class EmailAutomation:
    """Send HTML emails, optionally with file attachments, via SMTP over SSL."""

    def __init__(self, config: Optional[EmailConfig] = None):
        """Store the given config, or build a default ``EmailConfig()``."""
        self.config = config or EmailConfig()

    def send(
        self,
        to: List[str],
        subject: str,
        html_body: str,
        attachments: Optional[List[Path]] = None,
        cc: Optional[List[str]] = None
    ) -> bool:
        """Build and send one HTML email.

        Args:
            to: Primary recipient addresses (joined into the ``To`` header).
            subject: Subject line.
            html_body: Message body, attached as ``text/html``.
            attachments: Files to attach; each is read fully into memory.
            cc: Additional recipients, listed in the ``Cc`` header.

        Returns:
            True if the SMTP session completed, False if connecting, logging
            in or sending raised an exception (the error is printed).

        Raises:
            OSError: if an attachment cannot be read. Attachments are loaded
                before the SMTP ``try`` block, so this is not turned into a
                ``False`` return.
        """
        # 'mixed' is the container for a body plus independent attachments.
        # 'alternative' declares every part to be an interchangeable rendering
        # of the same content, so clients may show only one part and hide
        # either the body or the attachments.
        msg = MIMEMultipart('mixed')
        msg['From'] = self.config.sender_email
        msg['To'] = ', '.join(to)
        msg['Subject'] = subject
        if cc:
            msg['Cc'] = ', '.join(cc)

        msg.attach(MIMEText(html_body, 'html'))

        for file_path in attachments or []:
            file_path = Path(file_path)
            part = MIMEApplication(file_path.read_bytes(), Name=file_path.name)
            # add_header quotes/encodes the filename parameter instead of
            # relying on hand-built header text.
            part.add_header('Content-Disposition', 'attachment', filename=file_path.name)
            msg.attach(part)

        # The envelope recipients must include Cc addresses as well as To.
        all_recipients = [*to, *(cc or [])]
        context = ssl.create_default_context()
        try:
            with smtplib.SMTP_SSL(self.config.smtp_host, self.config.smtp_port, context=context) as server:
                server.login(self.config.sender_email, self.config.sender_password)
                server.sendmail(self.config.sender_email, all_recipients, msg.as_string())
            return True
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and let the
            # caller decide what to do based on the boolean result.
            print(f'Email failed: {e}')
            return False

# Usage
mailer = EmailAutomation()

# The body is sent as text/html, so it needs real markup: without tags the
# heading, sentence and metric rows collapse into a single run of text.
daily_report_html = '''
<html>
<body>
  <h2>Daily Scrape Report</h2>
  <p>Today's automation run completed successfully.</p>
  <table>
    <tr><th>Metric</th><th>Value</th></tr>
    <tr><td>Items Scraped</td><td>1,247</td></tr>
    <tr><td>Errors</td><td>3</td></tr>
    <tr><td>Duration</td><td>4m 32s</td></tr>
  </table>
</body>
</html>
'''

mailer.send(
    to=['team@company.com'],
    subject='[AutoBot] Daily Report — 2024-01-15',
    html_body=daily_report_html,
    attachments=[Path('output/results.csv')]
)

Metric	Value
Items Scraped	1,247
Errors	3
Duration	4m 32s

🗂️ Bulk File Processing

import asyncio
import aiofiles
from pathlib import Path
from typing import Callable, List
from concurrent.futures import ProcessPoolExecutor
import csv
import json

# CPU-bound: use ProcessPoolExecutor
def process_csv_file(file_path: Path) -> dict:
    """Read one CSV file and return its rows, each tagged ``processed=True``.

    Args:
        file_path: Path of a UTF-8 encoded CSV file with a header row.

    Returns:
        A dict with ``file`` (the file name without its directory), ``rows``
        (number of data rows) and ``data`` (list of row dicts keyed by the
        header names, plus the added ``processed`` key).
    """
    rows = []
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, "\r\n" inside quoted fields is
    # silently rewritten to "\n" by universal-newline translation.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # Heavy data transformation here
            row['processed'] = True
            rows.append(row)
    return {'file': Path(file_path).name, 'rows': len(rows), 'data': rows}

async def bulk_process_files(directory: Path, pattern: str = '*.csv', max_workers: int = 4):
    """Process every matching file in a process pool and merge the rows to JSON.

    Args:
        directory: Directory to search (non-recursive unless ``pattern`` says so).
        pattern: Glob pattern selecting the files to process.
        max_workers: Size of the worker process pool.

    Returns:
        The merged list of row dicts, which is also written to
        ``merged_output.json`` in the current working directory.
    """
    # Sorted so the merged output has a stable row order from run to run;
    # glob itself makes no ordering promise.
    files = sorted(directory.glob(pattern))
    print(f'Processing {len(files)} files with {max_workers} workers')

    # Inside a coroutine the running loop is the one to use; get_event_loop()
    # is the legacy spelling and is being phased out for this purpose.
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        tasks = [loop.run_in_executor(executor, process_csv_file, f) for f in files]
        # gather() returns results in the same order as `tasks`.
        results = await asyncio.gather(*tasks)

    # Aggregate all results
    all_rows = [row for result in results for row in result['data']]

    output = Path('merged_output.json')
    async with aiofiles.open(output, 'w') as f:
        await f.write(json.dumps(all_rows, indent=2))

    print(f'Merged {len(all_rows)} total rows into {output}')
    return all_rows

# The guard is required with ProcessPoolExecutor: under the 'spawn' start
# method worker processes re-import this module, and an unguarded call here
# would make every worker try to launch the whole job again.
if __name__ == '__main__':
    asyncio.run(bulk_process_files(Path('./data/'), pattern='*.csv', max_workers=4))

6. Task Automation Systems

Module 06: Task Automation

CLI Tools, Email Automation, and System Tasks

🖥️ Building Professional CLI Tools

A CLI tool with proper arguments, help text, and error handling is dramatically more useful than a script with hardcoded values. typer is the modern standard for Python CLI development:

import typer
from pathlib import Path
from typing import Optional
from rich.console import Console
from rich.progress import track
from rich.table import Table

# Root Typer application; the functions decorated with @app.command() below
# are registered on it as subcommands.
app = typer.Typer(help='VoidX Automation CLI — Operator-grade task runner')
# Shared Rich console that the commands use for styled terminal output.
console = Console()

@app.command()
def scrape(
    url: str = typer.Argument(..., help='Target URL to scrape'),
    output: Path = typer.Option('./output', '--output', '-o', help='Output directory'),
    pages: int = typer.Option(1, '--pages', '-p', help='Number of pages to scrape'),
    delay: float = typer.Option(1.5, '--delay', '-d', help='Delay between requests (seconds)'),
    format: str = typer.Option('csv', '--format', '-f', help='Output format: csv, json, sqlite'),
    verbose: bool = typer.Option(False, '--verbose', '-v')
):
    """Scrape one or more pages of a URL and print a summary table."""
    # NOTE: Typer uses the docstring above as this command's --help text,
    # so it is deliberately kept to a single user-facing sentence.
    #
    # `delay` and `verbose` are accepted but not consumed by the placeholder
    # loop below; `format` is only echoed back. They are kept so the command
    # line interface stays stable once real scraping logic is added.
    output.mkdir(parents=True, exist_ok=True)
    console.print(f'[cyan]Scraping:[/cyan] {url}')
    console.print(f'[cyan]Pages:[/cyan] {pages} | [cyan]Delay:[/cyan] {delay}s | [cyan]Format:[/cyan] {format}')

    # Split off any fragment and pick the right separator once, so the page
    # parameter is appended correctly even when the URL already carries a
    # query string ("...?q=x" -> "...?q=x&page=1") or a "#fragment".
    base, hash_mark, fragment = url.partition('#')
    if base.endswith(('?', '&')):
        separator = ''
    elif '?' in base:
        separator = '&'
    else:
        separator = '?'

    results = []
    for page in track(range(1, pages + 1), description='Scraping...'):
        # scraping logic here
        page_url = f'{base}{separator}page={page}{hash_mark}{fragment}'
        results.append({'page': page, 'url': page_url})

    table = Table(title='Scrape Results')
    table.add_column('Page', style='cyan')
    table.add_column('URL')
    for r in results:
        table.add_row(str(r['page']), r['url'])
    console.print(table)

@app.command()
def monitor(
    url: str = typer.Argument(..., help='URL to monitor for changes'),
    interval: int = typer.Option(300, help='Check interval in seconds'),
    notify: Optional[str] = typer.Option(None, help='Email to notify on change')
):
    # Placeholder command: it only announces which URL would be watched and
    # how often. `notify` is accepted on the command line but not used yet.
    # (Documented with comments rather than a docstring, because Typer would
    # surface a docstring as the command's help text.)
    status_line = f'[green]Monitoring:[/green] {url} every {interval}s'
    console.print(status_line)

# Start the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    app()

📧 Professional Email Automation

import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from pathlib import Path
from typing import List, Optional
from dataclasses import dataclass
import os

@dataclass
class EmailConfig:
    """SMTP connection settings and sender credentials.

    ``sender_email`` and ``sender_password`` fall back to the
    ``EMAIL_ADDRESS`` and ``EMAIL_PASSWORD`` environment variables when they
    are not passed explicitly. The lookup happens when an instance is
    created, not when the class is defined, so importing this module never
    fails on a missing variable and later environment changes are honoured.

    Raises:
        KeyError: if a credential is omitted and its environment variable
            is not set.
    """
    smtp_host: str = 'smtp.gmail.com'
    smtp_port: int = 465
    # None means "read from the environment in __post_init__".
    sender_email: Optional[str] = None
    sender_password: Optional[str] = None

    def __post_init__(self):
        # Resolve credentials per instance; explicit arguments always win.
        if self.sender_email is None:
            self.sender_email = os.environ['EMAIL_ADDRESS']
        if self.sender_password is None:
            self.sender_password = os.environ['EMAIL_PASSWORD']

class EmailAutomation:
    """Send HTML emails, optionally with file attachments, via SMTP over SSL."""

    def __init__(self, config: Optional[EmailConfig] = None):
        """Store the given config, or build a default ``EmailConfig()``."""
        self.config = config or EmailConfig()

    def send(
        self,
        to: List[str],
        subject: str,
        html_body: str,
        attachments: Optional[List[Path]] = None,
        cc: Optional[List[str]] = None
    ) -> bool:
        """Build and send one HTML email.

        Args:
            to: Primary recipient addresses (joined into the ``To`` header).
            subject: Subject line.
            html_body: Message body, attached as ``text/html``.
            attachments: Files to attach; each is read fully into memory.
            cc: Additional recipients, listed in the ``Cc`` header.

        Returns:
            True if the SMTP session completed, False if connecting, logging
            in or sending raised an exception (the error is printed).

        Raises:
            OSError: if an attachment cannot be read. Attachments are loaded
                before the SMTP ``try`` block, so this is not turned into a
                ``False`` return.
        """
        # 'mixed' is the container for a body plus independent attachments.
        # 'alternative' declares every part to be an interchangeable rendering
        # of the same content, so clients may show only one part and hide
        # either the body or the attachments.
        msg = MIMEMultipart('mixed')
        msg['From'] = self.config.sender_email
        msg['To'] = ', '.join(to)
        msg['Subject'] = subject
        if cc:
            msg['Cc'] = ', '.join(cc)

        msg.attach(MIMEText(html_body, 'html'))

        for file_path in attachments or []:
            file_path = Path(file_path)
            part = MIMEApplication(file_path.read_bytes(), Name=file_path.name)
            # add_header quotes/encodes the filename parameter instead of
            # relying on hand-built header text.
            part.add_header('Content-Disposition', 'attachment', filename=file_path.name)
            msg.attach(part)

        # The envelope recipients must include Cc addresses as well as To.
        all_recipients = [*to, *(cc or [])]
        context = ssl.create_default_context()
        try:
            with smtplib.SMTP_SSL(self.config.smtp_host, self.config.smtp_port, context=context) as server:
                server.login(self.config.sender_email, self.config.sender_password)
                server.sendmail(self.config.sender_email, all_recipients, msg.as_string())
            return True
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and let the
            # caller decide what to do based on the boolean result.
            print(f'Email failed: {e}')
            return False

# Usage
mailer = EmailAutomation()

# The body is sent as text/html, so it needs real markup: without tags the
# heading, sentence and metric rows collapse into a single run of text.
daily_report_html = '''
<html>
<body>
  <h2>Daily Scrape Report</h2>
  <p>Today's automation run completed successfully.</p>
  <table>
    <tr><th>Metric</th><th>Value</th></tr>
    <tr><td>Items Scraped</td><td>1,247</td></tr>
    <tr><td>Errors</td><td>3</td></tr>
    <tr><td>Duration</td><td>4m 32s</td></tr>
  </table>
</body>
</html>
'''

mailer.send(
    to=['team@company.com'],
    subject='[AutoBot] Daily Report — 2024-01-15',
    html_body=daily_report_html,
    attachments=[Path('output/results.csv')]
)

Metric	Value
Items Scraped	1,247
Errors	3
Duration	4m 32s

🗂️ Bulk File Processing

import asyncio
import aiofiles
from pathlib import Path
from typing import Callable, List
from concurrent.futures import ProcessPoolExecutor
import csv
import json

# CPU-bound: use ProcessPoolExecutor
def process_csv_file(file_path: Path) -> dict:
    """Read one CSV file and return its rows, each tagged ``processed=True``.

    Args:
        file_path: Path of a UTF-8 encoded CSV file with a header row.

    Returns:
        A dict with ``file`` (the file name without its directory), ``rows``
        (number of data rows) and ``data`` (list of row dicts keyed by the
        header names, plus the added ``processed`` key).
    """
    rows = []
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, "\r\n" inside quoted fields is
    # silently rewritten to "\n" by universal-newline translation.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # Heavy data transformation here
            row['processed'] = True
            rows.append(row)
    return {'file': Path(file_path).name, 'rows': len(rows), 'data': rows}

async def bulk_process_files(directory: Path, pattern: str = '*.csv', max_workers: int = 4):
    """Process every matching file in a process pool and merge the rows to JSON.

    Args:
        directory: Directory to search (non-recursive unless ``pattern`` says so).
        pattern: Glob pattern selecting the files to process.
        max_workers: Size of the worker process pool.

    Returns:
        The merged list of row dicts, which is also written to
        ``merged_output.json`` in the current working directory.
    """
    # Sorted so the merged output has a stable row order from run to run;
    # glob itself makes no ordering promise.
    files = sorted(directory.glob(pattern))
    print(f'Processing {len(files)} files with {max_workers} workers')

    # Inside a coroutine the running loop is the one to use; get_event_loop()
    # is the legacy spelling and is being phased out for this purpose.
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        tasks = [loop.run_in_executor(executor, process_csv_file, f) for f in files]
        # gather() returns results in the same order as `tasks`.
        results = await asyncio.gather(*tasks)

    # Aggregate all results
    all_rows = [row for result in results for row in result['data']]

    output = Path('merged_output.json')
    async with aiofiles.open(output, 'w') as f:
        await f.write(json.dumps(all_rows, indent=2))

    print(f'Merged {len(all_rows)} total rows into {output}')
    return all_rows

# The guard is required with ProcessPoolExecutor: under the 'spawn' start
# method worker processes re-import this module, and an unguarded call here
# would make every worker try to launch the whole job again.
if __name__ == '__main__':
    asyncio.run(bulk_process_files(Path('./data/'), pattern='*.csv', max_workers=4))

6. Task Automation Systems

CLI Tools, Email Automation, and System Tasks

🖥️ Building Professional CLI Tools

📧 Professional Email Automation

Daily Scrape Report

🗂️ Bulk File Processing

Knowledge Check

6. Task Automation Systems

CLI Tools, Email Automation, and System Tasks

🖥️ Building Professional CLI Tools

📧 Professional Email Automation

Daily Scrape Report

🗂️ Bulk File Processing

Knowledge Check