Automation & Bots | VoidX Academy

13. AI-Powered Automation

Module 13: AI Automation

Smart Scrapers, RAG Pipelines, and LLM-Driven Agents

Traditional automation breaks when sites change layout, add CAPTCHAs, or present data in unstructured formats. AI-powered automation adapts. Large language models can parse natural language, extract structured data from arbitrary HTML, make decisions based on content, and complete tasks that would require hundreds of conditional rules to handle programmatically.

This module covers three AI automation paradigms: LLM-based data extraction, Retrieval-Augmented Generation (RAG) pipelines for knowledge bases, and autonomous agents that can browse and act without pre-programmed flows.

🤖 LLM-Powered Data Extraction

from anthropic import Anthropic
from pydantic import BaseModel, validator
from typing import List, Optional
import json

# Module-level synchronous Anthropic client, constructed with no explicit
# arguments (presumably credentials come from the environment — confirm).
client = Anthropic()

class ProductData(BaseModel):
    '''Structured product record built from the JSON object returned by the LLM.

    ``name`` and ``availability`` are required; every other field has a
    default so a sparse reply still validates.
    '''

    name: str
    # Explicit default: a missing price is accepted no matter how the
    # installed pydantic version treats a bare Optional annotation.
    price: Optional[float] = None
    currency: str = 'USD'
    availability: str  # 'in_stock', 'out_of_stock', 'unknown'
    description: str = ''
    specifications: dict = {}

    @validator('price', pre=True)
    def validate_price(cls, v):
        '''Coerce a raw price to ``float``; return ``None`` if it cannot be parsed.

        Registered with ``pre=True`` so it sees the raw value. Thousands
        separators and common currency symbols are removed first, so inputs
        such as ``'$1,299.00'`` or ``'£51.77'`` are accepted.
        '''
        if v is None:
            return None
        cleaned = str(v).replace(',', '')
        # Strip currency symbols the model may leave in despite the prompt
        for symbol in '$€£¥':
            cleaned = cleaned.replace(symbol, '')
        try:
            return float(cleaned)
        except ValueError:
            return None

def _parse_json_object(text: str):
    '''Decode JSON from an LLM reply, tolerating code fences or prose around the object.'''
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span (e.g. a ```json fenced reply)
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end < start:
            raise
        return json.loads(text[start:end + 1])


def extract_product_data(html: str, url: str) -> Optional[ProductData]:
    '''Ask the LLM to extract product fields from a page and validate them.

    Args:
        html: Raw page HTML; only the first 8000 characters are sent.
        url: Page URL, included in the prompt and in error output.

    Returns:
        A validated ``ProductData``, or ``None`` when the reply cannot be
        parsed or does not validate.
    '''
    prompt = f'''Extract product information from this HTML page.
Return ONLY a valid JSON object with these fields:
- name: string (product name)
- price: number or null (numeric price only, no currency symbols)
- currency: string (3-letter currency code, default USD)
- availability: 'in_stock', 'out_of_stock', or 'unknown'
- description: string (main product description, max 200 chars)
- specifications: object (key-value pairs of technical specs)

URL: {url}

HTML (truncated to first 8000 chars):
{html[:8000]}

Return only the JSON object, no explanation.'''
    
    message = client.messages.create(
        model='claude-opus-4-5',
        max_tokens=1000,
        messages=[{'role': 'user', 'content': prompt}]
    )
    
    try:
        data = _parse_json_object(message.content[0].text)
        return ProductData(**data)
    except (ValueError, TypeError, IndexError, AttributeError) as e:
        # ValueError: invalid JSON or failed validation; TypeError: payload is
        # not a JSON object; IndexError/AttributeError: empty or non-text reply.
        print(f'Parse error for {url}: {e}')
        return None

# Batch extraction with LLM
import asyncio
from anthropic import AsyncAnthropic

# Module-level asynchronous Anthropic client used by batch_extract below.
async_client = AsyncAnthropic()

async def batch_extract(pages: list, concurrency: int = 5) -> List[ProductData]:
    '''Extract product data from many pages concurrently.

    Args:
        pages: Page objects handed one at a time to ``build_prompt``.
        concurrency: Maximum number of API requests in flight at once.

    Returns:
        The successfully parsed ``ProductData`` items; pages whose request
        or parsing failed are reported on stdout and omitted.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    '''
    if concurrency < 1:
        # A zero-slot semaphore would block every task forever
        raise ValueError('concurrency must be at least 1')
    semaphore = asyncio.Semaphore(concurrency)

    def loads_lenient(text):
        # Accept replies that wrap the JSON object in code fences or prose
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find('{'), text.rfind('}')
            if start == -1 or end < start:
                raise
            return json.loads(text[start:end + 1])

    async def extract_one(page):
        # NOTE(review): build_prompt is not defined in the visible source;
        # it must be provided elsewhere in the module.
        prompt = build_prompt(page)
        async with semaphore:
            try:
                message = await async_client.messages.create(
                    model='claude-opus-4-5',
                    max_tokens=1000,
                    messages=[{'role': 'user', 'content': prompt}]
                )
            except Exception as e:
                # Isolate the failure so one bad request cannot abort the batch
                print(f'Request error in batch extraction: {e}')
                return None
        try:
            return ProductData(**loads_lenient(message.content[0].text))
        except (ValueError, TypeError, IndexError, AttributeError) as e:
            print(f'Parse error in batch extraction: {e}')
            return None

    results = await asyncio.gather(*[extract_one(p) for p in pages])
    return [r for r in results if r is not None]

📚 RAG Pipeline for Automation Knowledge Bases

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain.chains import RetrievalQA
from pathlib import Path
import json

# Build a searchable knowledge base from scraped data
class AutomationKnowledgeBase:
    '''Vector-store-backed knowledge base over scraped documents.

    Documents are chunked, embedded and stored in a Chroma collection under
    ``persist_dir``; ``query`` answers questions with a RetrievalQA chain.
    '''

    def __init__(self, persist_dir: str = './kb_db'):
        '''Create the embedding model; the vector store is opened lazily.'''
        self.embeddings = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        self.persist_dir = persist_dir
        self.vectorstore = None
    
    def index_documents(self, documents: List[dict]):
        '''Index scraped documents into vector store

        Each document must provide ``content``, ``url`` and ``title`` keys.
        Content is split into 1000-character chunks with 200 overlap, and
        each chunk keeps its source URL and title as metadata.
        '''
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        texts = []
        for doc in documents:
            metadata = {'source': doc['url'], 'title': doc['title']}
            for chunk in splitter.split_text(doc['content']):
                texts.append({'content': chunk, 'metadata': dict(metadata)})
        
        if texts:
            self.vectorstore = Chroma.from_texts(
                texts=[t['content'] for t in texts],
                metadatas=[t['metadata'] for t in texts],
                embedding=self.embeddings,
                persist_directory=self.persist_dir
            )
            self.vectorstore.persist()
        # With nothing to index, skip the vector store call entirely
        print(f'Indexed {len(texts)} chunks from {len(documents)} documents')
    
    def query(self, question: str, k: int = 5) -> tuple:
        '''Query the knowledge base using RAG

        Args:
            question: Natural-language question.
            k: Number of chunks to retrieve as context.

        Returns:
            A ``(answer, source_documents)`` tuple.
        '''
        # Identity check: "not yet opened" must not depend on the store's
        # truthiness.
        if self.vectorstore is None:
            self.vectorstore = Chroma(
                persist_directory=self.persist_dir,
                embedding_function=self.embeddings
            )
        
        llm = ChatAnthropic(model='claude-sonnet-4-6', max_tokens=2048)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=self.vectorstore.as_retriever(search_kwargs={'k': k}),
            return_source_documents=True
        )
        # invoke() replaces the deprecated direct call on the chain
        result = qa_chain.invoke({'query': question})
        return result['result'], result['source_documents']

🤖 Autonomous Browser Agent

from anthropic import Anthropic
from playwright.sync_api import sync_playwright, Page
import json
import base64

# Module-level synchronous Anthropic client used by browser_agent below.
client = Anthropic()

def take_screenshot_base64(page: Page) -> str:
    '''Screenshot the page (full_page=False) and return the bytes as base64 text.'''
    raw_image = page.screenshot(full_page=False)
    encoded = base64.b64encode(raw_image)
    return encoded.decode('utf-8')

def _run_browser_tool(page, name, tool_input):
    '''Execute one non-terminal agent tool against the page and return a result string.'''
    if name == 'click':
        page.click(tool_input['selector'])
        return f'Clicked {tool_input["selector"]}'
    if name == 'type_text':
        page.fill(tool_input['selector'], tool_input['text'])
        return f'Typed into {tool_input["selector"]}'
    if name == 'navigate':
        page.goto(tool_input['url'])
        return f'Navigated to {tool_input["url"]}'
    if name == 'extract_data':
        # Return the page text so the model can read the requested data
        return page.inner_text('body')[:8000]
    return f'Unknown tool: {name}'


def browser_agent(objective: str, start_url: str, max_steps: int = 10):
    '''Autonomous browser agent using Claude's vision capabilities

    Each step sends a screenshot plus the objective to the model, runs the
    tool calls it returns, and feeds the outcomes back as tool results.

    Args:
        objective: Natural-language task for the agent.
        start_url: Page the browser opens first.
        max_steps: Maximum number of model turns.

    Returns:
        The ``result`` given to the ``done`` tool; the model's final text if
        it stops without calling ``done``; otherwise 'Max steps reached'.
    '''
    # Local import: the error type is only needed inside this function
    from playwright.sync_api import Error as PlaywrightError

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            page = browser.new_page(viewport={'width': 1280, 'height': 720})
            page.goto(start_url)
            
            tools = [
                {'name': 'click', 'description': 'Click on an element', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}}, 'required': ['selector']}},
                {'name': 'type_text', 'description': 'Type text into a field', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}, 'text': {'type': 'string'}}, 'required': ['selector', 'text']}},
                {'name': 'navigate', 'description': 'Go to a URL', 'input_schema': {'type': 'object', 'properties': {'url': {'type': 'string'}}, 'required': ['url']}},
                {'name': 'extract_data', 'description': 'Extract data from the page', 'input_schema': {'type': 'object', 'properties': {'description': {'type': 'string'}}, 'required': ['description']}},
                {'name': 'done', 'description': 'Task complete — return final result', 'input_schema': {'type': 'object', 'properties': {'result': {'type': 'string'}}, 'required': ['result']}},
            ]
            
            messages = []
            
            for step in range(max_steps):
                screenshot = take_screenshot_base64(page)
                messages.append({'role': 'user', 'content': [
                    {'type': 'image', 'source': {'type': 'base64', 'media_type': 'image/png', 'data': screenshot}},
                    {'type': 'text', 'text': f'Objective: {objective}\nStep {step+1}/{max_steps}. What action will you take next?'}
                ]})
                
                response = client.messages.create(
                    model='claude-opus-4-5', max_tokens=1000,
                    system='You are an autonomous browser agent. Take actions to complete the objective efficiently.',
                    tools=tools, messages=messages
                )
                
                messages.append({'role': 'assistant', 'content': response.content})
                
                if response.stop_reason == 'end_turn':
                    # The model answered in plain text instead of calling
                    # `done`; return that answer, not the step-limit message.
                    final_text = ''.join(
                        block.text for block in response.content if block.type == 'text'
                    )
                    return final_text or 'Agent stopped without calling done'
                
                tool_results = []
                for block in response.content:
                    if block.type != 'tool_use':
                        continue
                    if block.name == 'done':
                        return block.input['result']
                    
                    tool_result = {'type': 'tool_result', 'tool_use_id': block.id}
                    try:
                        tool_result['content'] = _run_browser_tool(page, block.name, block.input)
                    except (PlaywrightError, KeyError) as exc:
                        # Report the failure so the model can try another
                        # selector or URL instead of crashing the run
                        tool_result['content'] = f'Error: {exc}'
                        tool_result['is_error'] = True
                    tool_results.append(tool_result)
                
                if tool_results:
                    messages.append({'role': 'user', 'content': tool_results})
            
            return 'Max steps reached'
        finally:
            # Close the browser on every exit path, including exceptions
            browser.close()

# Demo run: ask the agent for the first product's price on a practice site.
result = browser_agent(
    start_url='https://books.toscrape.com',
    objective='Find the price of the first product on the page and return it',
)
print(f'Agent result: {result}')

13. AI-Powered Automation

Module 13: AI Automation

Smart Scrapers, RAG Pipelines, and LLM-Driven Agents

🤖 LLM-Powered Data Extraction

from anthropic import Anthropic
from pydantic import BaseModel, validator
from typing import List, Optional
import json

# Module-level synchronous Anthropic client, constructed with no explicit
# arguments (presumably credentials come from the environment — confirm).
client = Anthropic()

class ProductData(BaseModel):
    '''Structured product record built from the JSON object returned by the LLM.

    ``name`` and ``availability`` are required; every other field has a
    default so a sparse reply still validates.
    '''

    name: str
    # Explicit default: a missing price is accepted no matter how the
    # installed pydantic version treats a bare Optional annotation.
    price: Optional[float] = None
    currency: str = 'USD'
    availability: str  # 'in_stock', 'out_of_stock', 'unknown'
    description: str = ''
    specifications: dict = {}

    @validator('price', pre=True)
    def validate_price(cls, v):
        '''Coerce a raw price to ``float``; return ``None`` if it cannot be parsed.

        Registered with ``pre=True`` so it sees the raw value. Thousands
        separators and common currency symbols are removed first, so inputs
        such as ``'$1,299.00'`` or ``'£51.77'`` are accepted.
        '''
        if v is None:
            return None
        cleaned = str(v).replace(',', '')
        # Strip currency symbols the model may leave in despite the prompt
        for symbol in '$€£¥':
            cleaned = cleaned.replace(symbol, '')
        try:
            return float(cleaned)
        except ValueError:
            return None

def _parse_json_object(text: str):
    '''Decode JSON from an LLM reply, tolerating code fences or prose around the object.'''
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span (e.g. a ```json fenced reply)
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end < start:
            raise
        return json.loads(text[start:end + 1])


def extract_product_data(html: str, url: str) -> Optional[ProductData]:
    '''Ask the LLM to extract product fields from a page and validate them.

    Args:
        html: Raw page HTML; only the first 8000 characters are sent.
        url: Page URL, included in the prompt and in error output.

    Returns:
        A validated ``ProductData``, or ``None`` when the reply cannot be
        parsed or does not validate.
    '''
    prompt = f'''Extract product information from this HTML page.
Return ONLY a valid JSON object with these fields:
- name: string (product name)
- price: number or null (numeric price only, no currency symbols)
- currency: string (3-letter currency code, default USD)
- availability: 'in_stock', 'out_of_stock', or 'unknown'
- description: string (main product description, max 200 chars)
- specifications: object (key-value pairs of technical specs)

URL: {url}

HTML (truncated to first 8000 chars):
{html[:8000]}

Return only the JSON object, no explanation.'''
    
    message = client.messages.create(
        model='claude-opus-4-5',
        max_tokens=1000,
        messages=[{'role': 'user', 'content': prompt}]
    )
    
    try:
        data = _parse_json_object(message.content[0].text)
        return ProductData(**data)
    except (ValueError, TypeError, IndexError, AttributeError) as e:
        # ValueError: invalid JSON or failed validation; TypeError: payload is
        # not a JSON object; IndexError/AttributeError: empty or non-text reply.
        print(f'Parse error for {url}: {e}')
        return None

# Batch extraction with LLM
import asyncio
from anthropic import AsyncAnthropic

# Module-level asynchronous Anthropic client used by batch_extract below.
async_client = AsyncAnthropic()

async def batch_extract(pages: list, concurrency: int = 5) -> List[ProductData]:
    '''Extract product data from many pages concurrently.

    Args:
        pages: Page objects handed one at a time to ``build_prompt``.
        concurrency: Maximum number of API requests in flight at once.

    Returns:
        The successfully parsed ``ProductData`` items; pages whose request
        or parsing failed are reported on stdout and omitted.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    '''
    if concurrency < 1:
        # A zero-slot semaphore would block every task forever
        raise ValueError('concurrency must be at least 1')
    semaphore = asyncio.Semaphore(concurrency)

    def loads_lenient(text):
        # Accept replies that wrap the JSON object in code fences or prose
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find('{'), text.rfind('}')
            if start == -1 or end < start:
                raise
            return json.loads(text[start:end + 1])

    async def extract_one(page):
        # NOTE(review): build_prompt is not defined in the visible source;
        # it must be provided elsewhere in the module.
        prompt = build_prompt(page)
        async with semaphore:
            try:
                message = await async_client.messages.create(
                    model='claude-opus-4-5',
                    max_tokens=1000,
                    messages=[{'role': 'user', 'content': prompt}]
                )
            except Exception as e:
                # Isolate the failure so one bad request cannot abort the batch
                print(f'Request error in batch extraction: {e}')
                return None
        try:
            return ProductData(**loads_lenient(message.content[0].text))
        except (ValueError, TypeError, IndexError, AttributeError) as e:
            print(f'Parse error in batch extraction: {e}')
            return None

    results = await asyncio.gather(*[extract_one(p) for p in pages])
    return [r for r in results if r is not None]

📚 RAG Pipeline for Automation Knowledge Bases

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain.chains import RetrievalQA
from pathlib import Path
import json

# Build a searchable knowledge base from scraped data
class AutomationKnowledgeBase:
    '''Vector-store-backed knowledge base over scraped documents.

    Documents are chunked, embedded and stored in a Chroma collection under
    ``persist_dir``; ``query`` answers questions with a RetrievalQA chain.
    '''

    def __init__(self, persist_dir: str = './kb_db'):
        '''Create the embedding model; the vector store is opened lazily.'''
        self.embeddings = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        self.persist_dir = persist_dir
        self.vectorstore = None
    
    def index_documents(self, documents: List[dict]):
        '''Index scraped documents into vector store

        Each document must provide ``content``, ``url`` and ``title`` keys.
        Content is split into 1000-character chunks with 200 overlap, and
        each chunk keeps its source URL and title as metadata.
        '''
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        texts = []
        for doc in documents:
            metadata = {'source': doc['url'], 'title': doc['title']}
            for chunk in splitter.split_text(doc['content']):
                texts.append({'content': chunk, 'metadata': dict(metadata)})
        
        if texts:
            self.vectorstore = Chroma.from_texts(
                texts=[t['content'] for t in texts],
                metadatas=[t['metadata'] for t in texts],
                embedding=self.embeddings,
                persist_directory=self.persist_dir
            )
            self.vectorstore.persist()
        # With nothing to index, skip the vector store call entirely
        print(f'Indexed {len(texts)} chunks from {len(documents)} documents')
    
    def query(self, question: str, k: int = 5) -> tuple:
        '''Query the knowledge base using RAG

        Args:
            question: Natural-language question.
            k: Number of chunks to retrieve as context.

        Returns:
            A ``(answer, source_documents)`` tuple.
        '''
        # Identity check: "not yet opened" must not depend on the store's
        # truthiness.
        if self.vectorstore is None:
            self.vectorstore = Chroma(
                persist_directory=self.persist_dir,
                embedding_function=self.embeddings
            )
        
        llm = ChatAnthropic(model='claude-sonnet-4-6', max_tokens=2048)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=self.vectorstore.as_retriever(search_kwargs={'k': k}),
            return_source_documents=True
        )
        # invoke() replaces the deprecated direct call on the chain
        result = qa_chain.invoke({'query': question})
        return result['result'], result['source_documents']

🤖 Autonomous Browser Agent

from anthropic import Anthropic
from playwright.sync_api import sync_playwright, Page
import json
import base64

# Module-level synchronous Anthropic client used by browser_agent below.
client = Anthropic()

def take_screenshot_base64(page: Page) -> str:
    '''Screenshot the page (full_page=False) and return the bytes as base64 text.'''
    raw_image = page.screenshot(full_page=False)
    encoded = base64.b64encode(raw_image)
    return encoded.decode('utf-8')

def _run_browser_tool(page, name, tool_input):
    '''Execute one non-terminal agent tool against the page and return a result string.'''
    if name == 'click':
        page.click(tool_input['selector'])
        return f'Clicked {tool_input["selector"]}'
    if name == 'type_text':
        page.fill(tool_input['selector'], tool_input['text'])
        return f'Typed into {tool_input["selector"]}'
    if name == 'navigate':
        page.goto(tool_input['url'])
        return f'Navigated to {tool_input["url"]}'
    if name == 'extract_data':
        # Return the page text so the model can read the requested data
        return page.inner_text('body')[:8000]
    return f'Unknown tool: {name}'


def browser_agent(objective: str, start_url: str, max_steps: int = 10):
    '''Autonomous browser agent using Claude's vision capabilities

    Each step sends a screenshot plus the objective to the model, runs the
    tool calls it returns, and feeds the outcomes back as tool results.

    Args:
        objective: Natural-language task for the agent.
        start_url: Page the browser opens first.
        max_steps: Maximum number of model turns.

    Returns:
        The ``result`` given to the ``done`` tool; the model's final text if
        it stops without calling ``done``; otherwise 'Max steps reached'.
    '''
    # Local import: the error type is only needed inside this function
    from playwright.sync_api import Error as PlaywrightError

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            page = browser.new_page(viewport={'width': 1280, 'height': 720})
            page.goto(start_url)
            
            tools = [
                {'name': 'click', 'description': 'Click on an element', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}}, 'required': ['selector']}},
                {'name': 'type_text', 'description': 'Type text into a field', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}, 'text': {'type': 'string'}}, 'required': ['selector', 'text']}},
                {'name': 'navigate', 'description': 'Go to a URL', 'input_schema': {'type': 'object', 'properties': {'url': {'type': 'string'}}, 'required': ['url']}},
                {'name': 'extract_data', 'description': 'Extract data from the page', 'input_schema': {'type': 'object', 'properties': {'description': {'type': 'string'}}, 'required': ['description']}},
                {'name': 'done', 'description': 'Task complete — return final result', 'input_schema': {'type': 'object', 'properties': {'result': {'type': 'string'}}, 'required': ['result']}},
            ]
            
            messages = []
            
            for step in range(max_steps):
                screenshot = take_screenshot_base64(page)
                messages.append({'role': 'user', 'content': [
                    {'type': 'image', 'source': {'type': 'base64', 'media_type': 'image/png', 'data': screenshot}},
                    {'type': 'text', 'text': f'Objective: {objective}\nStep {step+1}/{max_steps}. What action will you take next?'}
                ]})
                
                response = client.messages.create(
                    model='claude-opus-4-5', max_tokens=1000,
                    system='You are an autonomous browser agent. Take actions to complete the objective efficiently.',
                    tools=tools, messages=messages
                )
                
                messages.append({'role': 'assistant', 'content': response.content})
                
                if response.stop_reason == 'end_turn':
                    # The model answered in plain text instead of calling
                    # `done`; return that answer, not the step-limit message.
                    final_text = ''.join(
                        block.text for block in response.content if block.type == 'text'
                    )
                    return final_text or 'Agent stopped without calling done'
                
                tool_results = []
                for block in response.content:
                    if block.type != 'tool_use':
                        continue
                    if block.name == 'done':
                        return block.input['result']
                    
                    tool_result = {'type': 'tool_result', 'tool_use_id': block.id}
                    try:
                        tool_result['content'] = _run_browser_tool(page, block.name, block.input)
                    except (PlaywrightError, KeyError) as exc:
                        # Report the failure so the model can try another
                        # selector or URL instead of crashing the run
                        tool_result['content'] = f'Error: {exc}'
                        tool_result['is_error'] = True
                    tool_results.append(tool_result)
                
                if tool_results:
                    messages.append({'role': 'user', 'content': tool_results})
            
            return 'Max steps reached'
        finally:
            # Close the browser on every exit path, including exceptions
            browser.close()

# Demo run: ask the agent for the first product's price on a practice site.
result = browser_agent(
    start_url='https://books.toscrape.com',
    objective='Find the price of the first product on the page and return it',
)
print(f'Agent result: {result}')

13. AI-Powered Automation

Smart Scrapers, RAG Pipelines, and LLM-Driven Agents

🤖 LLM-Powered Data Extraction

📚 RAG Pipeline for Automation Knowledge Bases

🤖 Autonomous Browser Agent

Knowledge Check

13. AI-Powered Automation

Smart Scrapers, RAG Pipelines, and LLM-Driven Agents

🤖 LLM-Powered Data Extraction

📚 RAG Pipeline for Automation Knowledge Bases

🤖 Autonomous Browser Agent

Knowledge Check