90 MINS advanced
13. AI-Powered Automation
Module 13: AI Automation
Smart Scrapers, RAG Pipelines, and LLM-Driven Agents
Traditional automation breaks when sites change layout, add CAPTCHAs, or present data in unstructured formats. AI-powered automation adapts. Large language models can parse natural language, extract structured data from arbitrary HTML, make decisions based on content, and complete tasks that would require hundreds of conditional rules to handle programmatically.
This module covers three AI automation paradigms: LLM-based data extraction, Retrieval-Augmented Generation (RAG) pipelines for knowledge bases, and autonomous agents that can browse and act without pre-programmed flows.
🤖 LLM-Powered Data Extraction
from anthropic import Anthropic
from pydantic import BaseModel, validator
from typing import List, Optional
import json
# Shared synchronous Anthropic client used by extract_product_data.
# Constructed with no arguments, so it relies on the SDK's default
# configuration (presumably credentials come from the environment — confirm).
client = Anthropic()
class ProductData(BaseModel):
    """Structured product record parsed from an LLM extraction response.

    Only ``name`` and ``availability`` are mandatory; every other field has a
    default so that a partially filled response still produces a record.
    """

    name: str
    # Explicit default: without it the field is implicitly optional on
    # Pydantic v1 but *required* on Pydantic v2, where a response that omits
    # "price" would fail validation and lose the whole record.
    price: Optional[float] = None
    currency: str = 'USD'
    availability: str  # 'in_stock', 'out_of_stock', 'unknown'
    description: str = ''
    specifications: dict = {}

    @validator('price', pre=True)
    def validate_price(cls, v):
        """Coerce a raw price into a float before field validation.

        Thousands separators (``,``) and dollar signs are removed first.
        Returns ``None`` when the value is ``None`` or cannot be parsed as a
        number, so an unparseable price never rejects the whole record.
        """
        if v is None:
            return None
        try:
            return float(str(v).replace(',', '').replace('$', ''))
        except ValueError:
            return None
def extract_product_data(html: str, url: str) -> Optional[ProductData]:
    """Ask the LLM to extract product fields from a page's HTML.

    Args:
        html: Raw page HTML; only the first 8000 characters are sent.
        url: Page URL, included in the prompt and in error messages.

    Returns:
        A validated ``ProductData`` instance, or ``None`` when the response
        is empty, is not a JSON object, or fails model validation.
        Errors raised by the API call itself are not caught here.
    """
    prompt = f'''Extract product information from this HTML page.
Return ONLY a valid JSON object with these fields:
- name: string (product name)
- price: number or null (numeric price only, no currency symbols)
- currency: string (3-letter currency code, default USD)
- availability: 'in_stock', 'out_of_stock', or 'unknown'
- description: string (main product description, max 200 chars)
- specifications: object (key-value pairs of technical specs)
URL: {url}
HTML (truncated to first 8000 chars):
{html[:8000]}
Return only the JSON object, no explanation.'''
    message = client.messages.create(
        model='claude-opus-4-5',
        max_tokens=1000,
        messages=[{'role': 'user', 'content': prompt}],
    )
    try:
        raw = message.content[0].text
        # Models sometimes wrap the JSON in a Markdown code fence or add a
        # sentence around it despite the instructions; keep only the
        # outermost {...} span. Pure JSON objects are unaffected.
        start, end = raw.find('{'), raw.rfind('}')
        if start != -1 and end > start:
            raw = raw[start:end + 1]
        data = json.loads(raw)
        if not isinstance(data, dict):
            raise ValueError('expected a JSON object')
        return ProductData(**data)
    except (json.JSONDecodeError, ValueError, IndexError, AttributeError) as e:
        # IndexError: empty content list; AttributeError: first block is not
        # a text block. Both are treated like any other unparseable response.
        print(f'Parse error for {url}: {e}')
        return None
# Batch extraction with LLM
import asyncio
from anthropic import AsyncAnthropic
# Shared asynchronous Anthropic client used by batch_extract; like the
# synchronous client it is constructed with the SDK's default configuration.
async_client = AsyncAnthropic()
async def batch_extract(pages: list, concurrency: int = 5) -> List[ProductData]:
    """Extract product data from many pages with bounded concurrency.

    Args:
        pages: Page objects passed one at a time to ``build_prompt``.
        concurrency: Maximum number of API requests in flight at once.

    Returns:
        The successfully parsed ``ProductData`` records, in input order.
        Pages whose response cannot be parsed or validated are reported and
        skipped.

    Raises:
        ValueError: If ``concurrency`` is less than 1 (a zero-slot semaphore
            would make every request wait forever).
    """
    if concurrency < 1:
        raise ValueError('concurrency must be at least 1')
    semaphore = asyncio.Semaphore(concurrency)

    async def extract_one(page):
        # Only the network call needs to hold a semaphore slot.
        async with semaphore:
            message = await async_client.messages.create(
                model='claude-opus-4-5',
                max_tokens=1000,
                # NOTE(review): build_prompt is not defined in the visible
                # code; it must be provided elsewhere before this can run.
                messages=[{'role': 'user', 'content': build_prompt(page)}],
            )
        try:
            raw = message.content[0].text
            # Tolerate Markdown fences or stray prose around the JSON object.
            start, end = raw.find('{'), raw.rfind('}')
            if start != -1 and end > start:
                raw = raw[start:end + 1]
            return ProductData(**json.loads(raw))
        except (ValueError, TypeError, IndexError, AttributeError) as e:
            # Best effort: report the failure and drop this page only.
            print(f'Parse error: {e}')
            return None

    results = await asyncio.gather(*[extract_one(p) for p in pages])
    return [r for r in results if r is not None]


# RAG Pipeline for Automation Knowledge Bases
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain.chains import RetrievalQA
from pathlib import Path
import json
# Build a searchable knowledge base from scraped data
class AutomationKnowledgeBase:
    """Vector-store backed knowledge base over scraped documents.

    Documents are chunked, embedded with a sentence-transformers model and
    stored in a Chroma collection under ``persist_dir``. ``query`` answers
    questions with a retrieval QA chain over that collection.
    """

    def __init__(self, persist_dir: str = './kb_db'):
        """Create the embedding model; the vector store is opened lazily.

        Args:
            persist_dir: Directory Chroma uses for the collection.
        """
        self.embeddings = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        self.persist_dir = persist_dir
        self.vectorstore = None

    def index_documents(self, documents: List[dict]):
        """Index scraped documents into the vector store.

        Args:
            documents: Dicts with ``'content'``, ``'url'`` and ``'title'``
                keys. Content is split into 1000-character chunks with a
                200-character overlap; each chunk keeps its source URL and
                title as metadata.

        When there is nothing to index (no documents, or no chunks produced)
        the vector store is left untouched instead of handing empty lists to
        Chroma.
        """
        texts = []
        metadatas = []
        if documents:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000, chunk_overlap=200
            )
            for doc in documents:
                for chunk in splitter.split_text(doc['content']):
                    texts.append(chunk)
                    metadatas.append({'source': doc['url'], 'title': doc['title']})
        if texts:
            self.vectorstore = Chroma.from_texts(
                texts=texts,
                metadatas=metadatas,
                embedding=self.embeddings,
                persist_directory=self.persist_dir,
            )
            self.vectorstore.persist()
        print(f'Indexed {len(texts)} chunks from {len(documents)} documents')

    def query(self, question: str, k: int = 5) -> tuple:
        """Query the knowledge base using RAG.

        Args:
            question: Natural-language question.
            k: Number of chunks to retrieve as context.

        Returns:
            A ``(answer, source_documents)`` tuple: the chain's answer text
            and the retrieved documents it was given.
        """
        if self.vectorstore is None:
            # Nothing indexed in this process: open the collection stored
            # under persist_dir.
            self.vectorstore = Chroma(
                persist_directory=self.persist_dir,
                embedding_function=self.embeddings,
            )
        llm = ChatAnthropic(model='claude-sonnet-4-6', max_tokens=2048)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=self.vectorstore.as_retriever(search_kwargs={'k': k}),
            return_source_documents=True,
        )
        # Chain.__call__ is deprecated in LangChain; invoke() is the
        # supported entry point and returns the same result dict.
        result = qa_chain.invoke({'query': question})
        return result['result'], result['source_documents']


# 🤖 Autonomous Browser Agent
from anthropic import Anthropic
from playwright.sync_api import sync_playwright, Page
import json
import base64
# Synchronous Anthropic client for the browser-agent example; rebinds the
# module-level name `client` with an identically constructed instance.
client = Anthropic()
def take_screenshot_base64(page: Page) -> str:
    """Capture the visible viewport of *page* and return it base64-encoded.

    The screenshot covers only the current viewport (``full_page=False``);
    the encoded bytes are returned as a UTF-8 string.
    """
    png_bytes = page.screenshot(full_page=False)
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')
def _run_browser_tool(page: Page, name: str, tool_input: dict) -> str:
    """Execute one non-terminal agent tool on *page* and describe the outcome."""
    if name == 'click':
        page.click(tool_input['selector'])
        return f'Clicked {tool_input["selector"]}'
    if name == 'type_text':
        page.fill(tool_input['selector'], tool_input['text'])
        return f'Typed into {tool_input["selector"]}'
    if name == 'navigate':
        page.goto(tool_input['url'])
        return f'Navigated to {tool_input["url"]}'
    if name == 'extract_data':
        # Hand the model the page's visible text (capped to keep the
        # conversation small) so it can pull out what it described.
        return page.inner_text('body')[:4000]
    raise ValueError(f'Unknown tool: {name}')


def browser_agent(objective: str, start_url: str, max_steps: int = 10,
                  headless: bool = False):
    """Autonomous browser agent using Claude's vision capabilities.

    Each step sends a viewport screenshot plus the objective to the model,
    executes the tool calls it returns, and feeds the outcomes back.

    Args:
        objective: Natural-language task for the agent.
        start_url: Page the browser opens first.
        max_steps: Maximum number of model turns before giving up.
        headless: Run Chromium without a visible window (default ``False``).

    Returns:
        The ``result`` passed to the ``done`` tool; the model's final text if
        it ends its turn without calling ``done``; or ``'Max steps reached'``.
    """
    # Function-scope import: Playwright's base error class (its timeout error
    # derives from it) is needed only to report failed tool calls.
    from playwright.sync_api import Error as PlaywrightError

    tools = [
        {'name': 'click', 'description': 'Click on an element', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}}, 'required': ['selector']}},
        {'name': 'type_text', 'description': 'Type text into a field', 'input_schema': {'type': 'object', 'properties': {'selector': {'type': 'string'}, 'text': {'type': 'string'}}, 'required': ['selector', 'text']}},
        {'name': 'navigate', 'description': 'Go to a URL', 'input_schema': {'type': 'object', 'properties': {'url': {'type': 'string'}}, 'required': ['url']}},
        {'name': 'extract_data', 'description': 'Extract data from the page', 'input_schema': {'type': 'object', 'properties': {'description': {'type': 'string'}}, 'required': ['description']}},
        {'name': 'done', 'description': 'Task complete — return final result', 'input_schema': {'type': 'object', 'properties': {'result': {'type': 'string'}}, 'required': ['result']}},
    ]
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page(viewport={'width': 1280, 'height': 720})
            page.goto(start_url)
            messages = []
            for step in range(max_steps):
                screenshot = take_screenshot_base64(page)
                messages.append({'role': 'user', 'content': [
                    {'type': 'image', 'source': {'type': 'base64', 'media_type': 'image/png', 'data': screenshot}},
                    {'type': 'text', 'text': f'Objective: {objective}\nStep {step+1}/{max_steps}. What action will you take next?'}
                ]})
                response = client.messages.create(
                    model='claude-opus-4-5', max_tokens=1000,
                    system='You are an autonomous browser agent. Take actions to complete the objective efficiently.',
                    tools=tools, messages=messages
                )
                messages.append({'role': 'assistant', 'content': response.content})
                if response.stop_reason == 'end_turn':
                    # The model answered in plain text instead of calling
                    # `done`; return that answer rather than claiming the
                    # step budget ran out.
                    final_text = ''.join(
                        block.text for block in response.content
                        if block.type == 'text'
                    ).strip()
                    return final_text or 'Agent stopped without a final result'
                tool_results = []
                for block in response.content:
                    if block.type != 'tool_use':
                        continue
                    if block.name == 'done':
                        return block.input['result']
                    outcome = {'type': 'tool_result', 'tool_use_id': block.id}
                    try:
                        outcome['content'] = _run_browser_tool(page, block.name, block.input)
                    except (PlaywrightError, KeyError, ValueError) as exc:
                        # Report the failure to the model so it can try a
                        # different selector or action instead of crashing.
                        outcome['content'] = f'Error: {exc}'
                        outcome['is_error'] = True
                    tool_results.append(outcome)
                if tool_results:
                    messages.append({'role': 'user', 'content': tool_results})
            return 'Max steps reached'
        finally:
            # Single exit point for cleanup, including error paths.
            browser.close()
# Example run. Guarded so that importing this module does not launch a
# browser or make API calls as a side effect.
if __name__ == '__main__':
    result = browser_agent(
        objective='Find the price of the first product on the page and return it',
        start_url='https://books.toscrape.com'
    )
    print(f'Agent result: {result}')


# Knowledge Check
Ready to test your understanding of Module 13: AI-Powered Automation?