90 MINS intermediate
5. Browser Automation
Module 05: Browser Automation
Filling Forms, Clicking Buttons, and Automating User Flows
Browser automation goes beyond data extraction — it replicates complete user journeys. Login flows, form submissions, multi-step checkout processes, file uploads, dropdown navigation — anything a human can do in a browser, Playwright can automate. This is the foundation of RPA (Robotic Process Automation), end-to-end testing, and complex site interactions that require authentication.
🖱️ Core Interaction Patterns
from playwright.sync_api import sync_playwright
import time
# Walk through a registration form end to end: fill the text fields, pick
# dropdown options, tick the checkbox and radio button, submit, then capture
# a screenshot of the page that follows.
with sync_playwright() as playwright:
    # headless=False keeps the browser window visible, which helps debugging.
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://example-form.com/register')

    # Text inputs, filled in the order they are listed here.
    text_fields = (
        ('#username', 'automation_user'),
        ('#email', 'user@example.com'),
        ('#password', 'SecurePass123!'),
    )
    for selector, value in text_fields:
        page.fill(selector, value)

    # Dropdowns: the positional form selects by value, label= by visible text.
    page.select_option('#country', 'Ghana')
    page.select_option('#timezone', label='Africa/Accra')

    # Checkbox and radio button.
    page.check('#terms-checkbox')
    page.click('input[type="radio"][value="monthly"]')

    # Submit, then block until the URL matches the dashboard pattern.
    page.click('button[type="submit"]')
    page.wait_for_url('**/dashboard**', timeout=10000)
    print('Registration successful, now on dashboard')

    # Full-page screenshot kept as evidence of the final state.
    page.screenshot(path='dashboard.png', full_page=True)
    browser.close()

# Login Automation and Session Management
import json
from pathlib import Path
from playwright.sync_api import sync_playwright
# JSON file that login_and_save_session writes the context cookies to and
# use_saved_session reads them back from (path is relative to the working dir).
COOKIE_FILE = Path('session_cookies.json')
def login_and_save_session(email: str, password: str):
    """Log in through the web form and persist the session cookies to disk.

    Submits ``email`` and ``password`` on the login page, waits for the
    dashboard URL, then writes the browser context's cookies to
    ``COOKIE_FILE`` as indented JSON.

    Args:
        email: Value typed into the ``email`` input.
        password: Value typed into the ``password`` input.
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        login_page = context.new_page()

        login_page.goto('https://example.com/login')
        credentials = {
            'input[name="email"]': email,
            'input[name="password"]': password,
        }
        for selector, value in credentials.items():
            login_page.fill(selector, value)
        login_page.click('button[type="submit"]')
        # Block until the URL matches the dashboard pattern (15 s limit).
        login_page.wait_for_url('**/dashboard', timeout=15000)

        # Only the context's cookies are written out here.
        session_cookies = context.cookies()
        serialized = json.dumps(session_cookies, indent=2)
        COOKIE_FILE.write_text(serialized)
        print(f'Session saved with {len(session_cookies)} cookies')

        browser.close()
def use_saved_session():
    """Reuse previously saved cookies to scrape the orders page.

    Loads the cookie list from ``COOKIE_FILE``, adds it to a fresh browser
    context, opens the orders page and extracts one record per order row.

    Returns:
        A list with one dict per ``tr.order-row`` element, holding the row's
        ``id`` (its ``data-order-id`` attribute) and ``status`` (inner text
        of its ``.status`` child).

    Raises:
        RuntimeError: If ``COOKIE_FILE`` does not exist yet.
    """
    if not COOKIE_FILE.exists():
        raise RuntimeError('No saved session. Run login_and_save_session first.')
    cookies = json.loads(COOKIE_FILE.read_text())

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            context.add_cookies(cookies)  # restore session
            page = context.new_page()
            # Go directly to the protected page — the restored cookies are
            # expected to carry the authenticated session.
            page.goto('https://example.com/dashboard/orders')
            page.wait_for_selector('table.orders-table')
            # Now scrape the authenticated content
            orders = page.eval_on_selector_all(
                'tr.order-row',
                'rows => rows.map(r => ({ id: r.dataset.orderId, status: r.querySelector(".status").innerText }))'
            )
            return orders
        finally:
            # Close the browser explicitly, even if a step above raised.
            browser.close()

# File Upload and Download Automation
from playwright.sync_api import sync_playwright
from pathlib import Path
# Upload files through a file input, then trigger a download and save it
# under the local "downloads" directory.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=True)
    context = browser.new_context(accept_downloads=True)
    page = context.new_page()

    # --- FILE UPLOAD ---
    page.goto('https://example.com/upload')
    # set_input_files attaches the file directly, so no file dialog is opened.
    page.set_input_files('input[type="file"]', 'report.csv')
    # Passing a list attaches several files at once.
    batch = ['file1.csv', 'file2.csv']
    page.set_input_files('input[type="file"][multiple]', batch)
    page.click('button#upload-btn')
    page.wait_for_selector('div.upload-success')
    print('Upload complete')

    # --- FILE DOWNLOAD ---
    # The click must happen inside expect_download so the download is captured.
    with page.expect_download() as pending_download:
        page.click('a#export-csv-btn')
    download = pending_download.value
    save_path = Path('downloads').joinpath(download.suggested_filename)
    save_path.parent.mkdir(exist_ok=True)
    download.save_as(str(save_path))
    print(f'Downloaded: {save_path}')

    browser.close()

# ⚡ Handling Dynamic Content and Waits
from playwright.sync_api import sync_playwright, expect
# Demonstrates three ways of synchronising with a dynamic page: waiting on
# element state, waiting on a network response, and Playwright assertions.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=True)
    page = browser.new_page()
    page.goto('https://dynamic-app.com')

    # Element state: results become visible, then the spinner goes hidden.
    page.wait_for_selector('div#results', state='visible', timeout=15000)
    page.wait_for_selector('div.loading-spinner', state='hidden', timeout=20000)

    # Network: the actions that fire the request run inside expect_response.
    with page.expect_response('**/api/search**') as pending_response:
        page.fill('input#search', 'automation tools')
        page.click('button#search-btn')
    response = pending_response.value
    data = response.json()

    # Assertions via Playwright's expect().
    heading = page.locator('h1')
    result_items = page.locator('ul.results li')
    expect(heading).to_contain_text('Search Results')
    expect(result_items).to_have_count(10)

    browser.close()

# Automation Arena: Target Practice
VOID MART.
CyberDeck Keyboard
$149.99
In Stock
Neural-Link Headset
$899.00
Low Stock
Haptic Gloves v2
$245.50
Out of Stock
Quantum SSD (2TB)
$120.00
In Stock
$
Scraper Output: 0 matches
No elements matched.
Knowledge Check
Ready to test your understanding of 5. Browser Automation?