1:"$Sreact.fragment" 2:I[22016,["/_next/static/chunks/0sqf3kwsxhw92.js","/_next/static/chunks/15vvi4du_kj4d.js","/_next/static/chunks/0t2xr05rlu96l.js","/_next/static/chunks/0j_00-43ohwi..js","/_next/static/chunks/074m5~1.spxnd.js","/_next/static/chunks/03pwh54kk_crp.js"],""] 8:I[6966,["/_next/static/chunks/0sqf3kwsxhw92.js","/_next/static/chunks/15vvi4du_kj4d.js","/_next/static/chunks/0t2xr05rlu96l.js","/_next/static/chunks/0j_00-43ohwi..js","/_next/static/chunks/074m5~1.spxnd.js","/_next/static/chunks/03pwh54kk_crp.js"],"BlogPostContent"] a:I[97367,["/_next/static/chunks/0sqf3kwsxhw92.js","/_next/static/chunks/15vvi4du_kj4d.js","/_next/static/chunks/0t2xr05rlu96l.js","/_next/static/chunks/0j_00-43ohwi..js","/_next/static/chunks/074m5~1.spxnd.js"],"OutletBoundary"] b:"$Sreact.suspense" 0:{"rsc":["$","$1","c",{"children":[["$","div",null,{"className":"min-h-screen bg-background text-foreground","children":[["$","section",null,{"className":"pt-28 pb-16 md:pt-36 md:pb-24 bg-gradient-to-b from-accent/30 to-background","children":["$","div",null,{"className":"container px-4 md:px-6","children":["$","div",null,{"className":"max-w-4xl mx-auto","children":[["$","$L2",null,{"href":"/blog","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-arrow-left mr-2 h-4 w-4","children":[["$","path","1l729n",{"d":"m12 19-7-7 7-7"}],["$","path","x3x0zl",{"d":"M19 12H5"}],"$undefined"]}],"Back to Blog"],"className":"inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 hover:bg-accent hover:text-accent-foreground 
h-10 px-4 py-2 mb-6","ref":null}],["$","div",null,{"className":"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 border-transparent bg-primary text-primary-foreground hover:bg-primary/80 mb-4","children":"Web Scraping"}],["$","h1",null,{"className":"text-3xl md:text-4xl lg:text-5xl font-bold tracking-tighter mb-6 animate-fade-in","children":"Advanced Web Scraping Techniques: Handling Dynamic Content with Selenium"}],["$","div",null,{"className":"flex flex-wrap items-center gap-4 text-muted-foreground mb-8 animate-fade-in","children":[["$","div",null,{"className":"flex items-center gap-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-calendar h-4 w-4","children":[["$","path","1cmpym",{"d":"M8 2v4"}],["$","path","4m81vk",{"d":"M16 2v4"}],["$","rect","1hopcy",{"width":"18","height":"18","x":"3","y":"4","rx":"2"}],["$","path","8toen8",{"d":"M3 10h18"}],"$undefined"]}],["$","span",null,{"children":"December 10, 2024"}]]}],["$","div",null,{"className":"flex items-center gap-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-clock h-4 w-4","children":[["$","circle","1mglay",{"cx":"12","cy":"12","r":"10"}],["$","polyline","68esgv",{"points":"12 6 12 12 16 14"}],"$undefined"]}],["$","span",null,{"children":"12 min read"}]]}],["$","div",null,{"className":"flex items-center gap-2","children":["$","span",null,{"children":["By ","Muhammad Zaid"]}]}]]}],["$","div",null,{"className":"flex flex-wrap gap-2 mb-8 
animate-fade-in","children":[["$","div","Selenium",{"className":"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 text-foreground","children":"Selenium"}],["$","div","Web Scraping",{"className":"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 text-foreground","children":"Web Scraping"}],["$","div","Python",{"className":"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 text-foreground","children":"Python"}],["$","div","Data Extraction",{"className":"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 text-foreground","children":"Data Extraction"}]]}]]}]}]}],"$L3","$L4","$L5"]}],["$L6"],"$L7"]}],"isPartial":false,"staleTime":300,"varyParams":null,"buildId":"QX83e4YaSJMU9KhrDXtKJ"} 3:["$","section",null,{"className":"pb-12","children":["$","div",null,{"className":"container px-4 md:px-6","children":["$","div",null,{"className":"max-w-4xl mx-auto","children":["$","img",null,{"src":"https://images.unsplash.com/photo-1551288049-bebda4e38f71?auto=format&fit=crop&w=800","alt":"Advanced Web Scraping Techniques: Handling Dynamic Content with Selenium","className":"w-full h-auto rounded-lg shadow-xl","loading":"lazy"}]}]}]}] 9:Tffb,# Advanced Web Scraping Techniques: Handling Dynamic Content with Selenium As a **web scraping expert**, I've encountered countless challenges when scraping modern JavaScript-heavy websites. This guide shares advanced techniques I've developed over years of **data extraction** projects. ## Why Selenium for Web Scraping? 
While tools like BeautifulSoup are excellent for static content, modern websites require a browser automation tool. Selenium allows you to: - Execute JavaScript - Handle dynamic content - Interact with page elements - Wait for content to load - Simulate user behavior ## Setting Up Selenium ```python from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC # Configure Chrome options options = webdriver.ChromeOptions() options.add_argument('--headless') options.add_argument('--no-sandbox') options.add_argument('--disable-dev-shm-usage') driver = webdriver.Chrome(options=options) ``` ## Handling Dynamic Content As a **Python developer** specializing in **web scraping**, I always use explicit waits: ```python # Wait for element to be present wait = WebDriverWait(driver, 10) element = wait.until( EC.presence_of_element_located((By.CLASS_NAME, "product-title")) ) ``` ## Infinite Scrolling Many modern websites use infinite scrolling. Here's how to handle it: ```python import time def scroll_to_bottom(driver, pause_time=2): last_height = driver.execute_script("return document.body.scrollHeight") while True: # Scroll down driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(pause_time) # Calculate new height new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: break last_height = new_height ``` ## Bypassing Anti-Scraping Measures ### 1. User Agent Rotation ```python user_agents = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', ] options.add_argument(f'user-agent={random.choice(user_agents)}') ``` ### 2. Adding Random Delays ```python import random time.sleep(random.uniform(1, 3)) ``` ### 3. 
Handling CAPTCHAs For production **web scraping** projects, consider: - CAPTCHA solving services - Rotating proxies - Session management ## Error Handling Robust error handling is essential in **automation**: ```python from selenium.common.exceptions import TimeoutException, NoSuchElementException try: element = wait.until(EC.presence_of_element_located((By.ID, "content"))) except TimeoutException: print("Element not found within timeout period") driver.save_screenshot('error.png') except NoSuchElementException: print("Element does not exist") ``` ## Data Storage Store scraped data efficiently: ```python import json data = [] elements = driver.find_elements(By.CLASS_NAME, "product") for element in elements: product = { 'title': element.find_element(By.CLASS_NAME, "title").text, 'price': element.find_element(By.CLASS_NAME, "price").text, } data.append(product) with open('scraped_data.json', 'w') as f: json.dump(data, f, indent=2) ``` ## Best Practices As a **data scraping expert**, I always recommend: 1. **Respect robots.txt** 2. **Implement rate limiting** 3. **Use proper error handling** 4. **Clean up resources** (close browsers) 5. **Monitor your scrapers** ## Conclusion Advanced **web scraping** requires understanding both the technical aspects and ethical considerations. These techniques have helped me successfully complete numerous **data extraction** projects. Need help with your **web scraping** project? 
As a **freelance Python developer**, I specialize in building robust, scalable scraping solutions.4:["$","section",null,{"className":"pb-16","children":["$","div",null,{"className":"container px-4 md:px-6","children":["$","div",null,{"className":"max-w-4xl mx-auto","children":["$","article",null,{"className":"prose prose-lg dark:prose-invert max-w-none prose-pre:p-0 prose-pre:bg-transparent prose-pre:border-0","children":["$","$L8",null,{"content":"$9"}]}]}]}]}] 5:["$","section",null,{"className":"section bg-accent/30","children":["$","div",null,{"className":"container px-4 md:px-6","children":["$","div",null,{"className":"max-w-3xl mx-auto text-center space-y-6","children":[["$","h2",null,{"className":"text-3xl md:text-4xl font-bold tracking-tighter","children":["Need Expert ",["$","span",null,{"className":"text-primary","children":"Python Development"}],"?"]}],["$","p",null,{"className":"text-xl text-muted-foreground","children":["Looking to ",["$","strong",null,{"children":"hire Python developer"}]," or need help with ",["$","strong",null,{"children":"Django"}],", ",["$","strong",null,{"children":"web scraping"}],", or ",["$","strong",null,{"children":"automation"}]," projects? 
Let's work together!"]}],["$","div",null,{"className":"flex flex-col sm:flex-row gap-4 justify-center","children":[["$","$L2",null,{"href":"/contact","children":["Get In Touch ",["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-arrow-right ml-2 h-5 w-5","children":[["$","path","1ays0h",{"d":"M5 12h14"}],["$","path","xquz4c",{"d":"m12 5 7 7-7 7"}],"$undefined"]}]],"className":"inline-flex items-center justify-center gap-2 whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 bg-primary text-primary-foreground hover:bg-primary/90 h-11 px-8 rounded-full","ref":null}],["$","$L2",null,{"href":"/blog","children":"View All Posts","className":"inline-flex items-center justify-center gap-2 whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-11 px-8 rounded-full","ref":null}]]}]]}]}]}] 6:["$","script","script-0",{"src":"/_next/static/chunks/03pwh54kk_crp.js","async":true}] 7:["$","$La",null,{"children":["$","$b",null,{"name":"Next.MetadataOutlet","children":"$@c"}]}] c:null