lundi 28 septembre 2020

Next Page Iteration in Selenium/BeautfulSoup for Scraping E-Commerce Website

I'm scraping an E-Commerce website, Lazada using Selenium and bs4, I manage to scrape on the 1st page but I unable to iterate to the next page. What I'm tyring to achieve is to scrape the whole pages based on the categories I've selected.

Here what I've tried :

Run the argument with incognito

option = webdriver.ChromeOptions()

option.add_argument(' — incognito')

driver = webdriver.Chrome(executable_path='chromedriver', chrome_options=option)

driver.get('https://www.lazada.com.my/')

driver.maximize_window()
```
# Select category item #


element = driver.find_elements_by_class_name('card-categories-li-content')[0]

webdriver.ActionChains(driver).move_to_element(element).click(element).perform()

t = 10

try:
    
WebDriverWait(driver,t).until(EC.visibility_of_element_located((By.ID,"a2o4k.searchlistcategory.0.i0.460b6883jV3Y0q")))
except TimeoutException:

    print('Page Refresh!')

    driver.refresh()

element = driver.find_elements_by_class_name('card-categories-li-content')[0]

webdriver.ActionChains(driver).move_to_element(element).click(element).perform()

print('Page Load!')

#Soup and select element

def getData(np):

soup = bs(driver.page_source, "lxml")

product_containers = soup.findAll("div", class_='c2prKC')

for p in product_containers:

    title = (p.find(class_='c16H9d').text)#title

    selling_price = (p.find(class_='c13VH6').text)#selling price

    try:

        original_price=(p.find("del", class_='c13VH6').text)#original price 

    except:

        original_price = "-1"

    if p.find("i", class_='ic-dynamic-badge ic-dynamic-badge-freeShipping ic-dynamic-group-2'):
        freeShipping = 1
    else:
        freeShipping = 0
    try:
        discount = (p.find("span", class_='c1hkC1').text)
    except:
        discount ="-1"
    if p.find(("div", {'class':['c16H9d']})):
        url = "https:"+(p.find("a").get("href"))
    else:
        url = "-1"
    nextpage_elements = driver.find_elements_by_class_name('ant-pagination-next')[0]
 

np=webdriver.ActionChains(driver).move_to_element(nextpage_elements).click(nextpage_elements).perform()

    print("- -"*30)
    toSave = [title,selling_price,original_price,freeShipping,discount,url]
    print(toSave)
    writerows(toSave,filename)

getData(np)

``````````````````````````




Aucun commentaire:

Enregistrer un commentaire