app.py
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Seconds to pause between consecutive product-page requests.
REQUEST_DELAY_SECONDS = 2

# Element ids tried, in order, when looking for the product price.
PRICE_IDS = ('priceblock_ourprice', 'priceblock_dealprice', 'priceblock_saleprice')


def _clean_text(tag):
    """Return the tag's stripped text with commas removed, or "NA" if tag is None."""
    if tag is None:
        return "NA"
    return tag.get_text(strip=True).replace(',', '')


def extract_product_info(url):
    """Fetch one product page and scrape its headline details.

    Args:
        url: Address of the product page; surrounding whitespace is ignored.

    Returns:
        A dict with the keys "Title", "Price", "Rating", "Reviews",
        "Availability" and "URL". Any field whose element is not found on
        the page is set to "NA". Returns None when the HTTP request fails
        (connection error, timeout, or non-2xx status).
    """
    url = url.strip()
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request error for URL {url}: {e}")
        return None

    soup = BeautifulSoup(response.content, "lxml")

    # Extract product title (commas are kept in the title on purpose).
    title_tag = soup.find("span", attrs={"id": 'productTitle'})
    title = title_tag.get_text(strip=True) if title_tag else "NA"

    # Extract price: try the known price ids first, in order.
    price = "NA"
    for pid in PRICE_IDS:
        price_tag = soup.find("span", attrs={"id": pid})
        if price_tag:
            price = _clean_text(price_tag)
            break
    if price == "NA":
        price_span = soup.find("span", class_="a-price-whole")
        if price_span:
            # The whole-number span can carry the decimal separator as
            # trailing text (e.g. "1299."); drop it so the value is clean.
            price = _clean_text(price_span).rstrip('.')

    # Extract rating. Matching on the single class "a-icon-star" finds the
    # star icon for any rating value rather than only one specific
    # "a-star-N" variant; fall back to the first "a-icon-alt" span.
    rating_tag = soup.find("i", class_="a-icon-star")
    if not rating_tag:
        rating_tag = soup.find("span", class_="a-icon-alt")
    rating = _clean_text(rating_tag) if rating_tag else "NA"

    # Extract review count.
    review_count_tag = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
    review_count = _clean_text(review_count_tag) if review_count_tag else "NA"

    # Extract availability from the first <span> inside the availability div.
    availability = "NA"
    availability_div = soup.find("div", attrs={'id': 'availability'})
    if availability_div:
        availability_span = availability_div.find("span")
        if availability_span:
            availability = _clean_text(availability_span)

    print(f"Title: {title}")
    print(f"Price: {price}")
    print(f"Rating: {rating}")
    print(f"Reviews: {review_count}")
    print(f"Availability: {availability}")
    print("-" * 80)

    return {
        "Title": title,
        "Price": price,
        "Rating": rating,
        "Reviews": review_count,
        "Availability": availability,
        "URL": url
    }


def main():
    """Scrape every URL listed in 'url.txt' and save the results to 'out.xlsx'.

    Reads one URL per non-blank line, scrapes each page in turn with a pause
    between requests, and writes the collected rows to an Excel file. Prints
    a message and returns early if 'url.txt' does not exist.
    """
    try:
        with open("url.txt", "r", encoding="utf-8") as file:
            urls = [line.strip() for line in file if line.strip()]
    except FileNotFoundError:
        print("The file 'url.txt' was not found.")
        return

    data = []
    for index, url in enumerate(urls):
        # Respectful delay between requests; no wait before the first one
        # and none after the last.
        if index:
            time.sleep(REQUEST_DELAY_SECONDS)
        info = extract_product_info(url)
        if info:
            data.append(info)

    if data:
        df = pd.DataFrame(data)
        df.to_excel("out.xlsx", index=False)
        print("Data has been written to 'out.xlsx'.")
    else:
        print("No data was extracted.")


if __name__ == "__main__":
    main()
Before running the script, create a url.txt file in the same folder and store the Amazon product URLs in it, one per line, as shown below:
| 1 2 3 4 | https://www.amazon.in/Voltas-Vectra-Platina-Fixed-Window/dp/B0BQYB5YVF/?_encoding=UTF8&pd_rd_w=hq1sE&content-id=amzn1.sym.509965a2-791b-4055-b876-943397d37ed3%3Aamzn1.symc.fc11ad14-99c1-406b-aa77-051d0ba1aade&pf_rd_p=509965a2-791b-4055-b876-943397d37ed3&pf_rd_r=ZYEY4GCZWG1XC6FFCHZS&pd_rd_wg=WJwcO&pd_rd_r=bcabcaaf-9551-472c-a739-23c371f490d3&ref_=pd_hp_d_atf_ci_mcx_mr_ca_hp_atf_d https://www.amazon.in/gp/product/B0BYSDH7P9/ref=ewc_pr_img_1?smid=A1WYWER0W24N8S&th=1&psc=1 https://www.amazon.in/Van-Heusen-Sport-Regular-VSKP517S011408_Black_Small/dp/B076CJFG6J/ref=srd_d_vsims_d_sccl_2_4/261-0648790-8166604?pd_rd_w=eNLyX&content-id=amzn1.sym.7ccbe032-5929-4c88-ab39-4923842061df&pf_rd_p=7ccbe032-5929-4c88-ab39-4923842061df&pf_rd_r=4PCK8VF24A9PMXVC8EES&pd_rd_wg=qnZKc&pd_rd_r=6632a723-14ec-4620-87f4-ba1ecdb21d9a&pd_rd_i=B0F6YQBDYT&psc=1 https://www.amazon.in/Microtek-EM4170-170V-270V-Digital-Stabilizer/dp/B01GTQFHOC/ref=srd_d_vsims_d_sccl_3_2/261-0648790-8166604?pd_rd_w=TR8wz&content-id=amzn1.sym.7ccbe032-5929-4c88-ab39-4923842061df&pf_rd_p=7ccbe032-5929-4c88-ab39-4923842061df&pf_rd_r=GZJFXRMKVK0G6K9N1Z34&pd_rd_wg=J6bwo&pd_rd_r=faaa8d40-b1b5-4179-bcbb-b65fca85475a&pd_rd_i=B01GTQFHOC&psc=1 |