app.py
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from urllib.parse import urljoin
import pandas as pd

# Set up headless Chrome
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
# To point at a specific chromedriver binary, pass a Service object instead
# (Selenium 4 removed the executable_path argument):
#   from selenium.webdriver.chrome.service import Service
#   driver = webdriver.Chrome(service=Service("path/to/chromedriver"), options=options)

# Load the website
base_url = 'https://freemediatools.com'
driver.get(base_url)

# Extract all <a> tags
elements = driver.find_elements(By.TAG_NAME, "a")

# Collect unique absolute URLs
urls = set()
for elem in elements:
    href = elem.get_attribute("href")
    if href:
        full_url = urljoin(base_url, href)
        urls.add(full_url)

driver.quit()

# Save to Excel
df = pd.DataFrame(list(urls), columns=["URLs"])
df.to_excel("freemediatools_urls.xlsx", index=False)

print("✅ All URLs saved to 'freemediatools_urls.xlsx'")
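If the page injects some of its links with JavaScript after the initial load, they may not yet exist when find_elements runs. Below is a minimal sketch of the same scrape with an explicit wait; it assumes the same base_url, and the 10-second timeout is an arbitrary choice rather than anything required by the script above.

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

try:
    base_url = "https://freemediatools.com"
    driver.get(base_url)
    # Wait up to 10 seconds for at least one <a> tag to appear, so links
    # added by JavaScript after the initial load are also picked up.
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "a"))
    )
    hrefs = [a.get_attribute("href") for a in driver.find_elements(By.TAG_NAME, "a")]
    urls = {urljoin(base_url, h) for h in hrefs if h}
finally:
    driver.quit()  # always close the browser, even if the wait times out

print(f"Collected {len(urls)} unique URLs")

The try/finally block guarantees driver.quit() runs even when the wait times out, so headless Chrome processes are not left running in the background.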