requirement.txt
selenium==3.141.0
fake-headers==1.0.2
webdriver_manager==3.2.2
pip install -r requirement.txt
reddit.py
try:
    import argparse
    import csv
    import json
    import sys

    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.chrome.options import Options as ChromeOptions
    from selenium.webdriver.firefox.options import Options as FirefoxOptions
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from fake_headers import Headers
    from webdriver_manager.chrome import ChromeDriverManager
    from webdriver_manager.firefox import GeckoDriverManager
except ModuleNotFoundError:
    print("Please download dependencies from requirement.txt")
except Exception as ex:
    print(ex)


class Reddit:
    """Scrape public profile data (name, bio, banner, avatar, karma,
    cake day) from a Reddit user page with a headless browser, returning
    the result as JSON and mirroring it to ``info.csv``."""

    @staticmethod
    def init_driver(browser_name):
        """Build a headless webdriver for ``browser_name``.

        Args:
            browser_name: "chrome" or "firefox" (case/whitespace
                insensitive).

        Returns:
            A ready webdriver instance; the string
            "Browser Not Supported!" for any other browser (kept for
            backward compatibility with callers that check it); or
            ``None`` when driver setup raised.
        """

        def set_properties(browser_option):
            # fake_headers generates a dict of HTTP headers; only the
            # User-Agent value belongs in the browser argument.  (The
            # original interpolated the whole dict, producing a bogus
            # user-agent string.)
            user_agent = Headers().generate().get("User-Agent", "")
            browser_option.add_argument('--headless')
            browser_option.add_argument('--disable-extensions')
            browser_option.add_argument('--incognito')
            browser_option.add_argument('--disable-gpu')
            browser_option.add_argument('--log-level=3')
            browser_option.add_argument(f'user-agent={user_agent}')
            browser_option.add_argument('--disable-notifications')
            browser_option.add_argument('--disable-popup-blocking')
            return browser_option

        try:
            name = browser_name.strip().lower()
            if name == "chrome":
                options = set_properties(ChromeOptions())
                # selenium 3.x takes the driver path as the first
                # positional argument.
                driver = webdriver.Chrome(
                    ChromeDriverManager().install(), options=options)
            elif name == "firefox":
                options = set_properties(FirefoxOptions())
                driver = webdriver.Firefox(
                    executable_path=GeckoDriverManager().install(),
                    options=options)
            else:
                return "Browser Not Supported!"
            driver.maximize_window()
            return driver
        except Exception as ex:
            print(ex)
            return None

    @staticmethod
    def close_driver(driver):
        """Close the current window and end the webdriver session."""
        driver.close()
        driver.quit()

    @staticmethod
    def scrap(username, browser_name):
        """Scrape the public profile of ``username``.

        Writes the scraped fields to ``info.csv`` and returns them as a
        JSON string, or ``None`` when scraping failed.  Exits the
        process when the driver could not be initialised.
        """
        driver = None
        try:
            url = "https://reddit.com/user/{}".format(username)
            driver = Reddit.init_driver(browser_name)
            try:
                driver.get(url)
            except AttributeError:
                # init_driver returned None or the "not supported"
                # string instead of a driver.
                print("Driver is not set")
                sys.exit()

            # Wait until the karma widget is rendered; the page is a
            # JS app and the other fields appear with it.
            WebDriverWait(driver, 10).until(EC.presence_of_element_located(
                (By.ID, 'profile--id-card--highlight-tooltip--karma')))

            name = driver.title.split(" ")[0]
            bio = driver.find_element(
                By.CLASS_NAME, "bVfceI5F_twrnRcVO1328").text.strip()
            try:
                banner_style = driver.find_element(
                    By.CLASS_NAME,
                    "_2ZyL7luKQghNeMnczY3gqW").get_attribute("style")
            except Exception:
                # Not every profile has a banner.
                banner_style = None
            # The style attribute looks like
            # "background-image: url(<banner-url>)"; pull out the URL.
            # Guarding None here also fixes the original crash when the
            # unguarded value reached the CSV writer.
            banner = (banner_style.split('(')[-1].split(')')[0]
                      if banner_style is not None else "")
            profile = driver.find_element(
                By.CLASS_NAME, '_2bLCGrtCCJIMNCZgmAMZFM').get_attribute("src")
            karma = driver.find_element(
                By.ID,
                "profile--id-card--highlight-tooltip--karma"
            ).get_attribute("innerHTML")
            cake_date = driver.find_element(
                By.ID,
                "profile--id-card--highlight-tooltip--cakeday"
            ).get_attribute("innerHTML")

            data = {
                "name": name,
                "bio": bio,
                "banner": banner,
                "profile_image": profile,
                "karma": karma,
                "cake_date": cake_date,
            }

            # with-statement guarantees the handle is closed (the
            # original leaked it); newline='' is required by the csv
            # module to avoid blank rows on Windows.
            with open('info.csv', 'w', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(
                    ['Name', 'Bio', 'Banner', 'ProfileImage', 'Karma',
                     'Birthday'])
                writer.writerow(
                    [name, bio, banner, profile, karma, cake_date])

            Reddit.close_driver(driver)
            return json.dumps(data)
        except Exception as ex:
            # Best-effort cleanup: driver may be None, a string, or
            # already closed — the original called driver.close()
            # unconditionally and could raise a second error here.
            if driver is not None and not isinstance(driver, str):
                try:
                    Reddit.close_driver(driver)
                except Exception:
                    pass
            print(ex)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("username", help="username to scrap")
    parser.add_argument("--browser", help="What browser your PC have?")
    args = parser.parse_args()
    browser_name = args.browser if args.browser is not None else "chrome"
    print(Reddit.scrap(args.username, browser_name))

# last updated - 27th December, 2021