diff --git a/stolichki/browser.py b/stolichki/browser.py
index a1fa961..6822cb6 100644
--- a/stolichki/browser.py
+++ b/stolichki/browser.py
@@ -14,6 +14,7 @@ from selenium.common.exceptions import (
     NoSuchElementException,
 )
 
+from webdriver_manager.chrome import ChromeDriverManager
 from selenium_stealth import stealth
 from twocaptcha import TwoCaptcha
 
@@ -27,7 +28,7 @@ class StolichkiDriver(webdriver.Chrome):
         self, options: Options = None, service: Service = None, keep_alive: bool = True
     ) -> None:
 
-        assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
+        # assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
 
         if options is None:
             options = webdriver.ChromeOptions()
@@ -35,11 +36,15 @@ class StolichkiDriver(webdriver.Chrome):
         if not os.path.exists("errors"):
             os.mkdir("errors")
 
+        # Respect a caller-supplied service; fall back to webdriver_manager otherwise.
+        if service is None:
+            service = webdriver.ChromeService(ChromeDriverManager().install())
+
         options.add_experimental_option("excludeSwitches", ["enable-automation"])
         options.add_experimental_option("useAutomationExtension", False)
         options.page_load_strategy = "eager"
 
-        self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
+        # self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
 
         super().__init__(options, service, keep_alive)
 
@@ -53,6 +58,13 @@ class StolichkiDriver(webdriver.Chrome):
             fix_hairline=True,
         )
 
+
+    def set_city(self, id: int):
+        """Load the main page, write ``id`` into the ``cityId`` cookie and refresh."""
+        self.get("https://stolichki.ru/")
+        self.__edit_cookie("cityId", id)
+        self.refresh()
+
     def get(self, url: str) -> None:
         super().get(url)
         logging.info(f"Loading {url}")
@@ -63,7 +75,7 @@ class StolichkiDriver(webdriver.Chrome):
             # Если не появился, обновляем страницу и ждём ещё раз.
             # И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
             if not self.__wait_for_presence('//img[@alt="Логотип"]'):
-                self.__handle_captcha()
+                # self.__handle_captcha()
                 self.execute_script("window.stop();")
                 time.sleep(1)
                 self.refresh()
@@ -83,8 +95,9 @@ class StolichkiDriver(webdriver.Chrome):
 
 
     def __wait_for_presence(self, xpath: str, delay: int = 60):
-        wait = WebDriverWait(self, delay)
         try:
+            wait = WebDriverWait(self, delay)
+
             wait.until(
                 EC.presence_of_element_located(
                     (By.XPATH, xpath)
@@ -97,6 +110,24 @@ class StolichkiDriver(webdriver.Chrome):
         except (NoSuchElementException, ElementNotVisibleException):
             return False
 
+    def __edit_cookie(self, name: str, value):
+        """Set cookie ``name`` to ``str(value)`` for the currently loaded page.
+
+        An existing cookie is replaced and keeps its other attributes; a missing
+        cookie is created instead of being skipped or failing on ``None``.
+        """
+        cookie = self.get_cookie(name)
+        if cookie:
+            self.delete_cookie(name)
+            new_cookie = cookie.copy()
+        else:
+            # No such cookie yet: start from the minimal dict add_cookie() accepts.
+            new_cookie = {"name": name}
+
+        new_cookie["value"] = str(value)
+
+        self.add_cookie(new_cookie)
+
     def __handle_captcha(self) -> None:
         for attempt in range(5):
             logging.info(f"Trying to solve captcha {attempt + 1}/5")
diff --git a/stolichki/parser.py b/stolichki/parser.py
index 9e37037..7537490 100644
--- a/stolichki/parser.py
+++ b/stolichki/parser.py
@@ -1,7 +1,6 @@
 import logging
 from multiprocessing import Pool
 
-from selenium import webdriver
 from selenium.webdriver.remote.webelement import WebElement
 from selenium.webdriver.common.by import By
 
@@ -11,7 +10,7 @@ from .product import Product
 
 class StolichkiParser:
     city = {
-        "id": 1,
+        "id": 77,
         "name": "Москва",
     }
 
@@ -19,8 +18,9 @@ class StolichkiParser:
         if city is not None:
             self.city = city
 
-        service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
-        self.driver = StolichkiDriver(service=service)
+        self.driver = StolichkiDriver()
+        self.driver.set_city(self.city.get("id"))
+
         logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
 
     def run(self):
@@ -102,8 +102,3 @@ class StolichkiParser:
             data.append(product)
 
         return data
-
-
-    def set_city(self, id: int):
-        # TODO: Написать смену города путём заменой значения в куки браузера
-        pass