From 19821b22d672f2131bf6ca5987368f558ff7b65d Mon Sep 17 00:00:00 2001 From: winet Date: Tue, 26 Dec 2023 20:59:19 +1000 Subject: [PATCH] Fixed data loss on first loading --- stolichki/driver.py | 11 +++++----- stolichki/parsers/category.py | 40 +++++++++++++---------------------- 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/stolichki/driver.py b/stolichki/driver.py index 9d2deec..56cbae4 100644 --- a/stolichki/driver.py +++ b/stolichki/driver.py @@ -57,9 +57,10 @@ class StolichkiDriver(uc.Chrome): def set_proxy(self): ... - - def get_response(self, url_re: re.Pattern[str]) -> None | dict: - logs = self.get_log("performance") + + def get_response(self, url_re: re.Pattern[str], logs: list[dict] | None = None) -> None | dict: + if not logs: + logs = self.get_log("performance") body = None for log in filter(self.__filter_logs, logs): @@ -86,14 +87,14 @@ class StolichkiDriver(uc.Chrome): try: return self.wait_for_presence(**kwargs) except: - if not self.__handle_captcha(): + if not self.handle_captcha(): self.execute_script("window.stop();") time.sleep(1) self.refresh() raise LoadingError("For some reason can't load page. 
Check logs") - def __handle_captcha(self): + def handle_captcha(self): for _ in range(10): try: captcha_image = self.find_element(By.ID, "captcha_image") diff --git a/stolichki/parsers/category.py b/stolichki/parsers/category.py index c3493ad..98d31a7 100644 --- a/stolichki/parsers/category.py +++ b/stolichki/parsers/category.py @@ -44,6 +44,7 @@ class BaseCategoryParser: return self.products + def get_products_links(self) -> list[str]: products_links: list[str] = [] @@ -96,44 +97,33 @@ class ByfarmCategoryParser(BaseCategoryParser): self.driver.get(link) for _ in range(10): - self.farms_loading_handler(self.driver) + self.farms_loading_handler() + + page_logs = self.driver.get_log('performance') product_info_re = re.compile(r"https://stolichki.ru/drugs/\d{1,}/get") - product_info = self.driver.get_response(product_info_re) + product_info = self.driver.get_response(product_info_re, page_logs) product_farms_re = re.compile(r"https://stolichki\.ru/drugs/\d{1,}/stores\?cityId=\d{1,}&no-captcha-token=.{1,}") - product_farms = self.driver.get_response(product_farms_re) + product_farms = self.driver.get_response(product_farms_re, page_logs) if (product_info and product_farms) and (product_farms.get("status") == product_info.get("status")): return Product(product_info["drug"], product_farms["stores"]) + self.driver.refresh() + return None - def farms_loading_handler(self, driver: StolichkiDriver): + def farms_loading_handler(self): try: - store_stock_button = self.driver.wait_for_presence(By.CLASS_NAME, "stores-stock") - - if store_stock_button: - store_stock_button.click() - - # Костыль для компонентов, которые начинают работать только при скроле - logger.debug("Scrolling up to 50") - ActionChains(self.driver).scroll_by_amount(0, -50).perform() - time.sleep(1) - logger.debug("Scrolling down to 50") - ActionChains(self.driver).scroll_by_amount(0, 50).perform() - time.sleep(1) + ActionChains(self.driver).scroll_by_amount(0, 1300).scroll_by_amount(0, -100).perform() - 
element = self.driver.wait_for_presence(by=By.CLASS_NAME, value="tr-start-store", delay=60) - - return element + return self.driver.wait_for_presence(by=By.CLASS_NAME, value="tr-start-store", delay=60) - except: - pass - - if not driver.__handle_captcha(): - self.driver.execute_script("window.stop;") - self.driver.refresh() + except: + if not self.driver.handle_captcha(): + self.driver.execute_script("window.stop;") + self.driver.refresh() def get_category_parser(city: City): return ByfarmCategoryParser if bool(city.is_byapt) else NormalCategoryParser #type: ignore \ No newline at end of file