Compare commits

..

No commits in common. "67834375824348ef9a73aead0a95fdd0980247b8" and "5ee90bca3d5a16d406a35f467bc0597507dc21cb" have entirely different histories.

3 changed files with 23 additions and 40 deletions

View File

@ -1,20 +1,18 @@
2captcha-python==1.2.2
attrs==23.1.0 attrs==23.1.0
certifi==2023.11.17 certifi==2023.7.22
charset-normalizer==3.3.2 charset-normalizer==3.2.0
exceptiongroup==1.1.3
h11==0.14.0 h11==0.14.0
idna==3.6 idna==3.4
outcome==1.3.0.post0 outcome==1.2.0
packaging==23.2
PySocks==1.7.1 PySocks==1.7.1
python-dotenv==1.0.0
requests==2.31.0 requests==2.31.0
selenium==4.16.0 selenium==4.12.0
selenium-stealth==1.0.6 selenium-stealth==1.0.6
sniffio==1.3.0 sniffio==1.3.0
sortedcontainers==2.4.0 sortedcontainers==2.4.0
trio==0.23.2 trio==0.22.2
trio-websocket==0.11.1 trio-websocket==0.10.4
urllib3==2.1.0 urllib3==2.0.4
webdriver-manager==4.0.1
wsproto==1.2.0 wsproto==1.2.0
2captcha-python==1.2.1

View File

@ -14,7 +14,6 @@ from selenium.common.exceptions import (
NoSuchElementException, NoSuchElementException,
) )
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth from selenium_stealth import stealth
from twocaptcha import TwoCaptcha from twocaptcha import TwoCaptcha
@ -28,7 +27,7 @@ class StolichkiDriver(webdriver.Chrome):
self, options: Options = None, service: Service = None, keep_alive: bool = True self, options: Options = None, service: Service = None, keep_alive: bool = True
) -> None: ) -> None:
# assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY" assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
if options is None: if options is None:
options = webdriver.ChromeOptions() options = webdriver.ChromeOptions()
@ -36,13 +35,11 @@ class StolichkiDriver(webdriver.Chrome):
if not os.path.exists("errors"): if not os.path.exists("errors"):
os.mkdir("errors") os.mkdir("errors")
service = webdriver.ChromeService(ChromeDriverManager().install())
options.add_experimental_option("excludeSwitches", ["enable-automation"]) options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False) options.add_experimental_option("useAutomationExtension", False)
options.page_load_strategy = "eager" options.page_load_strategy = "eager"
# self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY")) self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
super().__init__(options, service, keep_alive) super().__init__(options, service, keep_alive)
@ -56,12 +53,6 @@ class StolichkiDriver(webdriver.Chrome):
fix_hairline=True, fix_hairline=True,
) )
def set_city(self, id: int):
    """Switch the site to the city identified by ``id``.

    Opens the Stolichki home page, rewrites the ``cityId`` cookie to the
    given identifier and then refreshes the page.

    NOTE(review): the home page is presumably loaded first so that the
    ``cityId`` cookie already exists for the site before it is edited;
    confirm against the WebDriver cookie rules.
    """
    home_url = "https://stolichki.ru/"
    self.get(home_url)
    self.__edit_cookie("cityId", id)
    self.refresh()
def get(self, url: str) -> None: def get(self, url: str) -> None:
super().get(url) super().get(url)
logging.info(f"Loading {url}") logging.info(f"Loading {url}")
@ -72,7 +63,7 @@ class StolichkiDriver(webdriver.Chrome):
# Если не появился, обновляем страницу и ждём ещё раз. # Если не появился, обновляем страницу и ждём ещё раз.
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение # И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
if not self.__wait_for_presence('//img[@alt="Логотип"]'): if not self.__wait_for_presence('//img[@alt="Логотип"]'):
# self.__handle_captcha() self.__handle_captcha()
self.execute_script("window.stop();") self.execute_script("window.stop();")
time.sleep(1) time.sleep(1)
self.refresh() self.refresh()
@ -92,9 +83,8 @@ class StolichkiDriver(webdriver.Chrome):
def __wait_for_presence(self, xpath: str, delay: int = 60): def __wait_for_presence(self, xpath: str, delay: int = 60):
try:
wait = WebDriverWait(self, delay) wait = WebDriverWait(self, delay)
try:
wait.until( wait.until(
EC.presence_of_element_located( EC.presence_of_element_located(
(By.XPATH, xpath) (By.XPATH, xpath)
@ -107,16 +97,6 @@ class StolichkiDriver(webdriver.Chrome):
except (NoSuchElementException, ElementNotVisibleException): except (NoSuchElementException, ElementNotVisibleException):
return False return False
def __edit_cookie(self, name: str, value):
    """Overwrite the value of an already existing browser cookie.

    Looks up the cookie called ``name``. When no such cookie is found the
    method does nothing. Otherwise the cookie is deleted and added again
    with all of its fields unchanged except ``value``, which is stored as
    ``str(value)``.
    """
    existing = self.get_cookie(name)
    if not existing:
        # Nothing to edit: the cookie has not been set.
        return
    self.delete_cookie(name)
    # Re-add a copy so every other field of the cookie is carried over.
    self.add_cookie({**existing, "value": str(value)})
def __handle_captcha(self) -> None: def __handle_captcha(self) -> None:
for attempt in range(5): for attempt in range(5):
logging.info(f"Trying to solve captcha {attempt + 1}/5") logging.info(f"Trying to solve captcha {attempt + 1}/5")

View File

@ -1,6 +1,7 @@
import logging import logging
from multiprocessing import Pool from multiprocessing import Pool
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
@ -10,7 +11,7 @@ from .product import Product
class StolichkiParser: class StolichkiParser:
city = { city = {
"id": 77, "id": 1,
"name": "Москва", "name": "Москва",
} }
@ -18,9 +19,8 @@ class StolichkiParser:
if city is not None: if city is not None:
self.city = city self.city = city
self.driver = StolichkiDriver() service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
self.driver.set_city(self.city.get("id")) self.driver = StolichkiDriver(service=service)
logging.info(f"Parser initialize complete! City: {self.city.get('name')}") logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
def run(self): def run(self):
@ -102,3 +102,8 @@ class StolichkiParser:
data.append(product) data.append(product)
return data return data
def set_city(self, id: int):
    """Placeholder for switching the parser's city; currently a no-op.

    The ``id`` argument is accepted but not used yet.
    """
    # TODO: implement city switching by replacing the value in the browser cookie
    return None