Compare commits

..

2 Commits

3 changed files with 40 additions and 23 deletions

View File

@ -1,18 +1,20 @@
2captcha-python==1.2.2
attrs==23.1.0
certifi==2023.7.22
charset-normalizer==3.2.0
exceptiongroup==1.1.3
certifi==2023.11.17
charset-normalizer==3.3.2
h11==0.14.0
idna==3.4
outcome==1.2.0
idna==3.6
outcome==1.3.0.post0
packaging==23.2
PySocks==1.7.1
python-dotenv==1.0.0
requests==2.31.0
selenium==4.12.0
selenium==4.16.0
selenium-stealth==1.0.6
sniffio==1.3.0
sortedcontainers==2.4.0
trio==0.22.2
trio-websocket==0.10.4
urllib3==2.0.4
trio==0.23.2
trio-websocket==0.11.1
urllib3==2.1.0
webdriver-manager==4.0.1
wsproto==1.2.0
2captcha-python==1.2.1

View File

@ -14,6 +14,7 @@ from selenium.common.exceptions import (
NoSuchElementException,
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth
from twocaptcha import TwoCaptcha
@ -27,7 +28,7 @@ class StolichkiDriver(webdriver.Chrome):
self, options: Options = None, service: Service = None, keep_alive: bool = True
) -> None:
assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
# assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
if options is None:
options = webdriver.ChromeOptions()
@ -35,11 +36,13 @@ class StolichkiDriver(webdriver.Chrome):
if not os.path.exists("errors"):
os.mkdir("errors")
service = webdriver.ChromeService(ChromeDriverManager().install())
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
options.page_load_strategy = "eager"
self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
# self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
super().__init__(options, service, keep_alive)
@ -53,6 +56,12 @@ class StolichkiDriver(webdriver.Chrome):
fix_hairline=True,
)
def set_city(self, id: int):
    """Point the browser session at the city identified by ``id``.

    Loads the site's home page through ``self.get``, rewrites the
    ``cityId`` cookie with ``id`` and then refreshes the page.

    Args:
        id: Value to store in the ``cityId`` cookie.
    """
    home_url = "https://stolichki.ru/"
    self.get(home_url)
    self.__edit_cookie(name="cityId", value=id)
    self.refresh()
def get(self, url: str) -> None:
super().get(url)
logging.info(f"Loading {url}")
@ -63,7 +72,7 @@ class StolichkiDriver(webdriver.Chrome):
# Если не появился, обновляем страницу и ждём ещё раз.
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
if not self.__wait_for_presence('//img[@alt="Логотип"]'):
self.__handle_captcha()
# self.__handle_captcha()
self.execute_script("window.stop();")
time.sleep(1)
self.refresh()
@ -83,8 +92,9 @@ class StolichkiDriver(webdriver.Chrome):
def __wait_for_presence(self, xpath: str, delay: int = 60):
wait = WebDriverWait(self, delay)
try:
wait = WebDriverWait(self, delay)
wait.until(
EC.presence_of_element_located(
(By.XPATH, xpath)
@ -97,6 +107,16 @@ class StolichkiDriver(webdriver.Chrome):
except (NoSuchElementException, ElementNotVisibleException):
return False
def __edit_cookie(self, name: str, value):
    """Set the browser cookie ``name`` to ``value``.

    If the cookie already exists it is deleted and re-added with the same
    attributes and the new value. If it does not exist yet, a cookie
    carrying only the name and value is added, so the call never silently
    does nothing.

    Args:
        name: Name of the cookie to update.
        value: New value; converted with ``str()`` before being stored.
    """
    cookie = self.get_cookie(name)
    if cookie:
        # Keep every attribute of the existing cookie, changing only the value.
        new_cookie = cookie.copy()
        new_cookie["value"] = str(value)
        self.delete_cookie(name)
    else:
        # NOTE(review): a freshly created cookie relies on the browser's
        # defaults for domain/path - confirm the site honours it.
        new_cookie = {"name": name, "value": str(value)}
    self.add_cookie(new_cookie)
def __handle_captcha(self) -> None:
for attempt in range(5):
logging.info(f"Trying to solve captcha {attempt + 1}/5")

View File

@ -1,7 +1,6 @@
import logging
from multiprocessing import Pool
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.common.by import By
@ -11,7 +10,7 @@ from .product import Product
class StolichkiParser:
city = {
"id": 1,
"id": 77,
"name": "Москва",
}
@ -19,8 +18,9 @@ class StolichkiParser:
if city is not None:
self.city = city
service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
self.driver = StolichkiDriver(service=service)
self.driver = StolichkiDriver()
self.driver.set_city(self.city.get("id"))
logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
def run(self):
@ -102,8 +102,3 @@ class StolichkiParser:
data.append(product)
return data
def set_city(self, id: int):
    """Placeholder for switching the city; currently does nothing.

    TODO: implement the city switch by replacing the value stored in the
    browser cookie.
    """
    return None