Compare commits

..

No commits in common. "67834375824348ef9a73aead0a95fdd0980247b8" and "5ee90bca3d5a16d406a35f467bc0597507dc21cb" have entirely different histories.

3 changed files with 23 additions and 40 deletions

View File

@ -1,20 +1,18 @@
2captcha-python==1.2.2
attrs==23.1.0
certifi==2023.11.17
charset-normalizer==3.3.2
certifi==2023.7.22
charset-normalizer==3.2.0
exceptiongroup==1.1.3
h11==0.14.0
idna==3.6
outcome==1.3.0.post0
packaging==23.2
idna==3.4
outcome==1.2.0
PySocks==1.7.1
python-dotenv==1.0.0
requests==2.31.0
selenium==4.16.0
selenium==4.12.0
selenium-stealth==1.0.6
sniffio==1.3.0
sortedcontainers==2.4.0
trio==0.23.2
trio-websocket==0.11.1
urllib3==2.1.0
webdriver-manager==4.0.1
trio==0.22.2
trio-websocket==0.10.4
urllib3==2.0.4
wsproto==1.2.0
2captcha-python==1.2.1

View File

@ -14,7 +14,6 @@ from selenium.common.exceptions import (
NoSuchElementException,
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth
from twocaptcha import TwoCaptcha
@ -28,7 +27,7 @@ class StolichkiDriver(webdriver.Chrome):
self, options: Options = None, service: Service = None, keep_alive: bool = True
) -> None:
# assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
if options is None:
options = webdriver.ChromeOptions()
@ -36,13 +35,11 @@ class StolichkiDriver(webdriver.Chrome):
if not os.path.exists("errors"):
os.mkdir("errors")
service = webdriver.ChromeService(ChromeDriverManager().install())
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
options.page_load_strategy = "eager"
# self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
super().__init__(options, service, keep_alive)
@ -56,12 +53,6 @@ class StolichkiDriver(webdriver.Chrome):
fix_hairline=True,
)
def set_city(self, id: int):
    """Switch the browsing session to another city.

    Loads the site's home page, rewrites the ``cityId`` cookie with
    ``id`` and reloads the page.

    Args:
        id: City identifier to place in the ``cityId`` cookie.
    """
    home_page = "https://stolichki.ru/"
    city_cookie = "cityId"

    # Open the site first; the cookie is looked up on the loaded page.
    self.get(home_page)
    self.__edit_cookie(city_cookie, id)
    # Reload so the next page render sees the rewritten cookie.
    self.refresh()
def get(self, url: str) -> None:
super().get(url)
logging.info(f"Loading {url}")
@ -72,7 +63,7 @@ class StolichkiDriver(webdriver.Chrome):
# Если не появился, обновляем страницу и ждём ещё раз.
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
if not self.__wait_for_presence('//img[@alt="Логотип"]'):
# self.__handle_captcha()
self.__handle_captcha()
self.execute_script("window.stop();")
time.sleep(1)
self.refresh()
@ -92,9 +83,8 @@ class StolichkiDriver(webdriver.Chrome):
def __wait_for_presence(self, xpath: str, delay: int = 60):
try:
wait = WebDriverWait(self, delay)
try:
wait.until(
EC.presence_of_element_located(
(By.XPATH, xpath)
@ -107,16 +97,6 @@ class StolichkiDriver(webdriver.Chrome):
except (NoSuchElementException, ElementNotVisibleException):
return False
def __edit_cookie(self, name: str, value):
    """Overwrite the value of an existing browser cookie.

    The cookie is fetched by name, removed, and re-added with every
    attribute preserved except ``value``. When the browser reports no
    cookie with that name, nothing is changed.

    Args:
        name: Name of the cookie to rewrite.
        value: New value; it is converted with ``str()`` before storing.
    """
    current = self.get_cookie(name)
    if not current:
        # No such cookie in the session: leave everything untouched.
        return

    self.delete_cookie(name)
    # Same attributes as before, only the value is swapped.
    replacement = {**current, "value": str(value)}
    self.add_cookie(replacement)
def __handle_captcha(self) -> None:
for attempt in range(5):
logging.info(f"Trying to solve captcha {attempt + 1}/5")

View File

@ -1,6 +1,7 @@
import logging
from multiprocessing import Pool
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.common.by import By
@ -10,7 +11,7 @@ from .product import Product
class StolichkiParser:
city = {
"id": 77,
"id": 1,
"name": "Москва",
}
@ -18,9 +19,8 @@ class StolichkiParser:
if city is not None:
self.city = city
self.driver = StolichkiDriver()
self.driver.set_city(self.city.get("id"))
service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
self.driver = StolichkiDriver(service=service)
logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
def run(self):
@ -102,3 +102,8 @@ class StolichkiParser:
data.append(product)
return data
def set_city(self, id: int):
    """Placeholder for switching the parser to another city.

    Not implemented yet: the call has no effect and returns ``None``.

    Args:
        id: City identifier (currently unused).
    """
    # TODO: implement the city switch by replacing the value in the
    # browser cookie.
    return None