Compare commits
No commits in common. "67834375824348ef9a73aead0a95fdd0980247b8" and "5ee90bca3d5a16d406a35f467bc0597507dc21cb" have entirely different histories.
6783437582
...
5ee90bca3d
|
@ -1,20 +1,18 @@
|
|||
2captcha-python==1.2.2
|
||||
attrs==23.1.0
|
||||
certifi==2023.11.17
|
||||
charset-normalizer==3.3.2
|
||||
certifi==2023.7.22
|
||||
charset-normalizer==3.2.0
|
||||
exceptiongroup==1.1.3
|
||||
h11==0.14.0
|
||||
idna==3.6
|
||||
outcome==1.3.0.post0
|
||||
packaging==23.2
|
||||
idna==3.4
|
||||
outcome==1.2.0
|
||||
PySocks==1.7.1
|
||||
python-dotenv==1.0.0
|
||||
requests==2.31.0
|
||||
selenium==4.16.0
|
||||
selenium==4.12.0
|
||||
selenium-stealth==1.0.6
|
||||
sniffio==1.3.0
|
||||
sortedcontainers==2.4.0
|
||||
trio==0.23.2
|
||||
trio-websocket==0.11.1
|
||||
urllib3==2.1.0
|
||||
webdriver-manager==4.0.1
|
||||
trio==0.22.2
|
||||
trio-websocket==0.10.4
|
||||
urllib3==2.0.4
|
||||
wsproto==1.2.0
|
||||
2captcha-python==1.2.1
|
|
@ -14,7 +14,6 @@ from selenium.common.exceptions import (
|
|||
NoSuchElementException,
|
||||
)
|
||||
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium_stealth import stealth
|
||||
|
||||
from twocaptcha import TwoCaptcha
|
||||
|
@ -28,7 +27,7 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
self, options: Options = None, service: Service = None, keep_alive: bool = True
|
||||
) -> None:
|
||||
|
||||
# assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
|
||||
assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
|
||||
|
||||
if options is None:
|
||||
options = webdriver.ChromeOptions()
|
||||
|
@ -36,13 +35,11 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
if not os.path.exists("errors"):
|
||||
os.mkdir("errors")
|
||||
|
||||
service = webdriver.ChromeService(ChromeDriverManager().install())
|
||||
|
||||
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||
options.add_experimental_option("useAutomationExtension", False)
|
||||
options.page_load_strategy = "eager"
|
||||
|
||||
# self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
|
||||
self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
|
||||
|
||||
super().__init__(options, service, keep_alive)
|
||||
|
||||
|
@ -56,12 +53,6 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
fix_hairline=True,
|
||||
)
|
||||
|
||||
|
||||
def set_city(self, id: int):
    """Load the Stolichki home page, store *id* in the ``cityId`` cookie, and refresh.

    Args:
        id: Value handed to ``__edit_cookie`` for the ``cityId`` cookie.
    """
    home_url = "https://stolichki.ru/"

    # NOTE(review): presumably the site has to be open before its cookie can
    # be read and rewritten -- confirm against the cookie helper.
    self.get(home_url)
    self.__edit_cookie("cityId", id)

    # Reload the current page after the cookie has been edited.
    self.refresh()
|
||||
|
||||
def get(self, url: str) -> None:
|
||||
super().get(url)
|
||||
logging.info(f"Loading {url}")
|
||||
|
@ -72,7 +63,7 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
# Если не появился, обновляем страницу и ждём ещё раз.
|
||||
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
|
||||
if not self.__wait_for_presence('//img[@alt="Логотип"]'):
|
||||
# self.__handle_captcha()
|
||||
self.__handle_captcha()
|
||||
self.execute_script("window.stop();")
|
||||
time.sleep(1)
|
||||
self.refresh()
|
||||
|
@ -92,9 +83,8 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
|
||||
|
||||
def __wait_for_presence(self, xpath: str, delay: int = 60):
|
||||
try:
|
||||
wait = WebDriverWait(self, delay)
|
||||
|
||||
try:
|
||||
wait.until(
|
||||
EC.presence_of_element_located(
|
||||
(By.XPATH, xpath)
|
||||
|
@ -107,16 +97,6 @@ class StolichkiDriver(webdriver.Chrome):
|
|||
except (NoSuchElementException, ElementNotVisibleException):
|
||||
return False
|
||||
|
||||
def __edit_cookie(self, name: str, value):
    """Set the browser cookie *name* to ``str(value)``.

    If a cookie with that name already exists it is deleted and re-added
    as a copy with only its ``"value"`` field replaced. If no such cookie
    exists, a minimal cookie carrying just the name and the value is
    added, so a missing cookie no longer causes a failure or a silently
    skipped update.

    Args:
        name: Name of the cookie to edit.
        value: New cookie value; converted with ``str()`` before storing.
    """
    cookie = self.get_cookie(name)

    if cookie:
        # Replace the stored cookie: remove it, then re-add a modified copy
        # so every field other than the value is carried over.
        self.delete_cookie(name)
        new_cookie = cookie.copy()
    else:
        # Nothing to copy from -- start from the required name field only.
        new_cookie = {"name": name}

    new_cookie["value"] = str(value)
    self.add_cookie(new_cookie)
|
||||
|
||||
def __handle_captcha(self) -> None:
|
||||
for attempt in range(5):
|
||||
logging.info(f"Trying to solve captcha {attempt + 1}/5")
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
from multiprocessing import Pool
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
|
@ -10,7 +11,7 @@ from .product import Product
|
|||
|
||||
class StolichkiParser:
|
||||
city = {
|
||||
"id": 77,
|
||||
"id": 1,
|
||||
"name": "Москва",
|
||||
}
|
||||
|
||||
|
@ -18,9 +19,8 @@ class StolichkiParser:
|
|||
if city is not None:
|
||||
self.city = city
|
||||
|
||||
self.driver = StolichkiDriver()
|
||||
self.driver.set_city(self.city.get("id"))
|
||||
|
||||
service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
|
||||
self.driver = StolichkiDriver(service=service)
|
||||
logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
|
||||
|
||||
def run(self):
|
||||
|
@ -102,3 +102,8 @@ class StolichkiParser:
|
|||
data.append(product)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def set_city(self, id: int):
    """Placeholder for changing the city; currently does nothing and returns None."""
    # TODO: implement the city switch by replacing the value in the browser cookie.
|
||||
|
|
Loading…
Reference in New Issue