Compare commits
2 Commits
5ee90bca3d...6783437582
Author | SHA1 | Date |
---|---|---|
Анатолий Богомолов | 6783437582 | |
Анатолий Богомолов | 15d1bc3133 | |
|
@ -1,18 +1,20 @@
|
||||||
|
2captcha-python==1.2.2
|
||||||
attrs==23.1.0
|
attrs==23.1.0
|
||||||
certifi==2023.7.22
|
certifi==2023.11.17
|
||||||
charset-normalizer==3.2.0
|
charset-normalizer==3.3.2
|
||||||
exceptiongroup==1.1.3
|
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
idna==3.4
|
idna==3.6
|
||||||
outcome==1.2.0
|
outcome==1.3.0.post0
|
||||||
|
packaging==23.2
|
||||||
PySocks==1.7.1
|
PySocks==1.7.1
|
||||||
|
python-dotenv==1.0.0
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
selenium==4.12.0
|
selenium==4.16.0
|
||||||
selenium-stealth==1.0.6
|
selenium-stealth==1.0.6
|
||||||
sniffio==1.3.0
|
sniffio==1.3.0
|
||||||
sortedcontainers==2.4.0
|
sortedcontainers==2.4.0
|
||||||
trio==0.22.2
|
trio==0.23.2
|
||||||
trio-websocket==0.10.4
|
trio-websocket==0.11.1
|
||||||
urllib3==2.0.4
|
urllib3==2.1.0
|
||||||
|
webdriver-manager==4.0.1
|
||||||
wsproto==1.2.0
|
wsproto==1.2.0
|
||||||
2captcha-python==1.2.1
|
|
|
@ -14,6 +14,7 @@ from selenium.common.exceptions import (
|
||||||
NoSuchElementException,
|
NoSuchElementException,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
from selenium_stealth import stealth
|
from selenium_stealth import stealth
|
||||||
|
|
||||||
from twocaptcha import TwoCaptcha
|
from twocaptcha import TwoCaptcha
|
||||||
|
@ -27,7 +28,7 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
self, options: Options = None, service: Service = None, keep_alive: bool = True
|
self, options: Options = None, service: Service = None, keep_alive: bool = True
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
|
# assert os.environ.get("TWOCAPTCA_KEY") is not None, "Can't fins environment variable TWOCAPTCHA_KEY"
|
||||||
|
|
||||||
if options is None:
|
if options is None:
|
||||||
options = webdriver.ChromeOptions()
|
options = webdriver.ChromeOptions()
|
||||||
|
@ -35,11 +36,13 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
if not os.path.exists("errors"):
|
if not os.path.exists("errors"):
|
||||||
os.mkdir("errors")
|
os.mkdir("errors")
|
||||||
|
|
||||||
|
service = webdriver.ChromeService(ChromeDriverManager().install())
|
||||||
|
|
||||||
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||||
options.add_experimental_option("useAutomationExtension", False)
|
options.add_experimental_option("useAutomationExtension", False)
|
||||||
options.page_load_strategy = "eager"
|
options.page_load_strategy = "eager"
|
||||||
|
|
||||||
self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
|
# self.__solver = TwoCaptcha(os.environ.get("TWOCAPTCA_KEY"))
|
||||||
|
|
||||||
super().__init__(options, service, keep_alive)
|
super().__init__(options, service, keep_alive)
|
||||||
|
|
||||||
|
@ -53,6 +56,12 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
fix_hairline=True,
|
fix_hairline=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_city(self, id: int):
|
||||||
|
self.get("https://stolichki.ru/")
|
||||||
|
self.__edit_cookie("cityId", id)
|
||||||
|
self.refresh()
|
||||||
|
|
||||||
def get(self, url: str) -> None:
|
def get(self, url: str) -> None:
|
||||||
super().get(url)
|
super().get(url)
|
||||||
logging.info(f"Loading {url}")
|
logging.info(f"Loading {url}")
|
||||||
|
@ -63,7 +72,7 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
# Если не появился, обновляем страницу и ждём ещё раз.
|
# Если не появился, обновляем страницу и ждём ещё раз.
|
||||||
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
|
# И так пять раз. Если за 5 попыток ничего не вышло, кидаем исключение
|
||||||
if not self.__wait_for_presence('//img[@alt="Логотип"]'):
|
if not self.__wait_for_presence('//img[@alt="Логотип"]'):
|
||||||
self.__handle_captcha()
|
# self.__handle_captcha()
|
||||||
self.execute_script("window.stop();")
|
self.execute_script("window.stop();")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
self.refresh()
|
self.refresh()
|
||||||
|
@ -83,8 +92,9 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
|
|
||||||
|
|
||||||
def __wait_for_presence(self, xpath: str, delay: int = 60):
|
def __wait_for_presence(self, xpath: str, delay: int = 60):
|
||||||
wait = WebDriverWait(self, delay)
|
|
||||||
try:
|
try:
|
||||||
|
wait = WebDriverWait(self, delay)
|
||||||
|
|
||||||
wait.until(
|
wait.until(
|
||||||
EC.presence_of_element_located(
|
EC.presence_of_element_located(
|
||||||
(By.XPATH, xpath)
|
(By.XPATH, xpath)
|
||||||
|
@ -97,6 +107,16 @@ class StolichkiDriver(webdriver.Chrome):
|
||||||
except (NoSuchElementException, ElementNotVisibleException):
|
except (NoSuchElementException, ElementNotVisibleException):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def __edit_cookie(self, name: str, value):
|
||||||
|
cookie = self.get_cookie(name)
|
||||||
|
if cookie:
|
||||||
|
self.delete_cookie(name)
|
||||||
|
|
||||||
|
new_cookie = cookie.copy()
|
||||||
|
new_cookie["value"] = str(value)
|
||||||
|
|
||||||
|
self.add_cookie(new_cookie)
|
||||||
|
|
||||||
def __handle_captcha(self) -> None:
|
def __handle_captcha(self) -> None:
|
||||||
for attempt in range(5):
|
for attempt in range(5):
|
||||||
logging.info(f"Trying to solve captcha {attempt + 1}/5")
|
logging.info(f"Trying to solve captcha {attempt + 1}/5")
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
@ -11,7 +10,7 @@ from .product import Product
|
||||||
|
|
||||||
class StolichkiParser:
|
class StolichkiParser:
|
||||||
city = {
|
city = {
|
||||||
"id": 1,
|
"id": 77,
|
||||||
"name": "Москва",
|
"name": "Москва",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,8 +18,9 @@ class StolichkiParser:
|
||||||
if city is not None:
|
if city is not None:
|
||||||
self.city = city
|
self.city = city
|
||||||
|
|
||||||
service = webdriver.ChromeService("/home/winet/.local/bin/chromedriver")
|
self.driver = StolichkiDriver()
|
||||||
self.driver = StolichkiDriver(service=service)
|
self.driver.set_city(self.city.get("id"))
|
||||||
|
|
||||||
logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
|
logging.info(f"Parser initialize complete! City: {self.city.get('name')}")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
@ -102,8 +102,3 @@ class StolichkiParser:
|
||||||
data.append(product)
|
data.append(product)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def set_city(self, id: int):
|
|
||||||
# TODO: Написать смену города путём заменой значения в куки браузера
|
|
||||||
pass
|
|
||||||
|
|
Loading…
Reference in New Issue