Compare commits
2 Commits
049e436d37
...
fe16d4eee9
Author | SHA1 | Date |
---|---|---|
Анатолий Богомолов | fe16d4eee9 | |
Анатолий Богомолов | b97e8fe739 |
|
@ -0,0 +1,177 @@
|
||||||
|
{
|
||||||
|
"Москва": 1,
|
||||||
|
"Санкт-Петербург": 77,
|
||||||
|
"Бутово": 111,
|
||||||
|
"Внуковское": 207,
|
||||||
|
"Зеленоград": 71,
|
||||||
|
"Котельники": 98,
|
||||||
|
"Поселение Вороновское": 174,
|
||||||
|
"Рабочий поселок Столбовая": 173,
|
||||||
|
"Троицк": 195,
|
||||||
|
"Щербинка": 162,
|
||||||
|
"мкр.Град Московский": 105,
|
||||||
|
"п. Коммунарка": 165,
|
||||||
|
"п.Рязановское": 121,
|
||||||
|
"пос. Десеновское": 104,
|
||||||
|
"рп Тучково": 164,
|
||||||
|
"Андреевка": 185,
|
||||||
|
"Балашиха": 44,
|
||||||
|
"Бронницы": 106,
|
||||||
|
"Видное": 126,
|
||||||
|
"Волоколамск": 137,
|
||||||
|
"Высоковск": 184,
|
||||||
|
"Голицыно": 163,
|
||||||
|
"Дедовск ": 107,
|
||||||
|
"Дзержинский": 29,
|
||||||
|
"Дмитров": 6,
|
||||||
|
"Долгопрудный": 36,
|
||||||
|
"Домодедово": 115,
|
||||||
|
"Дубна": 90,
|
||||||
|
"Егорьевск": 30,
|
||||||
|
"Жуковский": 110,
|
||||||
|
"Зарайск": 113,
|
||||||
|
"Звенигород": 116,
|
||||||
|
"Ивантеевка": 127,
|
||||||
|
"Истра": 102,
|
||||||
|
"Кашира": 65,
|
||||||
|
"Клин": 64,
|
||||||
|
"Коломна": 31,
|
||||||
|
"Королев": 60,
|
||||||
|
"Красноармейск": 210,
|
||||||
|
"Красногорск": 69,
|
||||||
|
"Краснозаводск": 189,
|
||||||
|
"Кубинка": 134,
|
||||||
|
"Куровское": 80,
|
||||||
|
"Ликино-Дулево": 48,
|
||||||
|
"Лобня": 61,
|
||||||
|
"Лосино-Петровский": 74,
|
||||||
|
"Луховицы": 25,
|
||||||
|
"Лыткарино": 117,
|
||||||
|
"Люберцы": 27,
|
||||||
|
"Малаховка": 130,
|
||||||
|
"Можайск": 122,
|
||||||
|
"Мытищи": 79,
|
||||||
|
"Наро-Фоминск": 124,
|
||||||
|
"Нахабино": 196,
|
||||||
|
"Некрасовский": 188,
|
||||||
|
"Ногинск": 26,
|
||||||
|
"Одинцово": 38,
|
||||||
|
"Озеры": 114,
|
||||||
|
"Орехово-Зуево": 33,
|
||||||
|
"Павловский Посад": 70,
|
||||||
|
"Пересвет": 67,
|
||||||
|
"Подольск": 28,
|
||||||
|
"Протвино": 203,
|
||||||
|
"Путилково": 201,
|
||||||
|
"Пушкино": 84,
|
||||||
|
"Раменское": 75,
|
||||||
|
"Реутов": 35,
|
||||||
|
"Руза": 197,
|
||||||
|
"Сапроново": 211,
|
||||||
|
"Сергиев Посад": 46,
|
||||||
|
"Серебряные пруды": 45,
|
||||||
|
"Серпухов": 50,
|
||||||
|
"Солнечногорск": 118,
|
||||||
|
"Старая Купавна": 88,
|
||||||
|
"Ступино": 66,
|
||||||
|
"Талдом": 135,
|
||||||
|
"Фрязино": 32,
|
||||||
|
"Химки": 62,
|
||||||
|
"Хотьково": 204,
|
||||||
|
"Черноголовка": 54,
|
||||||
|
"Чехов": 59,
|
||||||
|
"Шатура": 129,
|
||||||
|
"Щелково": 19,
|
||||||
|
"Электрогорск": 208,
|
||||||
|
"Электросталь": 91,
|
||||||
|
"Электроугли": 216,
|
||||||
|
"Яхрома": 68,
|
||||||
|
"р.п. Лесной": 92,
|
||||||
|
"р.п. Октябрьский": 99,
|
||||||
|
"рп. Боброво": 215,
|
||||||
|
"Колпино": 97,
|
||||||
|
"Кронштадт ": 103,
|
||||||
|
"Петергоф": 212,
|
||||||
|
"Сестрорецк": 96,
|
||||||
|
"Шушары": 123,
|
||||||
|
"п. Металлострой": 161,
|
||||||
|
"Бокситогорск": 205,
|
||||||
|
"Волосово": 202,
|
||||||
|
"Волхов": 206,
|
||||||
|
"Всеволожск": 153,
|
||||||
|
"Выборг": 156,
|
||||||
|
"Гатчина": 108,
|
||||||
|
"Кингисепп": 144,
|
||||||
|
"Кириши": 148,
|
||||||
|
"Кировск": 193,
|
||||||
|
"Кудрово": 158,
|
||||||
|
"Ломоносов": 159,
|
||||||
|
"Луга": 147,
|
||||||
|
"Мурино": 157,
|
||||||
|
"Никольское": 150,
|
||||||
|
"Отрадное": 151,
|
||||||
|
"Поселок имени Морозова": 182,
|
||||||
|
"Приозерск": 136,
|
||||||
|
"Пушкин": 160,
|
||||||
|
"Сертолово": 132,
|
||||||
|
"Сланцы": 154,
|
||||||
|
"Тихвин": 178,
|
||||||
|
"Тосно": 176,
|
||||||
|
"Шлиссельбург": 155,
|
||||||
|
"Александров": 53,
|
||||||
|
"Владимир": 52,
|
||||||
|
"Вязники": 86,
|
||||||
|
"Гусь-Хрустальный": 145,
|
||||||
|
"Киржач": 199,
|
||||||
|
"Ковров": 56,
|
||||||
|
"Кольчугино": 89,
|
||||||
|
"Лакинск": 179,
|
||||||
|
"Муром": 43,
|
||||||
|
"Петушки": 167,
|
||||||
|
"Покров": 138,
|
||||||
|
"Радужный": 120,
|
||||||
|
"Собинка": 168,
|
||||||
|
"Струнино": 131,
|
||||||
|
"Судогда ": 109,
|
||||||
|
"Юрьев-Польский": 81,
|
||||||
|
"Заволжск": 194,
|
||||||
|
"Иваново": 85,
|
||||||
|
"Кохма": 200,
|
||||||
|
"Родники": 186,
|
||||||
|
"Тейково": 128,
|
||||||
|
"Шуя": 187,
|
||||||
|
"Белоусово": 171,
|
||||||
|
"Боровск": 170,
|
||||||
|
"Калуга": 140,
|
||||||
|
"Кременки": 191,
|
||||||
|
"Малоярославец": 172,
|
||||||
|
"Обнинск": 141,
|
||||||
|
"Кострома": 76,
|
||||||
|
"Выкса": 181,
|
||||||
|
"Дзержинск": 169,
|
||||||
|
"Кулебаки": 183,
|
||||||
|
"Нижний Новгород": 146,
|
||||||
|
"Боровичи": 180,
|
||||||
|
"Валдай": 192,
|
||||||
|
"Великий Новгород": 143,
|
||||||
|
"Старая Русса": 149,
|
||||||
|
"Великие Луки": 166,
|
||||||
|
"Остров": 177,
|
||||||
|
"Псков": 142,
|
||||||
|
"Рязань": 63,
|
||||||
|
"Тверь": 213,
|
||||||
|
"Алексин": 217,
|
||||||
|
"Богородицк": 221,
|
||||||
|
"Венев": 152,
|
||||||
|
"Донской": 82,
|
||||||
|
"Ефремов": 219,
|
||||||
|
"Кимовск": 101,
|
||||||
|
"Новомосковск": 94,
|
||||||
|
"Суворов": 190,
|
||||||
|
"Тула": 40,
|
||||||
|
"Узловая": 125,
|
||||||
|
"Щекино": 51,
|
||||||
|
"Переславль-Залесский": 209,
|
||||||
|
"Рыбинск": 198,
|
||||||
|
"Ярославль": 100
|
||||||
|
}
|
|
@ -1,16 +1,16 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import configparser
|
import configparser
|
||||||
import random
|
import random
|
||||||
import logging
|
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import backoff
|
import backoff
|
||||||
|
|
||||||
from utils.exceptions import ConfigError
|
from crawler.utils.exceptions import ConfigError
|
||||||
from crawler.types import City, Proxy
|
from crawler.types import City, Proxy
|
||||||
from utils.classes import Singleton
|
from crawler.utils.classes import Singleton
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logger
|
||||||
|
|
||||||
class CrawlerAPI(metaclass=Singleton):
|
class CrawlerAPI(metaclass=Singleton):
|
||||||
api_baseurl = "https://q.asburo.ru/ch/"
|
api_baseurl = "https://q.asburo.ru/ch/"
|
||||||
|
@ -94,7 +94,7 @@ class CrawlerAPI(metaclass=Singleton):
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
|
@backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
|
||||||
async def send_products(self, results: list):
|
async def send_products(self, results: list):
|
||||||
log.info("Sending data")
|
log.info("Sending data...")
|
||||||
|
|
||||||
url = f"{self.api_url}/prices/{self.rival_tag}"
|
url = f"{self.api_url}/prices/{self.rival_tag}"
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ class CrawlerAPI(metaclass=Singleton):
|
||||||
response = await self.session.post(url, json=data, auth=self.auth)
|
response = await self.session.post(url, json=data, auth=self.auth)
|
||||||
status, response_text = response.status, await response.text()
|
status, response_text = response.status, await response.text()
|
||||||
|
|
||||||
log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
|
log.debug(f"{data} was sended. Status: {status}. Response: {response_text}")
|
||||||
|
|
||||||
if status >= 500:
|
if status >= 500:
|
||||||
await asyncio.sleep(15)
|
await asyncio.sleep(15)
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from utils.classes import DataclassBase
|
from crawler.utils.classes import DataclassBase
|
||||||
|
|
||||||
@dataclass(init=False)
class City(DataclassBase):
    """City record used by the crawler.

    The generated ``__init__`` is disabled (``init=False``); construction is
    left to ``DataclassBase``.
    """

    # Site-specific identifier; main.py overwrites it from the
    # city-name -> id JSON mapping. Defaults to 0 until then.
    id: int = 0
    # City name; used as the lookup key into the JSON mapping.
    city: str
    region_id: int | None
    region_name: str | None
    # Was annotated ``1 | 0``, which is not a type: it evaluates to the
    # integer 1. NOTE(review): presumably a 0/1 flag - confirm against the API.
    is_byapt: int
|
||||||
|
|
||||||
@dataclass(init=False)
|
@dataclass(init=False)
|
||||||
class Proxy(DataclassBase):
|
class Proxy(DataclassBase):
|
||||||
|
|
43
main.py
43
main.py
|
@ -1,17 +1,50 @@
|
||||||
import sys
|
import sys
|
||||||
|
import asyncio
|
||||||
|
import configparser
|
||||||
|
import json
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from stolichki.parsers.city import CityParser
|
from stolichki.parsers.city import CityParser
|
||||||
from stolichki.types.city import City
|
from stolichki.types.city import City
|
||||||
|
from crawler.api import CrawlerAPI
|
||||||
|
from crawler.utils.classes import DataclassJSONEncoder
|
||||||
|
|
||||||
|
from crawler.types import City, Proxy
|
||||||
|
|
||||||
|
async def get_crawler_data():
    """Fetch the city and proxy lists from the crawler API.

    Returns:
        A ``(cities, proxies)`` tuple holding the results of
        ``CrawlerAPI.get_cities()`` and ``CrawlerAPI.get_proxies()``.
    """
    api = CrawlerAPI()
    # The two requests are awaited one after the other, cities first.
    city_list = await api.get_cities()
    proxy_list = await api.get_proxies()
    return city_list, proxy_list
|
||||||
|
|
||||||
|
def filter_cities(config, cities: "list[City]") -> "list[City]":
    """Keep only the cities known to the site and assign their site ids.

    The JSON file at ``config["parser"]["cities_path"]`` maps a city name to
    the id used by the target site. Each city whose name is present in that
    mapping gets its ``id`` attribute set; cities absent from the mapping are
    dropped instead of aborting the whole run with ``KeyError``.

    Args:
        config: Mapping-like configuration (e.g. ``ConfigParser``) exposing
            ``config["parser"]["cities_path"]``.
        cities: Objects with a ``city`` name attribute and a writable ``id``.

    Returns:
        A new list with the matched cities, in their original order.

    Raises:
        KeyError: If the ``parser`` section or ``cities_path`` option is missing.
        OSError: If the mapping file cannot be opened.
        json.JSONDecodeError: If the mapping file is not valid JSON.
    """
    # The mapping holds Cyrillic names, so never rely on the locale encoding.
    with open(config["parser"]["cities_path"], encoding="utf-8") as f:
        raw_ids = json.load(f)

    # Some names in the mapping carry stray surrounding whitespace;
    # normalise both sides so those entries still match.
    site_ids = {name.strip(): city_id for name, city_id in raw_ids.items()}

    matched = []
    for city in cities:
        city_id = site_ids.get(city.city.strip())
        if city_id is None:
            # Not served by the site: filter it out.
            continue
        city.id = city_id
        matched.append(city)

    return matched
|
||||||
|
|
||||||
def _parse_city(proxies, city):
    """Parse a single city; module-level so multiprocessing can pickle it."""
    return CityParser(city, proxies).parse()


@logger.catch
def main(cities: list[City], proxies: list[Proxy]):
    """Parse every known city in a process pool and dump the results.

    Reads ``config.ini``, narrows ``cities`` through ``filter_cities``, runs
    one ``CityParser`` per city across a ``multiprocessing.Pool`` and writes
    the collected results to ``results.json``.

    Args:
        cities: Cities fetched from the crawler API.
        proxies: Proxies handed to every ``CityParser``.
    """
    from functools import partial

    config = configparser.ConfigParser()
    config.read("config.ini")

    cities = filter_cities(config, cities)

    # configparser yields strings, while Pool requires an int. An empty or
    # missing option falls back to one worker per city.
    raw_quantity = config["parser"].get("cities_quantity", "").strip()
    quantity = int(raw_quantity) if raw_quantity else len(cities)
    # Pool rejects processes < 1 (e.g. when no city matched).
    quantity = max(1, quantity)

    with multiprocessing.Pool(processes=quantity) as pool:
        # A lambda cannot be pickled for the worker processes; use a partial
        # of a module-level function instead.
        results = pool.map(partial(_parse_city, proxies), cities)

    # ensure_ascii=False emits raw Cyrillic, so pin the file encoding.
    with open("results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Send loguru records to stderr at DEBUG level.
    logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True)  # type: ignore

    # The API calls are coroutines: run them to completion first, then pass
    # the resulting (cities, proxies) pair to the synchronous entry point.
    main(*asyncio.run(get_crawler_data()))
|
|
@ -19,7 +19,8 @@ from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
|
||||||
from twocaptcha import TwoCaptcha
|
from twocaptcha import TwoCaptcha
|
||||||
|
|
||||||
from stolichki.errors import CaptchaError, ConfigError, LoadingError
|
from stolichki.errors import CaptchaError, LoadingError
|
||||||
|
from crawler.utils.exceptions import ConfigError
|
||||||
|
|
||||||
class StolichkiDriver(uc.Chrome):
|
class StolichkiDriver(uc.Chrome):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
|
|
@ -5,7 +5,3 @@ class CaptchaError(Exception):
|
||||||
class LoadingError(Exception):
    """Exception type exported by ``stolichki.errors``.

    Accepts arbitrary positional arguments and stores them exactly as the
    built-in ``Exception`` does.
    """

    def __init__(self, *args: object) -> None:
        # Plain pass-through to the base class constructor.
        super(LoadingError, self).__init__(*args)
|
||||||
|
|
||||||
class ConfigError(Exception):
|
|
||||||
def __init__(self, *args: object) -> None:
|
|
||||||
super().__init__(*args)
|
|
|
@ -1,12 +1,14 @@
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
from crawler.types import Proxy
|
||||||
|
|
||||||
from stolichki.driver import StolichkiDriver
|
from stolichki.driver import StolichkiDriver
|
||||||
from stolichki.parsers.category import get_category_parser
|
from stolichki.parsers.category import get_category_parser
|
||||||
from stolichki.types.city import City
|
from stolichki.types.city import City
|
||||||
|
|
||||||
|
|
||||||
class CityParser:
|
class CityParser:
|
||||||
def __init__(self, city: City) -> None:
|
def __init__(self, city, proxies: list[Proxy]) -> None:
|
||||||
self.driver = StolichkiDriver()
|
self.driver = StolichkiDriver()
|
||||||
self.city = city
|
self.city = city
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue