Compare commits
2 Commits
049e436d37
...
fe16d4eee9
Author | SHA1 | Date |
---|---|---|
Анатолий Богомолов | fe16d4eee9 | |
Анатолий Богомолов | b97e8fe739 |
|
@ -0,0 +1,177 @@
|
|||
{
|
||||
"Москва": 1,
|
||||
"Санкт-Петербург": 77,
|
||||
"Бутово": 111,
|
||||
"Внуковское": 207,
|
||||
"Зеленоград": 71,
|
||||
"Котельники": 98,
|
||||
"Поселение Вороновское": 174,
|
||||
"Рабочий поселок Столбовая": 173,
|
||||
"Троицк": 195,
|
||||
"Щербинка": 162,
|
||||
"мкр.Град Московский": 105,
|
||||
"п. Коммунарка": 165,
|
||||
"п.Рязановское": 121,
|
||||
"пос. Десеновское": 104,
|
||||
"рп Тучково": 164,
|
||||
"Андреевка": 185,
|
||||
"Балашиха": 44,
|
||||
"Бронницы": 106,
|
||||
"Видное": 126,
|
||||
"Волоколамск": 137,
|
||||
"Высоковск": 184,
|
||||
"Голицыно": 163,
|
||||
"Дедовск ": 107,
|
||||
"Дзержинский": 29,
|
||||
"Дмитров": 6,
|
||||
"Долгопрудный": 36,
|
||||
"Домодедово": 115,
|
||||
"Дубна": 90,
|
||||
"Егорьевск": 30,
|
||||
"Жуковский": 110,
|
||||
"Зарайск": 113,
|
||||
"Звенигород": 116,
|
||||
"Ивантеевка": 127,
|
||||
"Истра": 102,
|
||||
"Кашира": 65,
|
||||
"Клин": 64,
|
||||
"Коломна": 31,
|
||||
"Королев": 60,
|
||||
"Красноармейск": 210,
|
||||
"Красногорск": 69,
|
||||
"Краснозаводск": 189,
|
||||
"Кубинка": 134,
|
||||
"Куровское": 80,
|
||||
"Ликино-Дулево": 48,
|
||||
"Лобня": 61,
|
||||
"Лосино-Петровский": 74,
|
||||
"Луховицы": 25,
|
||||
"Лыткарино": 117,
|
||||
"Люберцы": 27,
|
||||
"Малаховка": 130,
|
||||
"Можайск": 122,
|
||||
"Мытищи": 79,
|
||||
"Наро-Фоминск": 124,
|
||||
"Нахабино": 196,
|
||||
"Некрасовский": 188,
|
||||
"Ногинск": 26,
|
||||
"Одинцово": 38,
|
||||
"Озеры": 114,
|
||||
"Орехово-Зуево": 33,
|
||||
"Павловский Посад": 70,
|
||||
"Пересвет": 67,
|
||||
"Подольск": 28,
|
||||
"Протвино": 203,
|
||||
"Путилково": 201,
|
||||
"Пушкино": 84,
|
||||
"Раменское": 75,
|
||||
"Реутов": 35,
|
||||
"Руза": 197,
|
||||
"Сапроново": 211,
|
||||
"Сергиев Посад": 46,
|
||||
"Серебряные пруды": 45,
|
||||
"Серпухов": 50,
|
||||
"Солнечногорск": 118,
|
||||
"Старая Купавна": 88,
|
||||
"Ступино": 66,
|
||||
"Талдом": 135,
|
||||
"Фрязино": 32,
|
||||
"Химки": 62,
|
||||
"Хотьково": 204,
|
||||
"Черноголовка": 54,
|
||||
"Чехов": 59,
|
||||
"Шатура": 129,
|
||||
"Щелково": 19,
|
||||
"Электрогорск": 208,
|
||||
"Электросталь": 91,
|
||||
"Электроугли": 216,
|
||||
"Яхрома": 68,
|
||||
"р.п. Лесной": 92,
|
||||
"р.п. Октябрьский": 99,
|
||||
"рп. Боброво": 215,
|
||||
"Колпино": 97,
|
||||
"Кронштадт ": 103,
|
||||
"Петергоф": 212,
|
||||
"Сестрорецк": 96,
|
||||
"Шушары": 123,
|
||||
"п. Металлострой": 161,
|
||||
"Бокситогорск": 205,
|
||||
"Волосово": 202,
|
||||
"Волхов": 206,
|
||||
"Всеволожск": 153,
|
||||
"Выборг": 156,
|
||||
"Гатчина": 108,
|
||||
"Кингисепп": 144,
|
||||
"Кириши": 148,
|
||||
"Кировск": 193,
|
||||
"Кудрово": 158,
|
||||
"Ломоносов": 159,
|
||||
"Луга": 147,
|
||||
"Мурино": 157,
|
||||
"Никольское": 150,
|
||||
"Отрадное": 151,
|
||||
"Поселок имени Морозова": 182,
|
||||
"Приозерск": 136,
|
||||
"Пушкин": 160,
|
||||
"Сертолово": 132,
|
||||
"Сланцы": 154,
|
||||
"Тихвин": 178,
|
||||
"Тосно": 176,
|
||||
"Шлиссельбург": 155,
|
||||
"Александров": 53,
|
||||
"Владимир": 52,
|
||||
"Вязники": 86,
|
||||
"Гусь-Хрустальный": 145,
|
||||
"Киржач": 199,
|
||||
"Ковров": 56,
|
||||
"Кольчугино": 89,
|
||||
"Лакинск": 179,
|
||||
"Муром": 43,
|
||||
"Петушки": 167,
|
||||
"Покров": 138,
|
||||
"Радужный": 120,
|
||||
"Собинка": 168,
|
||||
"Струнино": 131,
|
||||
"Судогда ": 109,
|
||||
"Юрьев-Польский": 81,
|
||||
"Заволжск": 194,
|
||||
"Иваново": 85,
|
||||
"Кохма": 200,
|
||||
"Родники": 186,
|
||||
"Тейково": 128,
|
||||
"Шуя": 187,
|
||||
"Белоусово": 171,
|
||||
"Боровск": 170,
|
||||
"Калуга": 140,
|
||||
"Кременки": 191,
|
||||
"Малоярославец": 172,
|
||||
"Обнинск": 141,
|
||||
"Кострома": 76,
|
||||
"Выкса": 181,
|
||||
"Дзержинск": 169,
|
||||
"Кулебаки": 183,
|
||||
"Нижний Новгород": 146,
|
||||
"Боровичи": 180,
|
||||
"Валдай": 192,
|
||||
"Великий Новгород": 143,
|
||||
"Старая Русса": 149,
|
||||
"Великие Луки": 166,
|
||||
"Остров": 177,
|
||||
"Псков": 142,
|
||||
"Рязань": 63,
|
||||
"Тверь": 213,
|
||||
"Алексин": 217,
|
||||
"Богородицк": 221,
|
||||
"Венев": 152,
|
||||
"Донской": 82,
|
||||
"Ефремов": 219,
|
||||
"Кимовск": 101,
|
||||
"Новомосковск": 94,
|
||||
"Суворов": 190,
|
||||
"Тула": 40,
|
||||
"Узловая": 125,
|
||||
"Щекино": 51,
|
||||
"Переславль-Залесский": 209,
|
||||
"Рыбинск": 198,
|
||||
"Ярославль": 100
|
||||
}
|
|
@ -1,16 +1,16 @@
|
|||
import asyncio
|
||||
import configparser
|
||||
import random
|
||||
import logging
|
||||
|
||||
from loguru import logger
|
||||
import aiohttp
|
||||
import backoff
|
||||
|
||||
from utils.exceptions import ConfigError
|
||||
from crawler.utils.exceptions import ConfigError
|
||||
from crawler.types import City, Proxy
|
||||
from utils.classes import Singleton
|
||||
from crawler.utils.classes import Singleton
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log = logger
|
||||
|
||||
class CrawlerAPI(metaclass=Singleton):
|
||||
api_baseurl = "https://q.asburo.ru/ch/"
|
||||
|
@ -94,7 +94,7 @@ class CrawlerAPI(metaclass=Singleton):
|
|||
|
||||
@backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
|
||||
async def send_products(self, results: list):
|
||||
log.info("Sending data")
|
||||
log.info("Sending data...")
|
||||
|
||||
url = f"{self.api_url}/prices/{self.rival_tag}"
|
||||
|
||||
|
@ -105,7 +105,7 @@ class CrawlerAPI(metaclass=Singleton):
|
|||
response = await self.session.post(url, json=data, auth=self.auth)
|
||||
status, response_text = response.status, await response.text()
|
||||
|
||||
log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
|
||||
log.debug(f"{data} was sended. Status: {status}. Response: {response_text}")
|
||||
|
||||
if status >= 500:
|
||||
await asyncio.sleep(15)
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
from dataclasses import dataclass
|
||||
from utils.classes import DataclassBase
|
||||
from crawler.utils.classes import DataclassBase
|
||||
|
||||
@dataclass(init=False)
class City(DataclassBase):
    """City record used by the crawler.

    ``init=False`` tells ``dataclass`` not to generate ``__init__``;
    construction is therefore left to ``DataclassBase`` (defined outside
    this view).
    """

    # Numeric city id; defaults to 0 until a caller assigns a real one.
    id: int = 0
    # City name.
    city: str
    region_id: int | None
    region_name: str | None
    # Intended as a 0/1 flag. The previous annotation was the expression
    # ``1 | 0``, which evaluates to the integer 1 rather than naming a type.
    is_byapt: int
|
||||
|
||||
@dataclass(init=False)
|
||||
class Proxy(DataclassBase):
|
||||
|
|
43
main.py
43
main.py
|
@ -1,17 +1,50 @@
|
|||
import sys
|
||||
import asyncio
|
||||
import configparser
|
||||
import json
|
||||
import multiprocessing
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from stolichki.parsers.city import CityParser
|
||||
from stolichki.types.city import City
|
||||
from crawler.api import CrawlerAPI
|
||||
from crawler.utils.classes import DataclassJSONEncoder
|
||||
|
||||
from crawler.types import City, Proxy
|
||||
|
||||
async def get_crawler_data():
    """Fetch the city and proxy lists through ``CrawlerAPI``.

    Returns:
        A ``(cities, proxies)`` tuple. The cities request is awaited to
        completion before the proxies request is started.
    """
    api = CrawlerAPI()
    # Tuple elements are evaluated left to right, so the calls stay sequential.
    return await api.get_cities(), await api.get_proxies()
|
||||
|
||||
def filter_cities(config, cities: list[City]) -> list[City]:
    """Assign site-specific ids to cities and drop the ones without a mapping.

    The JSON file at ``config["parser"]["cities_path"]`` maps a city name to
    its numeric id. Each city whose name is present gets ``city.id`` set to
    that id; cities that are absent from the mapping are left out of the
    result instead of aborting the whole run with ``KeyError``.

    Args:
        config: Mapping-like configuration with a ``parser`` section that
            contains ``cities_path``.
        cities: Objects exposing a ``city`` name attribute and a writable
            ``id`` attribute.

    Returns:
        The cities found in the mapping, in their original order, with
        ``id`` updated in place.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default,
    # because the city names are Cyrillic.
    with open(config["parser"]["cities_path"], encoding="utf-8") as f:
        cities_stolichki = json.load(f)

    # Mapping keys may carry stray surrounding whitespace; normalise both
    # sides so such entries still match.
    ids_by_name = {
        name.strip(): city_id for name, city_id in cities_stolichki.items()
    }

    matched = []
    for city in cities:
        city_id = ids_by_name.get(city.city.strip())
        if city_id is None:
            # Unknown to the mapping: filter it out.
            continue
        city.id = city_id
        matched.append(city)

    return matched
|
||||
|
||||
def _parse_city(city: City, proxies: list[Proxy]):
    """Parse a single city; kept at module level so worker processes can pickle it."""
    return CityParser(city, proxies).parse()


@logger.catch
def main(cities: list[City], proxies: list[Proxy]):
    """Parse every mapped city in a process pool and dump the results to JSON.

    Reads ``config.ini``, maps the cities to site ids via ``filter_cities``,
    runs one ``CityParser`` per city across a ``multiprocessing.Pool`` and
    writes the collected results to ``results.json``.

    Args:
        cities: Cities to parse.
        proxies: Proxies handed to every ``CityParser``.

    Raises:
        ValueError: If ``cities_quantity`` is set but is not an integer
            (``logger.catch`` logs it instead of propagating).
    """
    config = configparser.ConfigParser()
    config.read("config.ini")

    cities = filter_cities(config, cities)

    # configparser returns strings; Pool needs an int. An empty or missing
    # value means "one process per city".
    raw_quantity = config.get("parser", "cities_quantity", fallback="").strip()
    quantity = int(raw_quantity) if raw_quantity else len(cities)
    # Pool rejects a process count below 1 (e.g. when no cities are left).
    quantity = max(1, quantity)

    with multiprocessing.Pool(processes=quantity) as pool:
        # A lambda cannot be pickled for the workers; pass a module-level
        # function plus explicit argument tuples instead.
        results = pool.starmap(_parse_city, [(city, proxies) for city in cities])

    # ensure_ascii=False emits raw non-ASCII text, so fix the file encoding.
    with open("results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)
|
||||
|
||||
if __name__ == "__main__":
    # Send loguru output to stderr. enqueue=True routes records through a
    # queue, which loguru documents as the multiprocessing-safe mode.
    logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True)  # type: ignore

    # main() now requires the crawler data, so fetch it first; the former
    # zero-argument main() call no longer matches the signature.
    cities, proxies = asyncio.run(get_crawler_data())
    main(cities, proxies)
|
|
@ -19,7 +19,8 @@ from webdriver_manager.chrome import ChromeDriverManager
|
|||
|
||||
from twocaptcha import TwoCaptcha
|
||||
|
||||
from stolichki.errors import CaptchaError, ConfigError, LoadingError
|
||||
from stolichki.errors import CaptchaError, LoadingError
|
||||
from crawler.utils.exceptions import ConfigError
|
||||
|
||||
class StolichkiDriver(uc.Chrome):
|
||||
def __init__(self, **kwargs):
|
||||
|
|
|
@ -3,9 +3,5 @@ class CaptchaError(Exception):
|
|||
super().__init__(*args)
|
||||
|
||||
class LoadingError(Exception):
    """Error type for loading failures.

    Presumably raised when a page or resource fails to load — confirm at the
    raise sites. Accepts the same positional arguments as ``Exception``; no
    custom ``__init__`` is needed because it would only forward them.
    """
|
||||
|
||||
class ConfigError(Exception):
    """Error type for configuration problems.

    Presumably raised when required configuration is missing or invalid —
    confirm at the raise sites. Accepts the same positional arguments as
    ``Exception``; no custom ``__init__`` is needed because it would only
    forward them.
    """
|
|
@ -1,12 +1,14 @@
|
|||
from selenium.webdriver.common.by import By
|
||||
|
||||
from crawler.types import Proxy
|
||||
|
||||
from stolichki.driver import StolichkiDriver
|
||||
from stolichki.parsers.category import get_category_parser
|
||||
from stolichki.types.city import City
|
||||
|
||||
|
||||
class CityParser:
|
||||
def __init__(self, city: City) -> None:
|
||||
def __init__(self, city, proxies: list[Proxy]) -> None:
|
||||
self.driver = StolichkiDriver()
|
||||
self.city = city
|
||||
|
||||
|
|
Loading…
Reference in New Issue