Compare commits
No commits in common. "fe16d4eee9953d8ab99a268b7eed48b62c8838ce" and "049e436d37277cd97109da927426cb796e3226d2" have entirely different histories.
fe16d4eee9 ... 049e436d37
cities.json (177 lines changed)

@@ -1,177 +0,0 @@
-{
-    "Москва": 1,
-    "Санкт-Петербург": 77,
-    "Бутово": 111,
-    "Внуковское": 207,
-    "Зеленоград": 71,
-    "Котельники": 98,
-    "Поселение Вороновское": 174,
-    "Рабочий поселок Столбовая": 173,
-    "Троицк": 195,
-    "Щербинка": 162,
-    "мкр.Град Московский": 105,
-    "п. Коммунарка": 165,
-    "п.Рязановское": 121,
-    "пос. Десеновское": 104,
-    "рп Тучково": 164,
-    "Андреевка": 185,
-    "Балашиха": 44,
-    "Бронницы": 106,
-    "Видное": 126,
-    "Волоколамск": 137,
-    "Высоковск": 184,
-    "Голицыно": 163,
-    "Дедовск ": 107,
-    "Дзержинский": 29,
-    "Дмитров": 6,
-    "Долгопрудный": 36,
-    "Домодедово": 115,
-    "Дубна": 90,
-    "Егорьевск": 30,
-    "Жуковский": 110,
-    "Зарайск": 113,
-    "Звенигород": 116,
-    "Ивантеевка": 127,
-    "Истра": 102,
-    "Кашира": 65,
-    "Клин": 64,
-    "Коломна": 31,
-    "Королев": 60,
-    "Красноармейск": 210,
-    "Красногорск": 69,
-    "Краснозаводск": 189,
-    "Кубинка": 134,
-    "Куровское": 80,
-    "Ликино-Дулево": 48,
-    "Лобня": 61,
-    "Лосино-Петровский": 74,
-    "Луховицы": 25,
-    "Лыткарино": 117,
-    "Люберцы": 27,
-    "Малаховка": 130,
-    "Можайск": 122,
-    "Мытищи": 79,
-    "Наро-Фоминск": 124,
-    "Нахабино": 196,
-    "Некрасовский": 188,
-    "Ногинск": 26,
-    "Одинцово": 38,
-    "Озеры": 114,
-    "Орехово-Зуево": 33,
-    "Павловский Посад": 70,
-    "Пересвет": 67,
-    "Подольск": 28,
-    "Протвино": 203,
-    "Путилково": 201,
-    "Пушкино": 84,
-    "Раменское": 75,
-    "Реутов": 35,
-    "Руза": 197,
-    "Сапроново": 211,
-    "Сергиев Посад": 46,
-    "Серебряные пруды": 45,
-    "Серпухов": 50,
-    "Солнечногорск": 118,
-    "Старая Купавна": 88,
-    "Ступино": 66,
-    "Талдом": 135,
-    "Фрязино": 32,
-    "Химки": 62,
-    "Хотьково": 204,
-    "Черноголовка": 54,
-    "Чехов": 59,
-    "Шатура": 129,
-    "Щелково": 19,
-    "Электрогорск": 208,
-    "Электросталь": 91,
-    "Электроугли": 216,
-    "Яхрома": 68,
-    "р.п. Лесной": 92,
-    "р.п. Октябрьский": 99,
-    "рп. Боброво": 215,
-    "Колпино": 97,
-    "Кронштадт ": 103,
-    "Петергоф": 212,
-    "Сестрорецк": 96,
-    "Шушары": 123,
-    "п. Металлострой": 161,
-    "Бокситогорск": 205,
-    "Волосово": 202,
-    "Волхов": 206,
-    "Всеволожск": 153,
-    "Выборг": 156,
-    "Гатчина": 108,
-    "Кингисепп": 144,
-    "Кириши": 148,
-    "Кировск": 193,
-    "Кудрово": 158,
-    "Ломоносов": 159,
-    "Луга": 147,
-    "Мурино": 157,
-    "Никольское": 150,
-    "Отрадное": 151,
-    "Поселок имени Морозова": 182,
-    "Приозерск": 136,
-    "Пушкин": 160,
-    "Сертолово": 132,
-    "Сланцы": 154,
-    "Тихвин": 178,
-    "Тосно": 176,
-    "Шлиссельбург": 155,
-    "Александров": 53,
-    "Владимир": 52,
-    "Вязники": 86,
-    "Гусь-Хрустальный": 145,
-    "Киржач": 199,
-    "Ковров": 56,
-    "Кольчугино": 89,
-    "Лакинск": 179,
-    "Муром": 43,
-    "Петушки": 167,
-    "Покров": 138,
-    "Радужный": 120,
-    "Собинка": 168,
-    "Струнино": 131,
-    "Судогда ": 109,
-    "Юрьев-Польский": 81,
-    "Заволжск": 194,
-    "Иваново": 85,
-    "Кохма": 200,
-    "Родники": 186,
-    "Тейково": 128,
-    "Шуя": 187,
-    "Белоусово": 171,
-    "Боровск": 170,
-    "Калуга": 140,
-    "Кременки": 191,
-    "Малоярославец": 172,
-    "Обнинск": 141,
-    "Кострома": 76,
-    "Выкса": 181,
-    "Дзержинск": 169,
-    "Кулебаки": 183,
-    "Нижний Новгород": 146,
-    "Боровичи": 180,
-    "Валдай": 192,
-    "Великий Новгород": 143,
-    "Старая Русса": 149,
-    "Великие Луки": 166,
-    "Остров": 177,
-    "Псков": 142,
-    "Рязань": 63,
-    "Тверь": 213,
-    "Алексин": 217,
-    "Богородицк": 221,
-    "Венев": 152,
-    "Донской": 82,
-    "Ефремов": 219,
-    "Кимовск": 101,
-    "Новомосковск": 94,
-    "Суворов": 190,
-    "Тула": 40,
-    "Узловая": 125,
-    "Щекино": 51,
-    "Переславль-Залесский": 209,
-    "Рыбинск": 198,
-    "Ярославль": 100
-}
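The deleted mapping above was read by the old filter_cities helper in main.py (shown further down in this compare) to attach an internal id to each crawled city. A minimal sketch of that lookup, assuming cities.json sits next to the script; the file name and the sample city come from the diff itself:

```python
import json


def load_city_ids(path: str = "cities.json") -> dict[str, int]:
    # The deleted file maps a city name to its numeric id.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


if __name__ == "__main__":
    city_ids = load_city_ids()
    print(city_ids["Бутово"])  # 111, per the mapping above
```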
@@ -1,16 +1,16 @@
 import asyncio
 import configparser
 import random
+import logging
 
-from loguru import logger
 import aiohttp
 import backoff
 
-from crawler.utils.exceptions import ConfigError
+from utils.exceptions import ConfigError
 from crawler.types import City, Proxy
-from crawler.utils.classes import Singleton
+from utils.classes import Singleton
 
-log = logger
+log = logging.getLogger(__name__)
 
 class CrawlerAPI(metaclass=Singleton):
     api_baseurl = "https://q.asburo.ru/ch/"

@@ -94,7 +94,7 @@ class CrawlerAPI(metaclass=Singleton):
 
     @backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
     async def send_products(self, results: list):
-        log.info("Sending data...")
+        log.info("Sending data")
 
         url = f"{self.api_url}/prices/{self.rival_tag}"
 

@@ -105,7 +105,7 @@ class CrawlerAPI(metaclass=Singleton):
         response = await self.session.post(url, json=data, auth=self.auth)
         status, response_text = response.status, await response.text()
 
-        log.debug(f"{data} was sended. Status: {status}. Response: {response_text}")
+        log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
 
         if status >= 500:
             await asyncio.sleep(15)
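The three hunks above carry no file name in this view; the CrawlerAPI class and its send_products method suggest the module imported elsewhere in this compare as crawler.api. The retry pattern they show, backoff.on_exception wrapped around an aiohttp POST, works stand-alone; the endpoint below is a public echo service and the payload is a placeholder, not the module's real API:

```python
import asyncio

import aiohttp
import backoff


@backoff.on_exception(
    backoff.expo,
    (aiohttp.ClientError, aiohttp.ServerConnectionError),
    max_tries=15,
)
async def post_prices(session: aiohttp.ClientSession, url: str, data: dict) -> tuple[int, str]:
    # Retried with exponential backoff on client/connection errors,
    # mirroring the decorator on send_products.
    response = await session.post(url, json=data)
    return response.status, await response.text()


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        status, body = await post_prices(session, "https://httpbin.org/post", {"items": []})
        print(status, body)


if __name__ == "__main__":
    asyncio.run(main())
```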
@@ -1,13 +1,11 @@
 from dataclasses import dataclass
-from crawler.utils.classes import DataclassBase
+from utils.classes import DataclassBase
 
 @dataclass(init=False)
 class City(DataclassBase):
-    id: int = 0
     city: str
     region_id: int | None
     region_name: str | None
-    is_byapt: 1 | 0
 
 @dataclass(init=False)
 class Proxy(DataclassBase):
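The City and Proxy dataclasses above rely on a DataclassBase helper that is not part of this compare, so their exact construction behaviour is unknown. Purely as an assumption, a base of this kind often just copies a keyword payload into the declared fields; a generic stand-in, not the project's real class:

```python
from dataclasses import dataclass, fields


class DataclassBase:
    # Hypothetical stand-in: fill the declared dataclass fields from keyword arguments.
    def __init__(self, **payload):
        for field in fields(self):  # works because subclasses are dataclasses
            setattr(self, field.name, payload.get(field.name))


@dataclass(init=False)
class City(DataclassBase):
    city: str
    region_id: int | None
    region_name: str | None


print(City(city="Бутово", region_id=None, region_name=None))
```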
main.py (43 lines changed)

@@ -1,50 +1,17 @@
 import sys
-import asyncio
-import configparser
-import json
-import multiprocessing
 
 from loguru import logger
 
 from stolichki.parsers.city import CityParser
 from stolichki.types.city import City
-from crawler.api import CrawlerAPI
-from crawler.utils.classes import DataclassJSONEncoder
-
-from crawler.types import City, Proxy
 
-
-async def get_crawler_data():
-    crawler = CrawlerAPI()
-    cities = await crawler.get_cities()
-    proxies = await crawler.get_proxies()
-    return cities, proxies
-
-def filter_cities(config, cities: list[City]) -> list[City]:
-    with open(config["parser"]["cities_path"]) as f:
-        cities_stolichki = json.load(f)
-
-    for city in cities:
-        city.id = cities_stolichki[city.city]
-
-    return cities
-
 
 @logger.catch
-def main(cities: list[City], proxies: list[Proxy]):
-    config = configparser.ConfigParser()
-    config.read("config.ini")
-
-    cities = filter_cities(config, cities)
-    quantity = config['parser']['cities_quantity'] or len(cities)
-
-    with multiprocessing.Pool(processes=quantity) as pool: #type: ignore
-        results = pool.map(lambda city: CityParser(city, proxies).parse(), cities)
-
-    with open("results.json", "w") as f:
-        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)
+def main():
+    city = City(111, "Бутово", 1, [])
+    result = CityParser(city).parse()
+    print(result)
 
 
 if __name__ == "__main__":
     logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True) #type: ignore
-    cities, proxies = asyncio.run(get_crawler_data())
-    main(cities, proxies)
+    main()
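In the fe16d4eee9 revision (the "-" side above), main() fanned the cities out over a multiprocessing.Pool. Two practical details of that pattern are worth noting: Pool.map cannot pickle a lambda, so the callable normally has to live at module level, and configparser returns strings, so a quantity read from config.ini needs an int() cast before being passed as processes. A self-contained sketch under those assumptions, with a dummy worker standing in for CityParser:

```python
import multiprocessing


def parse_city(city_name: str) -> str:
    # Dummy stand-in for CityParser(city, proxies).parse().
    return f"parsed {city_name}"


def run(cities: list[str], quantity: str) -> list[str]:
    processes = int(quantity) if quantity else len(cities)  # configparser values are strings
    with multiprocessing.Pool(processes=processes) as pool:
        # A module-level function, unlike a lambda, can be pickled by Pool.map.
        return pool.map(parse_city, cities)


if __name__ == "__main__":
    print(run(["Москва", "Бутово"], quantity="2"))
```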
@@ -19,8 +19,7 @@ from webdriver_manager.chrome import ChromeDriverManager
 
 from twocaptcha import TwoCaptcha
 
-from stolichki.errors import CaptchaError, LoadingError
-from crawler.utils.exceptions import ConfigError
+from stolichki.errors import CaptchaError, ConfigError, LoadingError
 
 class StolichkiDriver(uc.Chrome):
     def __init__(self, **kwargs):
@@ -3,5 +3,9 @@ class CaptchaError(Exception):
         super().__init__(*args)
 
 class LoadingError(Exception):
+    def __init__(self, *args: object) -> None:
+        super().__init__(*args)
+
+class ConfigError(Exception):
     def __init__(self, *args: object) -> None:
         super().__init__(*args)
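On the 049e436d37 side, ConfigError lives alongside CaptchaError and LoadingError in stolichki.errors instead of the separate crawler.utils.exceptions module. A small usage sketch; the section and key names are placeholders, not the project's actual config schema:

```python
import configparser


class ConfigError(Exception):
    def __init__(self, *args: object) -> None:
        super().__init__(*args)


def read_setting(path: str, section: str, key: str) -> str:
    config = configparser.ConfigParser()
    if not config.read(path):
        raise ConfigError(f"could not read {path}")
    try:
        return config[section][key]
    except KeyError as error:
        raise ConfigError(f"missing [{section}] {key} in {path}") from error
```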
@@ -1,14 +1,12 @@
 from selenium.webdriver.common.by import By
 
-from crawler.types import Proxy
-
 from stolichki.driver import StolichkiDriver
 from stolichki.parsers.category import get_category_parser
 from stolichki.types.city import City
 
 
 class CityParser:
-    def __init__(self, city, proxies: list[Proxy]) -> None:
+    def __init__(self, city: City) -> None:
         self.driver = StolichkiDriver()
         self.city = city
 