import asyncio
import configparser
import json
import multiprocessing
import sys
from functools import partial

from loguru import logger

from crawler.api import CrawlerAPI
from crawler.types import City, Proxy
from crawler.utils.classes import DataclassJSONEncoder
from stolichki.parsers.city import CityParser


async def get_crawler_data() -> tuple[list[City], list[Proxy]]:
    """Fetch the city and proxy lists from the crawler API."""
    crawler = CrawlerAPI()
    cities = await crawler.get_cities()
    proxies = await crawler.get_proxies()
    return cities, proxies


def filter_cities(config: configparser.ConfigParser, cities: list[City]) -> list[City]:
    """Attach the Stolichki city id to each crawler city using the configured JSON mapping."""
    with open(config["parser"]["cities_path"]) as f:
        cities_stolichki = json.load(f)

    for city in cities:
        city.id = cities_stolichki[city.city]

    return cities


def parse_city(city: City, proxies: list[Proxy]):
    """Module-level worker so it can be pickled by multiprocessing."""
    return CityParser(city, proxies).parse()


@logger.catch
def main(cities: list[City], proxies: list[Proxy]):
    config = configparser.ConfigParser()
    config.read("config.ini")

    cities = filter_cities(config, cities)

    # configparser returns strings, so read the value as an int;
    # fall back to one process per city when the option is missing or zero.
    quantity = config.getint("parser", "cities_quantity", fallback=0) or len(cities)

    # A lambda cannot be pickled by multiprocessing, so bind the proxies to the
    # module-level worker with functools.partial instead.
    with multiprocessing.Pool(processes=quantity) as pool:
        results = pool.map(partial(parse_city, proxies=proxies), cities)

    with open("results.json", "w") as f:
        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True)

    cities, proxies = asyncio.run(get_crawler_data())
    main(cities, proxies)