diff --git a/crawler/types.py b/crawler/types.py
index f6d745d..affd148 100644
--- a/crawler/types.py
+++ b/crawler/types.py
@@ -1,4 +1,6 @@
 from dataclasses import dataclass
+from typing import Literal
+
 from crawler.utils.classes import DataclassBase
 
 @dataclass(init=False)
@@ -7,7 +9,7 @@ class City(DataclassBase):
     city: str
     region_id: int | None
     region_name: str | None
-    is_byapt: 1 | 0
+    is_byapt: Literal[1, 0]
 
 @dataclass(init=False)
 class Proxy(DataclassBase):
diff --git a/main.py b/main.py
index c941467..8277b1c 100644
--- a/main.py
+++ b/main.py
@@ -17,6 +17,7 @@ async def get_crawler_data():
     crawler = CrawlerAPI()
     cities = await crawler.get_cities()
     proxies = await crawler.get_proxies()
+    await crawler.close()
     return cities, proxies
 
 def filter_cities(config, cities: list[City]) -> list[City]:
@@ -29,16 +30,19 @@ def filter_cities(config, cities: list[City]) -> list[City]:
 
     return cities
 
+def parse_city(city: City):
+    return CityParser(city, proxies).parse()
+
 @logger.catch
-def main(cities: list[City], proxies: list[Proxy]):
+def main(cities: list[City]):
     config = configparser.ConfigParser()
     config.read("config.ini")
 
     cities = filter_cities(config, cities)
     quantity = config['parser']['cities_quantity'] or len(cities)
 
-    with multiprocessing.Pool(processes=quantity) as pool: #type: ignore
-        results = pool.map(lambda city: CityParser(city, proxies).parse(), cities)
+    with multiprocessing.Pool(processes=int(quantity)) as pool: #type: ignore
+        results = pool.map(parse_city, cities)
 
     with open("results.json", "w") as f:
         json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)
@@ -47,4 +51,4 @@ if __name__ == "__main__":
     logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True) #type: ignore
 
     cities, proxies = asyncio.run(get_crawler_data())
-    main(cities, proxies)
\ No newline at end of file
+    main(cities)
\ No newline at end of file
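
Note on the pool.map change (a sketch, not part of the patch): multiprocessing.Pool pickles the callable it sends to worker processes, and lambdas are not picklable, which is why the inline lambda is replaced with the module-level parse_city. As written, though, parse_city still reads proxies from module scope, and that global is only assigned under the __main__ guard, so workers started with the "spawn" method would never see it. One way to pass proxies explicitly instead is functools.partial, since partials of module-level functions pickle fine. A minimal standalone sketch, where parse_city's body and the city/proxy values are illustrative stand-ins for the real CityParser call:

    import multiprocessing
    from functools import partial

    def parse_city(city: str, proxies: list[str]) -> str:
        # Stand-in for CityParser(city, proxies).parse()
        return f"parsed {city} with {len(proxies)} proxies"

    if __name__ == "__main__":
        cities = ["moscow", "kazan"]          # illustrative values
        proxies = ["http://127.0.0.1:8080"]   # illustrative values
        with multiprocessing.Pool(processes=2) as pool:
            # partial binds proxies explicitly, so workers do not depend on
            # a module-level global that only exists under __main__
            results = pool.map(partial(parse_city, proxies=proxies), cities)
        print(results)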