Testing
parent b97e8fe739
commit fe16d4eee9
@@ -1,16 +1,16 @@
 import asyncio
 import configparser
 import random
-import logging
+from loguru import logger

 import aiohttp
 import backoff

-from utils.exceptions import ConfigError
+from crawler.utils.exceptions import ConfigError
 from crawler.types import City, Proxy
-from utils.classes import Singleton
+from crawler.utils.classes import Singleton

-log = logging.getLogger(__name__)
+log = logger

 class CrawlerAPI(metaclass=Singleton):
     api_baseurl = "https://q.asburo.ru/ch/"
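Note on the logging swap above: loguru's `logger` exposes the same level methods (`.debug()`, `.info()`, `.warning()`, `.error()`) as a stdlib `logging.Logger`, so rebinding `log = logger` keeps the existing `log.*` call sites working unchanged. A minimal standalone sketch of the equivalence (not part of the commit):

    # Both objects accept the same call shape at the call sites used here.
    import logging
    from loguru import logger

    stdlib_log = logging.getLogger(__name__)  # stdlib: needs handlers configured
    log = logger                              # loguru: ships a stderr sink by default

    log.info("Sending data...")               # same call shape as stdlib_log.info(...)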
@@ -94,7 +94,7 @@ class CrawlerAPI(metaclass=Singleton):

     @backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
     async def send_products(self, results: list):
-        log.info("Sending data")
+        log.info("Sending data...")

         url = f"{self.api_url}/prices/{self.rival_tag}"

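The `@backoff.on_exception` decorator above retries the coroutine whenever one of the listed aiohttp exceptions escapes, with exponentially growing (jittered) waits between attempts, re-raising once `max_tries=15` is exhausted. A standalone toy sketch of that behavior (synchronous for brevity; names are illustrative):

    import backoff

    attempts = 0

    @backoff.on_exception(backoff.expo, RuntimeError, max_tries=4)
    def flaky():
        # Fails twice, then succeeds; backoff sleeps between the retries.
        global attempts
        attempts += 1
        if attempts < 3:
            raise RuntimeError("transient failure")
        return "ok"

    print(flaky(), "after", attempts, "attempts")  # -> ok after 3 attempts

One caveat worth verifying after this commit: `logger=log` now receives loguru's logger, while backoff emits stdlib-style %-formatted log records, so the retry messages may come out as raw, uninterpolated templates.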
@@ -105,7 +105,7 @@ class CrawlerAPI(metaclass=Singleton):
         response = await self.session.post(url, json=data, auth=self.auth)
         status, response_text = response.status, await response.text()

-        log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
+        log.debug(f"{data} was sended. Status: {status}. Response: {response_text}")

         if status >= 500:
             await asyncio.sleep(15)
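The trailing `if status >= 500: await asyncio.sleep(15)` only pauses; nothing re-sends the payload afterwards. Since the decorator already retries on `aiohttp.ClientError`, one possible variant (a sketch, not what the commit does) is to turn server errors into exceptions inside `send_products` so backoff takes over the retry:

    # Hypothetical: raise_for_status() raises aiohttp.ClientResponseError,
    # a subclass of aiohttp.ClientError, which the @backoff.on_exception
    # decorator above already retries with exponential waits.
    response = await self.session.post(url, json=data, auth=self.auth)
    response.raise_for_status()
    status, response_text = response.status, await response.text()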
@@ -1,11 +1,13 @@
 from dataclasses import dataclass
-from utils.classes import DataclassBase
+from crawler.utils.classes import DataclassBase

 @dataclass(init=False)
 class City(DataclassBase):
+    id: int = 0
     city: str
     region_id: int | None
     region_name: str | None
+    is_byapt: 1 | 0

 @dataclass(init=False)
 class Proxy(DataclassBase):
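A side note on the `City` dataclass above: `is_byapt: 1 | 0` is a value expression, not a type. It is evaluated at class-creation time as a bitwise OR and stores the integer 1 as the annotation, which type checkers will reject. If the intent is "either 0 or 1", the conventional spelling is `typing.Literal`; a standalone sketch (without the repo's `DataclassBase`):

    from dataclasses import dataclass
    from typing import Literal

    @dataclass(init=False)
    class City:
        id: int = 0
        city: str = ""
        region_id: int | None = None
        region_name: str | None = None
        is_byapt: Literal[0, 1] = 0  # instead of the annotation 1 | 0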
main.py (43 lines changed)
@@ -1,17 +1,50 @@
 import sys
+import asyncio
+import configparser
+import json
+import multiprocessing

 from loguru import logger

 from stolichki.parsers.city import CityParser
-from stolichki.types.city import City
+from crawler.api import CrawlerAPI
+from crawler.utils.classes import DataclassJSONEncoder
+
+from crawler.types import City, Proxy
+
+
+async def get_crawler_data():
+    crawler = CrawlerAPI()
+    cities = await crawler.get_cities()
+    proxies = await crawler.get_proxies()
+    return cities, proxies
+
+def filter_cities(config, cities: list[City]) -> list[City]:
+
+    with open(config["parser"]["cities_path"]) as f:
+        cities_stolichki = json.load(f)
+
+    for city in cities:
+        city.id = cities_stolichki[city.city]
+
+    return cities

-
 @logger.catch
-def main():
-    city = City(111, "Бутово", 1, [])
-    result = CityParser(city).parse()
-    print(result)
+def main(cities: list[City], proxies: list[Proxy]):
+    config = configparser.ConfigParser()
+    config.read("config.ini")
+
+    cities = filter_cities(config, cities)
+    quantity = config['parser']['cities_quantity'] or len(cities)
+
+    with multiprocessing.Pool(processes=quantity) as pool: #type: ignore
+        results = pool.map(lambda city: CityParser(city, proxies).parse(), cities)
+
+    with open("results.json", "w") as f:
+        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)

 if __name__ == "__main__":
     logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True) #type: ignore
-    main()
+
+    cities, proxies = asyncio.run(get_crawler_data())
+    main(cities, proxies)
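Two issues in the new `main()` are worth flagging. `multiprocessing.Pool.map` pickles the callable it sends to worker processes, and lambdas are not picklable, so the `pool.map(lambda city: ...)` line will raise a pickling error at runtime; a module-level function plus `functools.partial` avoids that. Also, `configparser` returns strings, so `quantity` is a `str` whenever `cities_quantity` is set, while `processes=` needs an `int` (which the `#type: ignore` is hiding). A hedged rework of just that section (the names match the commit; the casting choice is an assumption):

    import functools
    import multiprocessing

    def parse_city(proxies: list[Proxy], city: City):
        # Module-level so multiprocessing can pickle it; proxies is
        # bound via functools.partial below.
        return CityParser(city, proxies).parse()

    def run_parsers(config, cities: list[City], proxies: list[Proxy]):
        # configparser values are strings; fall back to len(cities)
        # when the option is empty or missing.
        quantity = int(config["parser"].get("cities_quantity") or 0) or len(cities)
        with multiprocessing.Pool(processes=quantity) as pool:
            return pool.map(functools.partial(parse_city, proxies), cities)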
@@ -19,7 +19,8 @@ from webdriver_manager.chrome import ChromeDriverManager

 from twocaptcha import TwoCaptcha

-from stolichki.errors import CaptchaError, ConfigError, LoadingError
+from stolichki.errors import CaptchaError, LoadingError
+from crawler.utils.exceptions import ConfigError

 class StolichkiDriver(uc.Chrome):
     def __init__(self, **kwargs):
@@ -3,9 +3,5 @@ class CaptchaError(Exception):
         super().__init__(*args)

 class LoadingError(Exception):
     def __init__(self, *args: object) -> None:
         super().__init__(*args)
-
-class ConfigError(Exception):
-    def __init__(self, *args: object) -> None:
-        super().__init__(*args)
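Since the removed `ConfigError` (and the remaining classes) only forward `*args` to `Exception.__init__`, which is exactly what the base class does anyway, the subclasses can be written without the boilerplate; an equivalent sketch:

    class CaptchaError(Exception):
        pass

    class LoadingError(Exception):
        pass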
@@ -1,12 +1,14 @@
 from selenium.webdriver.common.by import By

+from crawler.types import Proxy
+
+
 from stolichki.driver import StolichkiDriver
 from stolichki.parsers.category import get_category_parser
-from stolichki.types.city import City


 class CityParser:
-    def __init__(self, city: City) -> None:
+    def __init__(self, city, proxies: list[Proxy]) -> None:
         self.driver = StolichkiDriver()
         self.city = city

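In the new `__init__` signature the `city` parameter lost its `City` annotation along with the removed `stolichki.types.city` import, although `crawler.types` (already imported here for `Proxy`) exports `City` as well. A possible variant that keeps the annotation; the `self.proxies` assignment is an assumption, since the hunk ends before any use of `proxies`:

    from crawler.types import City, Proxy

    class CityParser:
        def __init__(self, city: City, proxies: list[Proxy]) -> None:
            self.driver = StolichkiDriver()
            self.city = city
            self.proxies = proxies  # hypothetical; not shown in the hunk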