Testing

parent b97e8fe739
commit fe16d4eee9
@@ -1,16 +1,16 @@
 import asyncio
 import configparser
 import random
-import logging
 
+from loguru import logger
 import aiohttp
 import backoff
 
-from utils.exceptions import ConfigError
+from crawler.utils.exceptions import ConfigError
 from crawler.types import City, Proxy
-from utils.classes import Singleton
+from crawler.utils.classes import Singleton
 
-log = logging.getLogger(__name__)
+log = logger
 
 class CrawlerAPI(metaclass=Singleton):
     api_baseurl = "https://q.asburo.ru/ch/"
@@ -94,7 +94,7 @@ class CrawlerAPI(metaclass=Singleton):
 
     @backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
     async def send_products(self, results: list):
-        log.info("Sending data")
+        log.info("Sending data...")
 
         url = f"{self.api_url}/prices/{self.rival_tag}"
 
@@ -105,7 +105,7 @@ class CrawlerAPI(metaclass=Singleton):
         response = await self.session.post(url, json=data, auth=self.auth)
         status, response_text = response.status, await response.text()
 
-        log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
+        log.debug(f"{data} was sended. Status: {status}. Response: {response_text}")
 
         if status >= 500:
             await asyncio.sleep(15)
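Note on the retry decorator kept in this hunk: backoff.on_exception wraps the coroutine in a retry loop, with backoff.expo waiting roughly 1s, 2s, 4s, ... (plus jitter) between attempts and re-raising the last exception after max_tries=15 failures. A minimal sketch of the same pattern; the fetch coroutine and its URL parameter are illustrative, not part of this commit:

import aiohttp
import backoff

# Retry on transient network errors with exponential backoff; after 15
# failed attempts the final exception propagates to the caller.
@backoff.on_exception(
    backoff.expo,
    (aiohttp.ClientError, aiohttp.ServerConnectionError),
    max_tries=15,
)
async def fetch(session: aiohttp.ClientSession, url: str) -> str:
    async with session.get(url) as response:
        response.raise_for_status()  # HTTP 4xx/5xx -> ClientResponseError, a ClientError subclass
        return await response.text()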
@@ -1,11 +1,13 @@
 from dataclasses import dataclass
-from utils.classes import DataclassBase
+from crawler.utils.classes import DataclassBase
 
 @dataclass(init=False)
 class City(DataclassBase):
+    id: int = 0
     city: str
     region_id: int | None
     region_name: str | None
+    is_byapt: 1 | 0
 
 @dataclass(init=False)
 class Proxy(DataclassBase):
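One note on the new is_byapt field: the annotation 1 | 0 is evaluated at class-definition time as integer bitwise OR, so it is just the int 1, not a union of the two allowed values. If the intent is a 0/1 flag, typing.Literal spells that out; a hypothetical alternative (DataclassBase omitted and defaults added for brevity):

from dataclasses import dataclass
from typing import Literal

@dataclass(init=False)
class City:
    id: int = 0
    city: str = ""
    region_id: int | None = None
    region_name: str | None = None
    is_byapt: Literal[0, 1] = 0  # or plain bool, if 0/1 only mirrors the upstream JSON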
main.py (43 changes)
@@ -1,17 +1,50 @@
 import sys
+import asyncio
+import configparser
+import json
+import multiprocessing
 
 from loguru import logger
 
 from stolichki.parsers.city import CityParser
 from stolichki.types.city import City
+from crawler.api import CrawlerAPI
+from crawler.utils.classes import DataclassJSONEncoder
+
+from crawler.types import City, Proxy
 
+
+async def get_crawler_data():
+    crawler = CrawlerAPI()
+    cities = await crawler.get_cities()
+    proxies = await crawler.get_proxies()
+    return cities, proxies
+
+
+def filter_cities(config, cities: list[City]) -> list[City]:
+    with open(config["parser"]["cities_path"]) as f:
+        cities_stolichki = json.load(f)
+
+    for city in cities:
+        city.id = cities_stolichki[city.city]
+
+    return cities
+
+
 @logger.catch
-def main():
-    city = City(111, "Бутово", 1, [])
-    result = CityParser(city).parse()
-    print(result)
+def main(cities: list[City], proxies: list[Proxy]):
+    config = configparser.ConfigParser()
+    config.read("config.ini")
+
+    cities = filter_cities(config, cities)
+    quantity = config['parser']['cities_quantity'] or len(cities)
+
+    with multiprocessing.Pool(processes=quantity) as pool: #type: ignore
+        results = pool.map(lambda city: CityParser(city, proxies).parse(), cities)
+
+    with open("results.json", "w") as f:
+        json.dump(results, f, cls=DataclassJSONEncoder, ensure_ascii=False, indent=4)
 
 if __name__ == "__main__":
     logger.add(sys.stderr, level="DEBUG", backtrace=True, enqueue=True) #type: ignore
-    main()
+    cities, proxies = asyncio.run(get_crawler_data())
+    main(cities, proxies)
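Two likely pitfalls in the new main(). First, multiprocessing pickles the callable it ships to worker processes, and lambdas are not picklable, so pool.map(lambda ...) will fail with a pickling error; a module-level function plus functools.partial is the usual workaround. Second, ConfigParser values are strings, so processes=quantity receives a str unless it is converted; getint handles both the conversion and a missing option. A sketch against the names in this hunk (assumed importable; the JSON shape in the comment is hypothetical):

import functools
import multiprocessing

from stolichki.parsers.city import CityParser  # name from this commit, assumed importable

# config["parser"]["cities_path"] should hold a name -> id mapping,
# e.g. {"Бутово": 111} (values hypothetical, cf. the removed test code above).

def parse_city(proxies, city):
    # Module-level functions are picklable, unlike the lambda in the hunk above.
    return CityParser(city, proxies).parse()

def run(config, cities, proxies):
    # getint converts the string value and falls back if the option is absent.
    quantity = config.getint("parser", "cities_quantity", fallback=len(cities))
    with multiprocessing.Pool(processes=quantity) as pool:
        return pool.map(functools.partial(parse_city, proxies), cities)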
@@ -19,7 +19,8 @@ from webdriver_manager.chrome import ChromeDriverManager
 
 from twocaptcha import TwoCaptcha
 
-from stolichki.errors import CaptchaError, ConfigError, LoadingError
+from stolichki.errors import CaptchaError, LoadingError
+from crawler.utils.exceptions import ConfigError
 
 class StolichkiDriver(uc.Chrome):
     def __init__(self, **kwargs):
@@ -3,9 +3,5 @@ class CaptchaError(Exception):
         super().__init__(*args)
 
 class LoadingError(Exception):
-    def __init__(self, *args: object) -> None:
-        super().__init__(*args)
-
-class ConfigError(Exception):
     def __init__(self, *args: object) -> None:
         super().__init__(*args)
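Since the surviving exception subclasses only forward to super().__init__, the explicit constructors are redundant; Exception already accepts and stores *args. An equivalent, shorter spelling:

class LoadingError(Exception):
    pass  # Exception.__init__ already handles *args; no override needed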
@@ -1,12 +1,14 @@
 from selenium.webdriver.common.by import By
 
+from crawler.types import Proxy
+
 from stolichki.driver import StolichkiDriver
 from stolichki.parsers.category import get_category_parser
 from stolichki.types.city import City
 
 
 class CityParser:
-    def __init__(self, city: City) -> None:
+    def __init__(self, city, proxies: list[Proxy]) -> None:
         self.driver = StolichkiDriver()
         self.city = city
 
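With the new signature, call sites construct the parser with both the city and the shared proxy list, mirroring the pool.map call in main.py above (sketch; the variables are assumed to be in scope):

parser = CityParser(city, proxies)  # city: City, proxies: list[Proxy]
result = parser.parse()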