Added crawler api library
This commit is contained in:
parent
71825351c8
commit
049e436d37
|
@ -0,0 +1,122 @@
|
|||
import asyncio
|
||||
import configparser
|
||||
import random
|
||||
import logging
|
||||
|
||||
import aiohttp
|
||||
import backoff
|
||||
|
||||
from utils.exceptions import ConfigError
|
||||
from crawler.types import City, Proxy
|
||||
from utils.classes import Singleton
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class CrawlerAPI(metaclass=Singleton):
|
||||
api_baseurl = "https://q.asburo.ru/ch/"
|
||||
api_version = "v1"
|
||||
|
||||
cities = []
|
||||
proxies = []
|
||||
|
||||
def __init__(self) -> None:
|
||||
log.info("Initializing crawler API class")
|
||||
|
||||
self.api_url = self.api_baseurl + self.api_version
|
||||
|
||||
config = configparser.ConfigParser()
|
||||
|
||||
try:
|
||||
config.read("config.ini")
|
||||
log.debug("Reading config file")
|
||||
|
||||
self.config = config["crawler"]
|
||||
|
||||
self.rival_tag = self.config["tag"]
|
||||
self.auth = aiohttp.BasicAuth(
|
||||
self.config["username"], self.config["password"]
|
||||
)
|
||||
|
||||
log.info("Successfully parsed config file.")
|
||||
log.debug(f"Your auth: {self.auth}")
|
||||
|
||||
except:
|
||||
raise ConfigError("Can't read settings for crawler api. Check your config.ini.")
|
||||
|
||||
timeout = aiohttp.ClientTimeout(12000)
|
||||
self.session = aiohttp.ClientSession(timeout=timeout)
|
||||
|
||||
log.info("Crawler API initialized")
|
||||
|
||||
@backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_time=60)
|
||||
async def get_cities(self) -> list[City]:
|
||||
if len(self.cities) <= 0:
|
||||
url = f"{self.api_url}/cities/{self.rival_tag}"
|
||||
|
||||
response = await self.session.get(url, auth=self.auth)
|
||||
if response.status >= 500:
|
||||
raise aiohttp.ServerConnectionError()
|
||||
|
||||
log.debug(f"Response status: {response.status} for {url}")
|
||||
|
||||
if response.status == 200:
|
||||
json_response = await response.json()
|
||||
|
||||
self.cities = [City(**city) for city in json_response.get("city_list")]
|
||||
|
||||
return self.cities
|
||||
|
||||
@backoff.on_exception(backoff.expo, aiohttp.ClientError, max_time=60)
|
||||
async def get_proxies(self) -> list[Proxy]:
|
||||
if len(self.proxies) <= 0:
|
||||
url = f"{self.api_url}/proxies/"
|
||||
|
||||
response = await self.session.get(url, auth=self.auth)
|
||||
log.debug(f"Response status: {response.status} for {url}")
|
||||
|
||||
if response.status == 200:
|
||||
json_response = await response.json()
|
||||
|
||||
self.proxies = [Proxy(**proxy) for proxy in json_response.get("proxy_list")]
|
||||
|
||||
return self.proxies
|
||||
|
||||
async def get_random_proxy(self) -> Proxy:
|
||||
proxies = await self.get_proxies()
|
||||
|
||||
return proxies[random.randint(0, len(proxies) - 1)]
|
||||
|
||||
async def remove_proxy(self, proxy: Proxy) -> Proxy:
|
||||
self.proxies.remove(proxy)
|
||||
|
||||
return await self.get_random_proxy()
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, (aiohttp.ClientError, aiohttp.ServerConnectionError), max_tries=15, logger=log)
|
||||
async def send_products(self, results: list):
|
||||
log.info("Sending data")
|
||||
|
||||
url = f"{self.api_url}/prices/{self.rival_tag}"
|
||||
|
||||
data = {
|
||||
"rows": results
|
||||
}
|
||||
|
||||
response = await self.session.post(url, json=data, auth=self.auth)
|
||||
status, response_text = response.status, await response.text()
|
||||
|
||||
log.info(f"{data} was sended. Status: {status}. Response: {response_text}")
|
||||
|
||||
if status >= 500:
|
||||
await asyncio.sleep(15)
|
||||
raise aiohttp.ServerConnectionError(response_text)
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *args, **kwargs):
|
||||
await self.close()
|
||||
|
||||
async def close(self):
|
||||
if not self.session.closed:
|
||||
await self.session.close()
|
|
@ -0,0 +1,15 @@
|
|||
from dataclasses import dataclass
|
||||
from utils.classes import DataclassBase
|
||||
|
||||
@dataclass(init=False)
|
||||
class City(DataclassBase):
|
||||
city: str
|
||||
region_id: int | None
|
||||
region_name: str | None
|
||||
|
||||
@dataclass(init=False)
|
||||
class Proxy(DataclassBase):
|
||||
ip: str
|
||||
port: str
|
||||
login: str
|
||||
password: str
|
|
@ -0,0 +1,4 @@
|
|||
def chunks(lst: list, n: int):
|
||||
"""Yield successive n-sized chunks from lst."""
|
||||
for i in range(0, len(lst), n):
|
||||
yield lst[i:i + n]
|
|
@ -0,0 +1,11 @@
|
|||
import asyncio
|
||||
|
||||
|
||||
async def gather_with_concurrency(n, *coros):
|
||||
semaphore = asyncio.Semaphore(int(n))
|
||||
|
||||
async def sem_coro(coro):
|
||||
async with semaphore:
|
||||
return await coro
|
||||
|
||||
return await asyncio.gather(*(sem_coro(c) for c in coros))
|
|
@ -0,0 +1,28 @@
|
|||
import dataclasses
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class DataclassBase:
|
||||
def __init__(self, **kwargs):
|
||||
names = set([f.name for f in dataclasses.fields(self)])
|
||||
for k, v in kwargs.items():
|
||||
if k in names:
|
||||
setattr(self, k, v)
|
||||
|
||||
|
||||
class Singleton(type):
|
||||
_instances = {}
|
||||
|
||||
def __call__(cls, *args, **kwargs):
|
||||
if cls not in cls._instances:
|
||||
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
|
||||
return cls._instances[cls]
|
||||
|
||||
|
||||
class DataclassJSONEncoder(json.JSONEncoder):
|
||||
def default(self, o):
|
||||
if dataclasses.is_dataclass(o):
|
||||
return dataclasses.asdict(o)
|
||||
return super().default(o)
|
|
@ -0,0 +1,3 @@
|
|||
class ConfigError(Exception):
|
||||
def __init__(self, *args: object) -> None:
|
||||
super().__init__(*args)
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/python3
|
||||
|
||||
import sys
|
||||
|
||||
# Мне надоело вручную форматить json ответы, поэтому написал
|
||||
# Эту утилиту. Берёт всего один аргумент: путь до json файла
|
||||
|
||||
file_path = sys.argv[1:][0]
|
||||
|
||||
if file_path:
|
||||
import json
|
||||
|
||||
f = open(file_path)
|
||||
data = json.load(f)
|
||||
|
||||
f = open(file_path, "w")
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
f.close()
|
Loading…
Reference in New Issue