1
0
Fork 0
mirror of https://github.com/Findus23/HNReader.git synced 2024-09-19 15:23:44 +02:00

ratelimiting and async redis

This commit is contained in:
Lukas Winkler 2021-04-17 23:05:32 +02:00
parent 740a5b7484
commit 6a78aec5f1
Signed by: lukas
GPG key ID: 54DE4D798D244853
6 changed files with 81 additions and 34 deletions

View file

@ -2,19 +2,19 @@ import asyncio
import json
from aiohttp import ClientSession
from redis import Redis
from aredis import StrictRedis
API_BASEURL = "https://hacker-news.firebaseio.com/v0/"
class HNClient:
def __init__(self, session: ClientSession, redis: Redis):
def __init__(self, session: ClientSession, redis: StrictRedis):
self.s = session
self.r = redis
async def get_item(self, item_id: int, remove_kids=True):
key = f"hnclient_item_{item_id}"
cache = self.r.get(key)
cache = await self.r.get(key)
if cache:
return json.loads(cache)
url = f"{API_BASEURL}item/{item_id}.json"
@ -22,7 +22,7 @@ class HNClient:
response.raise_for_status()
text = await response.text()
item = json.loads(text)
self.r.set(key, text, ex=60 * 15)
await self.r.set(key, text, ex=60 * 15)
if "kids" in item and remove_kids:
del item["kids"]
return item
@ -42,7 +42,7 @@ class HNClient:
async def get_stories(self, page: str, offset=0):
limit = 25
key = f"hnclient_stories_{page}_{offset}"
cached = self.r.get(key)
cached = await self.r.get(key)
if cached:
return json.loads(cached)
url = f"{API_BASEURL}{page}.json"
@ -56,5 +56,5 @@ class HNClient:
tasks.append(task)
full_stories = await asyncio.gather(*tasks)
self.r.set(key, json.dumps(full_stories), ex=60 * 15)
await self.r.set(key, json.dumps(full_stories), ex=60 * 15)
return full_stories

56
poetry.lock generated
View file

@ -17,6 +17,30 @@ yarl = ">=1.0,<2.0"
[package.extras]
speedups = ["aiodns", "brotlipy", "cchardet"]
[[package]]
name = "aredis"
version = "1.1.8"
description = "Python async client for Redis key-value store"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "asgi-ratelimit"
version = "0.4.0"
description = ""
category = "main"
optional = false
python-versions = ">=3.6,<4.0"
[package.dependencies]
aredis = {version = ">=1.1.8,<2.0.0", optional = true, markers = "extra == \"redis\" or extra == \"full\""}
[package.extras]
redis = ["aredis (>=1.1.8,<2.0.0)"]
full = ["aredis (>=1.1.8,<2.0.0)", "pyjwt (>=1.7.1,<2.0.0)"]
jwt = ["pyjwt (>=1.7.1,<2.0.0)"]
[[package]]
name = "async-timeout"
version = "3.0.1"
@ -98,11 +122,11 @@ test = ["Cython (==0.29.14)"]
[[package]]
name = "idna"
version = "2.10"
version = "3.1"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
python-versions = ">=3.4"
[[package]]
name = "multidict"
@ -112,17 +136,6 @@ category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "redis"
version = "3.5.3"
description = "Python client for Redis key-value store"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.extras]
hiredis = ["hiredis (>=0.1.3)"]
[[package]]
name = "setproctitle"
version = "1.2.2"
@ -196,7 +209,7 @@ multidict = ">=4.0"
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "f7f569602609d03bb5ecb33a2d19f9aff8695ba448c6825e0c6b35b0d509b5fc"
content-hash = "73769bcc1c65e6a322d764a52621402d30fa35dc25c94a2b27a4cf64980a5e9c"
[metadata.files]
aiohttp = [
@ -238,6 +251,13 @@ aiohttp = [
{file = "aiohttp-3.7.4.post0-cp39-cp39-win_amd64.whl", hash = "sha256:02f46fc0e3c5ac58b80d4d56eb0a7c7d97fcef69ace9326289fb9f1955e65cfe"},
{file = "aiohttp-3.7.4.post0.tar.gz", hash = "sha256:493d3299ebe5f5a7c66b9819eacdcfbbaaf1a8e84911ddffcdc48888497afecf"},
]
aredis = [
{file = "aredis-1.1.8.tar.gz", hash = "sha256:880bcf91c4f89b919311cc93626bbc70901c6e5c4fdb3dcba643411e3ee40bcf"},
]
asgi-ratelimit = [
{file = "asgi-ratelimit-0.4.0.tar.gz", hash = "sha256:fa0cd403cc95eb1d9d4580f89a64f94287d42c1c057e417d9a0de523cdafb367"},
{file = "asgi_ratelimit-0.4.0-py3-none-any.whl", hash = "sha256:6b336d2598096911e47dc49104974384d12dbe5da1a57ff93c3901c263e89845"},
]
async-timeout = [
{file = "async-timeout-3.0.1.tar.gz", hash = "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f"},
{file = "async_timeout-3.0.1-py3-none-any.whl", hash = "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3"},
@ -319,8 +339,8 @@ httptools = [
{file = "httptools-0.1.1.tar.gz", hash = "sha256:41b573cf33f64a8f8f3400d0a7faf48e1888582b6f6e02b82b9bd4f0bf7497ce"},
]
idna = [
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
{file = "idna-3.1-py3-none-any.whl", hash = "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16"},
{file = "idna-3.1.tar.gz", hash = "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1"},
]
multidict = [
{file = "multidict-5.1.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:b7993704f1a4b204e71debe6095150d43b2ee6150fa4f44d6d966ec356a8d61f"},
@ -361,10 +381,6 @@ multidict = [
{file = "multidict-5.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:7df80d07818b385f3129180369079bd6934cf70469f99daaebfac89dca288359"},
{file = "multidict-5.1.0.tar.gz", hash = "sha256:25b4e5f22d3a37ddf3effc0710ba692cfc792c2b9edfb9c05aefe823256e84d5"},
]
redis = [
{file = "redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"},
{file = "redis-3.5.3.tar.gz", hash = "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"},
]
setproctitle = [
{file = "setproctitle-1.2.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:9106bcbacae534b6f82955b176723f1b2ca6514518aab44dffec05a583f8dca8"},
{file = "setproctitle-1.2.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:30bc7a769a4451639a0adcbc97bdf7a6e9ac0ef3ddad8d63eb1e338edb3ebeda"},

View file

@ -6,7 +6,7 @@ authors = ["Lukas Winkler <git@lw1.at>"]
[tool.poetry.dependencies]
python = "^3.9"
redis = "^3.5.3"
aredis = {extras = ["hiredis"], version = "^1.1.8"}
hiredis = "^2.0.0"
aiohttp = "^3.7.4"
starlette = "^0.14.2"
@ -15,6 +15,7 @@ gunicorn = "^20.1.0"
uvloop = "^0.15.2"
httptools = "^0.1.1"
setproctitle = "^1.2.2"
asgi-ratelimit = {extras = ["redis"], version = "^0.4.0"}
[tool.poetry.dev-dependencies]

View file

@ -4,10 +4,20 @@ require_once "vendor/autoload.php";
use Graby\Graby;
$defaultUseragent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
$customUseragent = "HNClient (in development)";
$userAgent = $defaultUseragent . " " . $customUseragent;
$url = file_get_contents("php://stdin");
$graby = new Graby();
$graby = new Graby([
"http_client" => [
'ua_browser' => $userAgent,
]
]);
$result = $graby->fetchContent($url);
echo json_encode($result);

View file

@ -1,22 +1,26 @@
import aiohttp
from redis import Redis
from aredis import StrictRedis
from ratelimit import RateLimitMiddleware, Rule
from ratelimit.auths.session import from_session
from starlette.applications import Starlette
from starlette.middleware import Middleware
from starlette.middleware.sessions import SessionMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, Response
from starlette.routing import Route
from config import debug, user_agent, redis_socket
from hnapi import HNClient
from reader import Reader
from redis_backend import CustomRedisBackend
conn = aiohttp.TCPConnector(ttl_dns_cache=60 * 10)
session = aiohttp.ClientSession(connector=conn, headers={
"User-Agent": user_agent
})
if redis_socket:
r = Redis(unix_socket_path=redis_socket)
r = StrictRedis(unix_socket_path=redis_socket)
else:
r = Redis()
r = StrictRedis()
reader = Reader()
api = HNClient(session, r)
@ -29,18 +33,20 @@ async def item(request: Request):
async def read(request: Request):
    """Serve the reader output for the page a Hacker News item links to.

    The item is looked up through ``api.get_item`` using the ``item_id``
    path parameter. Reader output is cached in Redis under
    ``hnclient_read_<item_id>`` for 24 hours.

    Returns:
        A 404 plain-text response when the item has no ``url`` field,
        otherwise a response carrying the (cached or freshly generated)
        reader output, labelled ``application/json``.
    """
    # NOTE(review): the next two lines look like debugging scaffolding.
    # The session write presumably exists so the session-based rate limiter
    # has a session to key on -- confirm before removing.
    print(request.session)
    request.session.update({"a": "b"})

    item_id = request.path_params["item_id"]
    item = await api.get_item(item_id)
    if "url" not in item:
        # An endpoint has to return a Response object; a Flask-style
        # ``(body, status)`` tuple is not a valid ASGI response.
        return Response("Url not found", status_code=404, media_type="text/plain")

    key = f"hnclient_read_{item_id}"
    cache = await r.get(key)
    if cache:
        payload = cache
    else:
        payload = await reader.readable_html(item["url"])
        await r.set(key, payload, ex=60 * 60 * 24)

    # media_type must be passed to the constructor: the headers are built
    # when the Response is created, so assigning ``response.media_type``
    # afterwards does not set Content-Type.
    return Response(payload, media_type="application/json")
@ -57,3 +63,13 @@ app = Starlette(debug=debug, routes=[
Route('/api/read/{item_id:int}', read),
Route('/api/topstories', topstories),
])
# Rate limiting is only installed when the app is not running in debug mode.
if not debug:
    app.add_middleware(
        RateLimitMiddleware,
        # Callers are identified via ratelimit.auths.session.from_session.
        # NOTE(review): this presumably requires SessionMiddleware to have
        # populated the session before this middleware runs -- confirm the
        # resulting middleware order.
        authenticate=from_session,
        # Project-local backend (redis_backend.CustomRedisBackend) built on
        # the shared Redis client `r`.
        backend=CustomRedisBackend(r),
        config={
            # Pattern for paths beginning with /api/.
            # Presumably 4 requests per minute per caller -- verify against
            # the asgi-ratelimit Rule documentation.
            r"^/api/": [Rule(minute=4)],
        },
    )

View file

@ -111,6 +111,10 @@ pre {
.comment {
border-top: .5px solid rgba(0, 0, 0, .09);
a {
color: black;
}
.text {
padding: 10px 15px;
}