mirror of
https://github.com/Findus23/HNReader.git
synced 2024-09-19 15:23:44 +02:00
rate limiting and async redis
This commit is contained in:
parent
740a5b7484
commit
6a78aec5f1
6 changed files with 81 additions and 34 deletions
|
@ -2,19 +2,19 @@ import asyncio
|
|||
import json
|
||||
|
||||
from aiohttp import ClientSession
|
||||
from redis import Redis
|
||||
from aredis import StrictRedis
|
||||
|
||||
API_BASEURL = "https://hacker-news.firebaseio.com/v0/"
|
||||
|
||||
|
||||
class HNClient:
|
||||
def __init__(self, session: ClientSession, redis: Redis):
|
||||
def __init__(self, session: ClientSession, redis: StrictRedis):
|
||||
self.s = session
|
||||
self.r = redis
|
||||
|
||||
async def get_item(self, item_id: int, remove_kids=True):
|
||||
key = f"hnclient_item_{item_id}"
|
||||
cache = self.r.get(key)
|
||||
cache = await self.r.get(key)
|
||||
if cache:
|
||||
return json.loads(cache)
|
||||
url = f"{API_BASEURL}item/{item_id}.json"
|
||||
|
@ -22,7 +22,7 @@ class HNClient:
|
|||
response.raise_for_status()
|
||||
text = await response.text()
|
||||
item = json.loads(text)
|
||||
self.r.set(key, text, ex=60 * 15)
|
||||
await self.r.set(key, text, ex=60 * 15)
|
||||
if "kids" in item and remove_kids:
|
||||
del item["kids"]
|
||||
return item
|
||||
|
@ -42,7 +42,7 @@ class HNClient:
|
|||
async def get_stories(self, page: str, offset=0):
|
||||
limit = 25
|
||||
key = f"hnclient_stories_{page}_{offset}"
|
||||
cached = self.r.get(key)
|
||||
cached = await self.r.get(key)
|
||||
if cached:
|
||||
return json.loads(cached)
|
||||
url = f"{API_BASEURL}{page}.json"
|
||||
|
@ -56,5 +56,5 @@ class HNClient:
|
|||
tasks.append(task)
|
||||
|
||||
full_stories = await asyncio.gather(*tasks)
|
||||
self.r.set(key, json.dumps(full_stories), ex=60 * 15)
|
||||
await self.r.set(key, json.dumps(full_stories), ex=60 * 15)
|
||||
return full_stories
|
||||
|
|
56
poetry.lock
generated
56
poetry.lock
generated
|
@ -17,6 +17,30 @@ yarl = ">=1.0,<2.0"
|
|||
[package.extras]
|
||||
speedups = ["aiodns", "brotlipy", "cchardet"]
|
||||
|
||||
[[package]]
|
||||
name = "aredis"
|
||||
version = "1.1.8"
|
||||
description = "Python async client for Redis key-value store"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "asgi-ratelimit"
|
||||
version = "0.4.0"
|
||||
description = ""
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6,<4.0"
|
||||
|
||||
[package.dependencies]
|
||||
aredis = {version = ">=1.1.8,<2.0.0", optional = true, markers = "extra == \"redis\" or extra == \"full\""}
|
||||
|
||||
[package.extras]
|
||||
redis = ["aredis (>=1.1.8,<2.0.0)"]
|
||||
full = ["aredis (>=1.1.8,<2.0.0)", "pyjwt (>=1.7.1,<2.0.0)"]
|
||||
jwt = ["pyjwt (>=1.7.1,<2.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "async-timeout"
|
||||
version = "3.0.1"
|
||||
|
@ -98,11 +122,11 @@ test = ["Cython (==0.29.14)"]
|
|||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "2.10"
|
||||
version = "3.1"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
python-versions = ">=3.4"
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
|
@ -112,17 +136,6 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "redis"
|
||||
version = "3.5.3"
|
||||
description = "Python client for Redis key-value store"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
[package.extras]
|
||||
hiredis = ["hiredis (>=0.1.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "setproctitle"
|
||||
version = "1.2.2"
|
||||
|
@ -196,7 +209,7 @@ multidict = ">=4.0"
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "f7f569602609d03bb5ecb33a2d19f9aff8695ba448c6825e0c6b35b0d509b5fc"
|
||||
content-hash = "73769bcc1c65e6a322d764a52621402d30fa35dc25c94a2b27a4cf64980a5e9c"
|
||||
|
||||
[metadata.files]
|
||||
aiohttp = [
|
||||
|
@ -238,6 +251,13 @@ aiohttp = [
|
|||
{file = "aiohttp-3.7.4.post0-cp39-cp39-win_amd64.whl", hash = "sha256:02f46fc0e3c5ac58b80d4d56eb0a7c7d97fcef69ace9326289fb9f1955e65cfe"},
|
||||
{file = "aiohttp-3.7.4.post0.tar.gz", hash = "sha256:493d3299ebe5f5a7c66b9819eacdcfbbaaf1a8e84911ddffcdc48888497afecf"},
|
||||
]
|
||||
aredis = [
|
||||
{file = "aredis-1.1.8.tar.gz", hash = "sha256:880bcf91c4f89b919311cc93626bbc70901c6e5c4fdb3dcba643411e3ee40bcf"},
|
||||
]
|
||||
asgi-ratelimit = [
|
||||
{file = "asgi-ratelimit-0.4.0.tar.gz", hash = "sha256:fa0cd403cc95eb1d9d4580f89a64f94287d42c1c057e417d9a0de523cdafb367"},
|
||||
{file = "asgi_ratelimit-0.4.0-py3-none-any.whl", hash = "sha256:6b336d2598096911e47dc49104974384d12dbe5da1a57ff93c3901c263e89845"},
|
||||
]
|
||||
async-timeout = [
|
||||
{file = "async-timeout-3.0.1.tar.gz", hash = "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f"},
|
||||
{file = "async_timeout-3.0.1-py3-none-any.whl", hash = "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3"},
|
||||
|
@ -319,8 +339,8 @@ httptools = [
|
|||
{file = "httptools-0.1.1.tar.gz", hash = "sha256:41b573cf33f64a8f8f3400d0a7faf48e1888582b6f6e02b82b9bd4f0bf7497ce"},
|
||||
]
|
||||
idna = [
|
||||
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
|
||||
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
|
||||
{file = "idna-3.1-py3-none-any.whl", hash = "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16"},
|
||||
{file = "idna-3.1.tar.gz", hash = "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1"},
|
||||
]
|
||||
multidict = [
|
||||
{file = "multidict-5.1.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:b7993704f1a4b204e71debe6095150d43b2ee6150fa4f44d6d966ec356a8d61f"},
|
||||
|
@ -361,10 +381,6 @@ multidict = [
|
|||
{file = "multidict-5.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:7df80d07818b385f3129180369079bd6934cf70469f99daaebfac89dca288359"},
|
||||
{file = "multidict-5.1.0.tar.gz", hash = "sha256:25b4e5f22d3a37ddf3effc0710ba692cfc792c2b9edfb9c05aefe823256e84d5"},
|
||||
]
|
||||
redis = [
|
||||
{file = "redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"},
|
||||
{file = "redis-3.5.3.tar.gz", hash = "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"},
|
||||
]
|
||||
setproctitle = [
|
||||
{file = "setproctitle-1.2.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:9106bcbacae534b6f82955b176723f1b2ca6514518aab44dffec05a583f8dca8"},
|
||||
{file = "setproctitle-1.2.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:30bc7a769a4451639a0adcbc97bdf7a6e9ac0ef3ddad8d63eb1e338edb3ebeda"},
|
||||
|
|
|
@ -6,7 +6,7 @@ authors = ["Lukas Winkler <git@lw1.at>"]
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
redis = "^3.5.3"
|
||||
aredis = {extras = ["hiredis"], version = "^1.1.8"}
|
||||
hiredis = "^2.0.0"
|
||||
aiohttp = "^3.7.4"
|
||||
starlette = "^0.14.2"
|
||||
|
@ -15,6 +15,7 @@ gunicorn = "^20.1.0"
|
|||
uvloop = "^0.15.2"
|
||||
httptools = "^0.1.1"
|
||||
setproctitle = "^1.2.2"
|
||||
asgi-ratelimit = {extras = ["redis"], version = "^0.4.0"}
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
|
|
|
@ -4,10 +4,20 @@ require_once "vendor/autoload.php";
|
|||
|
||||
use Graby\Graby;
|
||||
|
||||
$defaultUseragent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
|
||||
|
||||
$customUseragent = "HNClient (in development)";
|
||||
|
||||
$userAgent = $defaultUseragent . " " . $customUseragent;
|
||||
|
||||
$url = file_get_contents("php://stdin");
|
||||
|
||||
|
||||
$graby = new Graby();
|
||||
$graby = new Graby([
|
||||
"http_client" => [
|
||||
'ua_browser' => $userAgent,
|
||||
]
|
||||
]);
|
||||
|
||||
$result = $graby->fetchContent($url);
|
||||
echo json_encode($result);
|
||||
|
|
28
server.py
28
server.py
|
@ -1,22 +1,26 @@
|
|||
import aiohttp
|
||||
from redis import Redis
|
||||
from aredis import StrictRedis
|
||||
from ratelimit import RateLimitMiddleware, Rule
|
||||
from ratelimit.auths.session import from_session
|
||||
from starlette.applications import Starlette
|
||||
from starlette.middleware import Middleware
|
||||
from starlette.middleware.sessions import SessionMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import JSONResponse, Response
|
||||
from starlette.routing import Route
|
||||
|
||||
from config import debug, user_agent, redis_socket
|
||||
from hnapi import HNClient
|
||||
from reader import Reader
|
||||
from redis_backend import CustomRedisBackend
|
||||
|
||||
conn = aiohttp.TCPConnector(ttl_dns_cache=60 * 10)
|
||||
session = aiohttp.ClientSession(connector=conn, headers={
|
||||
"User-Agent": user_agent
|
||||
})
|
||||
if redis_socket:
|
||||
r = Redis(unix_socket_path=redis_socket)
|
||||
r = StrictRedis(unix_socket_path=redis_socket)
|
||||
else:
|
||||
r = Redis()
|
||||
r = StrictRedis()
|
||||
reader = Reader()
|
||||
|
||||
api = HNClient(session, r)
|
||||
|
@ -29,18 +33,20 @@ async def item(request: Request):
|
|||
|
||||
|
||||
async def read(request: Request):
|
||||
print(request.session)
|
||||
request.session.update({"a": "b"})
|
||||
item_id = request.path_params["item_id"]
|
||||
item = await api.get_item(item_id)
|
||||
if "url" not in item:
|
||||
return "Url not found", 404
|
||||
key = f"hnclient_read_{item_id}"
|
||||
|
||||
cache = r.get(key)
|
||||
cache = await r.get(key)
|
||||
if cache:
|
||||
response = Response(cache)
|
||||
else:
|
||||
output = await reader.readable_html(item["url"])
|
||||
r.set(key, output, ex=60 * 60 * 24)
|
||||
await r.set(key, output, ex=60 * 60 * 24)
|
||||
response = Response(output)
|
||||
response.media_type = "application/json"
|
||||
return response
|
||||
|
@ -57,3 +63,13 @@ app = Starlette(debug=debug, routes=[
|
|||
Route('/api/read/{item_id:int}', read),
|
||||
Route('/api/topstories', topstories),
|
||||
])
|
||||
|
||||
if not debug:
|
||||
app.add_middleware(
|
||||
RateLimitMiddleware,
|
||||
authenticate=from_session,
|
||||
backend=CustomRedisBackend(r),
|
||||
config={
|
||||
r"^/api/": [Rule(minute=4)],
|
||||
},
|
||||
)
|
||||
|
|
|
@ -111,6 +111,10 @@ pre {
|
|||
.comment {
|
||||
border-top: .5px solid rgba(0, 0, 0, .09);
|
||||
|
||||
a {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.text {
|
||||
padding: 10px 15px;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue