1
0
Fork 0
mirror of https://github.com/Findus23/PaperLibrary-cli.git synced 2024-09-20 17:03:46 +02:00

support notes

This commit is contained in:
Lukas Winkler 2020-11-01 13:48:40 +01:00
parent 051dac5411
commit 3de87fc735
Signed by: lukas
GPG key ID: 54DE4D798D244853
6 changed files with 153 additions and 31 deletions

View file

@ -1,8 +1,9 @@
from typing import List from pathlib import Path
from typing import List, Dict
from requests import Session from requests import Session
from paperlibrary.api.models import Author, PDF, Keyword from paperlibrary.api.models import Author, PDF, Keyword, PaperComplete
class PaperLibraryAPI: class PaperLibraryAPI:
@ -13,14 +14,35 @@ class PaperLibraryAPI:
self.s = Session() self.s = Session()
self.s.headers.update({"Authorization": "Token " + auth_token}) self.s.headers.update({"Authorization": "Token " + auth_token})
def fetch_papers(self) -> List[PaperComplete]:
    """Fetch all papers from the ``papers/`` endpoint.

    Returns:
        The response body deserialized into ``PaperComplete`` objects.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "papers/")
    # Surface HTTP failures directly instead of handing an error body to
    # the schema loader, which would fail with a misleading parse error.
    r.raise_for_status()
    return PaperComplete.schema().loads(r.text, many=True)
def fetch_authors(self) -> List[Author]:
    """Fetch all authors from the ``authors/`` endpoint.

    Returns:
        The response body deserialized into ``Author`` objects.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "authors/")
    # Surface HTTP failures directly instead of handing an error body to
    # the schema loader, which would fail with a misleading parse error.
    r.raise_for_status()
    return Author.schema().loads(r.text, many=True)
def fetch_keywords(self) -> List[Keyword]:
    """Fetch all keywords from the ``keywords/`` endpoint.

    Returns:
        The response body deserialized into ``Keyword`` objects.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "keywords/")
    # Surface HTTP failures directly instead of handing an error body to
    # the schema loader, which would fail with a misleading parse error.
    r.raise_for_status()
    return Keyword.schema().loads(r.text, many=True)
def fetch_papers_by_year(self) -> Dict[int, List[PaperComplete]]:
    """Fetch all papers and group them by their ``year`` attribute.

    Returns:
        A plain dict mapping each year to the papers of that year. Years
        appear in the order they are first encountered, and the papers of
        one year keep the order in which ``fetch_papers()`` returned them.
    """
    years: Dict[int, List[PaperComplete]] = {}
    for paper in self.fetch_papers():
        # setdefault creates the bucket on first sight of a year, which
        # replaces the separate membership test and two-branch insert.
        years.setdefault(paper.year, []).append(paper)
    return years
def fetch_pdfs(self) -> List[PDF]:
    """Fetch all PDF entries from the ``pdfs/`` endpoint.

    Returns:
        The response body deserialized into ``PDF`` objects.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "pdfs/")
    # Surface HTTP failures directly instead of handing an error body to
    # the schema loader, which would fail with a misleading parse error.
    r.raise_for_status()
    return PDF.schema().loads(r.text, many=True)
def upload_pdf(self, pdf: PDF, file: Path) -> PDF:
    """Upload a local file as the new content of an existing PDF entry.

    Args:
        pdf: The server-side entry to overwrite; only ``pdf.url`` is used.
        file: Path of the local file, sent as the multipart field ``file``.

    Returns:
        The entry deserialized from the server's response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with file.open("rb") as f:
        r = self.s.put(pdf.url, files={
            "file": f,
        })
    # A rejected upload must not be mistaken for success: without this
    # check the error body would be passed on to the schema loader.
    r.raise_for_status()
    return PDF.schema().loads(r.text)

View file

@ -6,13 +6,23 @@ from dataclasses_json import DataClassJsonMixin, config
from marshmallow import fields from marshmallow import fields
@dataclass
class Note(DataClassJsonMixin):
    """Note attached to a paper, deserialized from the API's JSON."""
    paper: int  # presumably the id of the paper the note belongs to -- TODO confirm
    recommended_by: List[str]
    custom_title: str  # write_symlinks() uses this as the link name under by_custom_title/
    notes_md: str  # NOTE(review): looks like the Markdown source of the note -- confirm
    notes_html: str  # NOTE(review): looks like an HTML rendering of the note -- confirm
@dataclass @dataclass
class PDF(DataClassJsonMixin): class PDF(DataClassJsonMixin):
id: int id: int
url: str
file: str file: str
sha265: str sha256: str
type: str type: str
preview: str preview: Optional[str]
updated_at: datetime = field( updated_at: datetime = field(
metadata=config( metadata=config(
encoder=datetime.isoformat, encoder=datetime.isoformat,
@ -24,29 +34,52 @@ class PDF(DataClassJsonMixin):
@dataclass
class Paper(DataClassJsonMixin):
    """Compact paper record as it is embedded in author and keyword listings."""
    # id: int
    url: str
    title: str
    pdfs: List[PDF]
    doi: Optional[str]
    note: Optional[Note]

    @property
    def main_pdf(self) -> Optional[PDF]:
        """First entry of ``pdfs``, or ``None`` when the paper has no PDF."""
        return self.pdfs[0] if self.pdfs else None
@dataclass
class PaperComplete(Paper):
    """``Paper`` extended with bibliographic metadata.

    This is the type ``PaperLibraryAPI.fetch_papers()`` deserializes the
    ``papers/`` endpoint into.
    """
    keywords: List[str]
    authors: List[str]
    first_author: str
    publication: str
    doctype: str
    arxiv_id: str
    bibcode: str
    year: int  # grouping key used by PaperLibraryAPI.fetch_papers_by_year()
    pubdate: str  # TODO: to datetime
    entry_date: str  # TODO: to datetime
    citation_count: int
@dataclass
class Author(DataClassJsonMixin):
    """Author record together with the papers listed for that author."""
    url: str
    papers: List[Paper]
    name: str
    pretty_name: Optional[str]
    affiliation: Optional[str]
    orcid_id: Optional[str]

    @property
    def display_name(self):
        """``pretty_name`` when it is set and non-empty, otherwise ``name``."""
        return self.pretty_name or self.name
@dataclass
class Keyword(DataClassJsonMixin):
    """Keyword record together with the papers tagged with it."""
    url: str
    papers: List[Paper]
    name: str
    # Named kw_schema rather than schema: a field called "schema" would
    # collide with the schema() classmethod that PaperLibraryAPI calls on
    # this class for deserialization.
    kw_schema: str

View file

@ -1,10 +1,10 @@
import hashlib import hashlib
import os import os
import shutil import shutil
import string
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from alive_progress import alive_bar
from tzlocal import get_localzone from tzlocal import get_localzone
from paperlibrary.api import PaperLibraryAPI from paperlibrary.api import PaperLibraryAPI
@ -12,46 +12,94 @@ from paperlibrary.config import basedir
def format_filename(s: str) -> str:
    """Make a string usable as a single file name by dropping every ``/``.

    All other characters are kept as they are.

    Raises:
        ValueError: If nothing is left once the slashes are removed.
    """
    cleaned = s.replace("/", "")
    # filename = filename.replace(' ', '_')  # I don't like spaces in filenames.
    if cleaned:
        return cleaned
    raise ValueError("empty filename")
def _symlink_paper(paper, pdf_dir: Path, target_dir: Path, name: str) -> None:
    """Create ``target_dir/<name>.pdf`` as a symlink to the paper's main PDF.

    The caller must make sure ``paper.main_pdf`` is not ``None``.
    """
    sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
    targetfile = target_dir / "{}.pdf".format(format_filename(name))
    # NOTE: symlink_to() raises FileExistsError when two papers end up with
    # the same file name inside one directory.
    targetfile.symlink_to(sourcefile)


def write_symlinks(api: PaperLibraryAPI):
    """Rebuild the symlink directories that index the PDF store.

    Ensures ``basedir/pdfs`` exists, then deletes and recreates
    ``by_author``, ``by_keyword``, ``by_year``, ``by_title`` and
    ``by_custom_title`` below ``basedir``. Every link points at
    ``pdfs/<id of the paper's main PDF>.pdf``; papers without a PDF are
    skipped.

    Args:
        api: Client used to fetch authors, keywords and papers.
    """
    pdf_dir = basedir / "pdfs"
    pdf_dir.mkdir(exist_ok=True)
    author_dir = basedir / "by_author"
    keyword_dir = basedir / "by_keyword"
    year_dir = basedir / "by_year"
    title_dir = basedir / "by_title"
    custom_title_dir = basedir / "by_custom_title"

    # Start from empty directories so links to removed or renamed entries
    # do not survive from an earlier run.
    for directory in [author_dir, keyword_dir, year_dir, title_dir, custom_title_dir]:
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir()

    for author in api.fetch_authors():
        author_subdir = author_dir / format_filename(author.display_name)
        author_subdir.mkdir()
        for paper in author.papers:
            if not paper.main_pdf:
                continue
            _symlink_paper(paper, pdf_dir, author_subdir, paper.title)

    for keyword in api.fetch_keywords():
        keyword_subdir = keyword_dir / format_filename(keyword.name)
        keyword_subdir.mkdir()
        for paper in keyword.papers:
            if not paper.main_pdf:
                continue
            _symlink_paper(paper, pdf_dir, keyword_subdir, paper.title)

    for paper in api.fetch_papers():
        if not paper.main_pdf:
            continue
        _symlink_paper(paper, pdf_dir, title_dir, paper.title)
        # A note without a custom title must not abort the whole run:
        # format_filename() raises ValueError for an empty name.
        if paper.note and paper.note.custom_title:
            _symlink_paper(paper, pdf_dir, custom_title_dir, paper.note.custom_title)

    for year, papers in api.fetch_papers_by_year().items():
        year_subdir = year_dir / str(year)
        year_subdir.mkdir()
        for paper in papers:
            if not paper.main_pdf:
                continue
            _symlink_paper(paper, pdf_dir, year_subdir, paper.title)
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
    """Download ``url`` with the API's session and write it to ``target_file``.

    A progress bar counting bytes is shown while the content is written.

    Args:
        api: Client whose session ``api.s`` performs the request.
        url: Address of the file to fetch.
        target_file: Destination path; an existing file is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = api.s.get(url)
    r.raise_for_status()
    # Content-Length is optional (e.g. chunked responses); without it the
    # bar runs with no known total instead of failing with a KeyError.
    content_length = r.headers.get("Content-Length")
    total = int(content_length) if content_length is not None else None
    with alive_bar(total) as bar:
        with target_file.open("wb") as f:
            for chunk in r.iter_content(1024):
                f.write(chunk)
                # Advance by the real chunk size: the last chunk is usually
                # shorter than 1024 bytes, so a fixed 1024 ticks overshoots.
                for _ in range(len(chunk)):
                    bar()
def hash_file(file: Path, buffer_size=65536) -> str:
    """Return the hex-encoded SHA-256 digest of a file's contents.

    The file is read in pieces of at most ``buffer_size`` bytes, so it is
    never loaded into memory as a whole.
    """
    digest = hashlib.sha256()
    with file.open("rb") as stream:
        # read() returns b"" at end of file, which stops the iteration.
        for block in iter(lambda: stream.read(buffer_size), b""):
            digest.update(block)
    return digest.hexdigest()
def update_pdfs(api: PaperLibraryAPI): def update_pdfs(api: PaperLibraryAPI):
@ -63,16 +111,14 @@ def update_pdfs(api: PaperLibraryAPI):
if not pdf_file.exists(): if not pdf_file.exists():
download_file(api, pdf.file, pdf_file) download_file(api, pdf.file, pdf_file)
continue continue
if hash_file(pdf_file) != pdf.sha265: if hash_file(pdf_file) != pdf.sha256:
modification_date = datetime.fromtimestamp( modification_date = datetime.fromtimestamp(
os.path.getmtime(pdf_file), os.path.getmtime(pdf_file),
get_localzone() get_localzone()
) )
print(modification_date)
print(pdf.updated_at)
# print(modification_date - pdf.updated_at)
if modification_date > pdf.updated_at: if modification_date > pdf.updated_at:
raise ValueError("local file is newer") print("local file is newer")
api.upload_pdf(pdf, pdf_file)
else: else:
raise ValueError("remote file is newer") print("remote file is newer")
# TODO: check if file should be uploaded or downloaded download_file(api, pdf.file, pdf_file)

View file

@ -9,12 +9,20 @@ from paperlibrary.library import write_symlinks, update_pdfs
def cli(): def cli():
pass pass
@cli.command()
def update():
    # `update` subcommand: rebuild the symlink directories, then reconcile
    # the local PDF files with the server.
    # One client is shared by both steps; url and auth_token are
    # module-level names defined outside this view.
    api = PaperLibraryAPI(url, auth_token=auth_token)
    write_symlinks(api)
    update_pdfs(api)
@cli.command()
def test():
    # `test` subcommand: fetch every paper from the API and print the
    # resulting list, as a quick check of the connection and of the
    # PaperComplete deserialization.
    api = PaperLibraryAPI(url, auth_token=auth_token)
    print(api.fetch_papers())
if __name__ == '__main__': if __name__ == '__main__':
cli() cli()

14
poetry.lock generated
View file

@ -1,3 +1,11 @@
[[package]]
name = "alive-progress"
version = "1.6.1"
description = "A new kind of Progress Bar, with real-time throughput, eta and very cool animations!"
category = "main"
optional = false
python-versions = ">=2.7, <4"
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2020.6.20" version = "2020.6.20"
@ -161,9 +169,13 @@ socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.8" python-versions = "^3.8"
content-hash = "55c1b4123bca6c85380c21f1e6f5faa129bb1171aa2c9a45497e5916e8c4ac2e" content-hash = "8bb7dbfcc0d218d22ff3dcafae140daff4799bfeb988731a3759412a256d0e29"
[metadata.files] [metadata.files]
alive-progress = [
{file = "alive-progress-1.6.1.tar.gz", hash = "sha256:2a0d7516ec0f596d5ce53755c0913a909eb1c91854e1d782e511ef5e1dd53218"},
{file = "alive_progress-1.6.1-py3-none-any.whl", hash = "sha256:9a0fae6b94fb4e4bcd9fb51760506d29a33358ebbfef2c6516dce3e359a661b5"},
]
certifi = [ certifi = [
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"}, {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"}, {file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},

View file

@ -10,6 +10,7 @@ requests = "^2.24.0"
click = "^7.1.2" click = "^7.1.2"
dataclasses-json = "^0.5.2" dataclasses-json = "^0.5.2"
tzlocal = "^2.1" tzlocal = "^2.1"
alive-progress = "^1.6.1"
[build-system] [build-system]
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]