mirror of
https://github.com/Findus23/PaperLibrary-cli.git
synced 2024-09-20 17:03:46 +02:00
support notes
This commit is contained in:
parent
051dac5411
commit
3de87fc735
6 changed files with 153 additions and 31 deletions
|
@ -1,8 +1,9 @@
|
||||||
from typing import List
|
from pathlib import Path
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
from paperlibrary.api.models import Author, PDF, Keyword
|
from paperlibrary.api.models import Author, PDF, Keyword, PaperComplete
|
||||||
|
|
||||||
|
|
||||||
class PaperLibraryAPI:
|
class PaperLibraryAPI:
|
||||||
|
@ -13,14 +14,35 @@ class PaperLibraryAPI:
|
||||||
self.s = Session()
|
self.s = Session()
|
||||||
self.s.headers.update({"Authorization": "Token " + auth_token})
|
self.s.headers.update({"Authorization": "Token " + auth_token})
|
||||||
|
|
||||||
|
def fetch_papers(self) -> List[PaperComplete]:
|
||||||
|
r = self.s.get(self.baseURL + "papers/")
|
||||||
|
return PaperComplete.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
def fetch_authors(self) -> List[Author]:
|
def fetch_authors(self) -> List[Author]:
|
||||||
r = self.s.get(self.baseURL + "authors/")
|
r = self.s.get(self.baseURL + "authors/")
|
||||||
return Author.schema().loads(r.text, many=True)
|
return Author.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
def fetch_keywords(self) -> List[Author]:
|
def fetch_keywords(self) -> List[Keyword]:
|
||||||
r = self.s.get(self.baseURL + "keywords/")
|
r = self.s.get(self.baseURL + "keywords/")
|
||||||
return Keyword.schema().loads(r.text, many=True)
|
return Keyword.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
|
def fetch_papers_by_year(self) -> Dict[int, List[PaperComplete]]:
|
||||||
|
papers = self.fetch_papers()
|
||||||
|
years: Dict[int, List[PaperComplete]] = {}
|
||||||
|
for paper in papers:
|
||||||
|
if paper.year in years:
|
||||||
|
years[paper.year].append(paper)
|
||||||
|
else:
|
||||||
|
years[paper.year] = [paper]
|
||||||
|
return years
|
||||||
|
|
||||||
def fetch_pdfs(self) -> List[PDF]:
|
def fetch_pdfs(self) -> List[PDF]:
|
||||||
r = self.s.get(self.baseURL + "pdfs/")
|
r = self.s.get(self.baseURL + "pdfs/")
|
||||||
return PDF.schema().loads(r.text, many=True)
|
return PDF.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
|
def upload_pdf(self, pdf, file: Path) -> PDF:
|
||||||
|
with file.open("rb") as f:
|
||||||
|
r = self.s.put(pdf.url, files={
|
||||||
|
"file": f,
|
||||||
|
})
|
||||||
|
return PDF.schema().loads(r.text)
|
||||||
|
|
|
@ -6,13 +6,23 @@ from dataclasses_json import DataClassJsonMixin, config
|
||||||
from marshmallow import fields
|
from marshmallow import fields
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Note(DataClassJsonMixin):
|
||||||
|
paper: int
|
||||||
|
recommended_by: List[str]
|
||||||
|
custom_title: str
|
||||||
|
notes_md: str
|
||||||
|
notes_html: str
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class PDF(DataClassJsonMixin):
|
class PDF(DataClassJsonMixin):
|
||||||
id: int
|
id: int
|
||||||
|
url: str
|
||||||
file: str
|
file: str
|
||||||
sha265: str
|
sha256: str
|
||||||
type: str
|
type: str
|
||||||
preview: str
|
preview: Optional[str]
|
||||||
updated_at: datetime = field(
|
updated_at: datetime = field(
|
||||||
metadata=config(
|
metadata=config(
|
||||||
encoder=datetime.isoformat,
|
encoder=datetime.isoformat,
|
||||||
|
@ -24,29 +34,52 @@ class PDF(DataClassJsonMixin):
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Paper(DataClassJsonMixin):
|
class Paper(DataClassJsonMixin):
|
||||||
id: int
|
# id: int
|
||||||
url: str
|
url: str
|
||||||
title: str
|
title: str
|
||||||
pdfs: List[PDF]
|
pdfs: List[PDF]
|
||||||
doi: str
|
doi: Optional[str]
|
||||||
|
note: Optional[Note]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def main_pdf(self) -> PDF:
|
def main_pdf(self) -> Optional[PDF]:
|
||||||
|
if not self.pdfs:
|
||||||
|
return None
|
||||||
return self.pdfs[0]
|
return self.pdfs[0]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class PaperComplete(Paper):
|
||||||
|
keywords: List[str]
|
||||||
|
authors: List[str]
|
||||||
|
first_author: str
|
||||||
|
publication: str
|
||||||
|
doctype: str
|
||||||
|
arxiv_id: str
|
||||||
|
bibcode: str
|
||||||
|
year: int
|
||||||
|
pubdate: str # TODO: to datetime
|
||||||
|
entry_date: str # TODO: to datetime
|
||||||
|
citation_count: int
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Author(DataClassJsonMixin):
|
class Author(DataClassJsonMixin):
|
||||||
url: str
|
url: str
|
||||||
papers: List[Paper]
|
papers: List[Paper]
|
||||||
name: str
|
name: str
|
||||||
|
pretty_name: Optional[str]
|
||||||
affiliation: Optional[str]
|
affiliation: Optional[str]
|
||||||
orcid_id: Optional[str]
|
orcid_id: Optional[str]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def display_name(self):
|
||||||
|
return self.pretty_name if self.pretty_name else self.name
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Keyword(DataClassJsonMixin):
|
class Keyword(DataClassJsonMixin):
|
||||||
url: str
|
url: str
|
||||||
papers: List[Paper]
|
papers: List[Paper]
|
||||||
name: str
|
name: str
|
||||||
schema: str
|
kw_schema: str
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import string
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from alive_progress import alive_bar
|
||||||
from tzlocal import get_localzone
|
from tzlocal import get_localzone
|
||||||
|
|
||||||
from paperlibrary.api import PaperLibraryAPI
|
from paperlibrary.api import PaperLibraryAPI
|
||||||
|
@ -12,46 +12,94 @@ from paperlibrary.config import basedir
|
||||||
|
|
||||||
|
|
||||||
def format_filename(s: str) -> str:
|
def format_filename(s: str) -> str:
|
||||||
additional_letters = ["ä", "Ä", "ö", "Ö", "ü", "Ü"]
|
invalid_chars = {"/"}
|
||||||
valid_chars = f"-_.() {string.ascii_letters}{string.digits}{''.join(additional_letters)}"
|
filename = ''.join(c for c in s if c not in invalid_chars)
|
||||||
filename = ''.join(c for c in s if c in valid_chars)
|
|
||||||
# filename = filename.replace(' ', '_') # I don't like spaces in filenames.
|
# filename = filename.replace(' ', '_') # I don't like spaces in filenames.
|
||||||
|
if not filename:
|
||||||
|
raise ValueError("empty filename")
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
|
|
||||||
def write_symlinks(api: PaperLibraryAPI):
|
def write_symlinks(api: PaperLibraryAPI):
|
||||||
...
|
...
|
||||||
pdf_dir = basedir / "pdfs"
|
pdf_dir = basedir / "pdfs"
|
||||||
|
pdf_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
author_dir = basedir / "by_author"
|
author_dir = basedir / "by_author"
|
||||||
shutil.rmtree(author_dir, ignore_errors=True)
|
keyword_dir = basedir / "by_keyword"
|
||||||
author_dir.mkdir()
|
year_dir = basedir / "by_year"
|
||||||
|
title_dir = basedir / "by_title"
|
||||||
|
custom_title_dir = basedir / "by_custom_title"
|
||||||
|
|
||||||
|
for directory in [author_dir, keyword_dir, year_dir, title_dir, custom_title_dir]:
|
||||||
|
shutil.rmtree(directory, ignore_errors=True)
|
||||||
|
directory.mkdir()
|
||||||
|
|
||||||
for author in api.fetch_authors():
|
for author in api.fetch_authors():
|
||||||
author_subdir = author_dir / format_filename(author.name)
|
author_subdir = author_dir / format_filename(author.display_name)
|
||||||
author_subdir.mkdir()
|
author_subdir.mkdir()
|
||||||
for paper in author.papers:
|
for paper in author.papers:
|
||||||
|
if not paper.main_pdf:
|
||||||
|
continue
|
||||||
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
||||||
targetfile = author_subdir / "{}.pdf".format(format_filename(paper.title))
|
targetfile = author_subdir / "{}.pdf".format(format_filename(paper.title))
|
||||||
targetfile.symlink_to(sourcefile)
|
targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
for keyword in api.fetch_keywords():
|
||||||
|
keyword_subdir = keyword_dir / format_filename(keyword.name)
|
||||||
|
keyword_subdir.mkdir()
|
||||||
|
for paper in keyword.papers:
|
||||||
|
if not paper.main_pdf:
|
||||||
|
continue
|
||||||
|
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
||||||
|
targetfile = keyword_subdir / "{}.pdf".format(format_filename(paper.title))
|
||||||
|
targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
for paper in api.fetch_papers():
|
||||||
|
if not paper.main_pdf:
|
||||||
|
continue
|
||||||
|
|
||||||
|
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
||||||
|
targetfile = title_dir / "{}.pdf".format(format_filename(paper.title))
|
||||||
|
targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
if not paper.note:
|
||||||
|
continue
|
||||||
|
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
||||||
|
targetfile = custom_title_dir / "{}.pdf".format(format_filename(paper.note.custom_title))
|
||||||
|
targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
for year, papers in api.fetch_papers_by_year().items():
|
||||||
|
year_subdir = year_dir / str(year)
|
||||||
|
year_subdir.mkdir()
|
||||||
|
for paper in papers:
|
||||||
|
if not paper.main_pdf:
|
||||||
|
continue
|
||||||
|
sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
|
||||||
|
targetfile = year_subdir / "{}.pdf".format(format_filename(paper.title))
|
||||||
|
targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
|
||||||
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
|
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
|
||||||
r = api.s.get(url)
|
r = api.s.get(url)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
with alive_bar(int(r.headers["Content-Length"])) as bar:
|
||||||
with target_file.open("wb") as f:
|
with target_file.open("wb") as f:
|
||||||
for chunk in r.iter_content(1024):
|
for chunk in r.iter_content(1024):
|
||||||
|
for _ in range(1024):
|
||||||
|
bar()
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
|
|
||||||
|
|
||||||
def hash_file(file: Path, buffer_size=65536) -> str:
|
def hash_file(file: Path, buffer_size=65536) -> str:
|
||||||
sha265 = hashlib.sha256()
|
sha256 = hashlib.sha256()
|
||||||
with file.open("rb") as f:
|
with file.open("rb") as f:
|
||||||
while True:
|
while True:
|
||||||
data = f.read(buffer_size)
|
data = f.read(buffer_size)
|
||||||
if not data:
|
if not data:
|
||||||
break
|
break
|
||||||
sha265.update(data)
|
sha256.update(data)
|
||||||
return sha265.hexdigest()
|
return sha256.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def update_pdfs(api: PaperLibraryAPI):
|
def update_pdfs(api: PaperLibraryAPI):
|
||||||
|
@ -63,16 +111,14 @@ def update_pdfs(api: PaperLibraryAPI):
|
||||||
if not pdf_file.exists():
|
if not pdf_file.exists():
|
||||||
download_file(api, pdf.file, pdf_file)
|
download_file(api, pdf.file, pdf_file)
|
||||||
continue
|
continue
|
||||||
if hash_file(pdf_file) != pdf.sha265:
|
if hash_file(pdf_file) != pdf.sha256:
|
||||||
modification_date = datetime.fromtimestamp(
|
modification_date = datetime.fromtimestamp(
|
||||||
os.path.getmtime(pdf_file),
|
os.path.getmtime(pdf_file),
|
||||||
get_localzone()
|
get_localzone()
|
||||||
)
|
)
|
||||||
print(modification_date)
|
|
||||||
print(pdf.updated_at)
|
|
||||||
# print(modification_date - pdf.updated_at)
|
|
||||||
if modification_date > pdf.updated_at:
|
if modification_date > pdf.updated_at:
|
||||||
raise ValueError("local file is newer")
|
print("local file is newer")
|
||||||
|
api.upload_pdf(pdf, pdf_file)
|
||||||
else:
|
else:
|
||||||
raise ValueError("remote file is newer")
|
print("remote file is newer")
|
||||||
# TODO: check if file should be uploaded or downloaded
|
download_file(api, pdf.file, pdf_file)
|
||||||
|
|
|
@ -9,6 +9,7 @@ from paperlibrary.library import write_symlinks, update_pdfs
|
||||||
def cli():
|
def cli():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
def update():
|
def update():
|
||||||
api = PaperLibraryAPI(url, auth_token=auth_token)
|
api = PaperLibraryAPI(url, auth_token=auth_token)
|
||||||
|
@ -16,5 +17,12 @@ def update():
|
||||||
update_pdfs(api)
|
update_pdfs(api)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
def test():
|
||||||
|
api = PaperLibraryAPI(url, auth_token=auth_token)
|
||||||
|
|
||||||
|
print(api.fetch_papers())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cli()
|
cli()
|
||||||
|
|
14
poetry.lock
generated
14
poetry.lock
generated
|
@ -1,3 +1,11 @@
|
||||||
|
[[package]]
|
||||||
|
name = "alive-progress"
|
||||||
|
version = "1.6.1"
|
||||||
|
description = "A new kind of Progress Bar, with real-time throughput, eta and very cool animations!"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, <4"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "certifi"
|
name = "certifi"
|
||||||
version = "2020.6.20"
|
version = "2020.6.20"
|
||||||
|
@ -161,9 +169,13 @@ socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.8"
|
python-versions = "^3.8"
|
||||||
content-hash = "55c1b4123bca6c85380c21f1e6f5faa129bb1171aa2c9a45497e5916e8c4ac2e"
|
content-hash = "8bb7dbfcc0d218d22ff3dcafae140daff4799bfeb988731a3759412a256d0e29"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
|
alive-progress = [
|
||||||
|
{file = "alive-progress-1.6.1.tar.gz", hash = "sha256:2a0d7516ec0f596d5ce53755c0913a909eb1c91854e1d782e511ef5e1dd53218"},
|
||||||
|
{file = "alive_progress-1.6.1-py3-none-any.whl", hash = "sha256:9a0fae6b94fb4e4bcd9fb51760506d29a33358ebbfef2c6516dce3e359a661b5"},
|
||||||
|
]
|
||||||
certifi = [
|
certifi = [
|
||||||
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
|
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
|
||||||
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
|
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
|
||||||
|
|
|
@ -10,6 +10,7 @@ requests = "^2.24.0"
|
||||||
click = "^7.1.2"
|
click = "^7.1.2"
|
||||||
dataclasses-json = "^0.5.2"
|
dataclasses-json = "^0.5.2"
|
||||||
tzlocal = "^2.1"
|
tzlocal = "^2.1"
|
||||||
|
alive-progress = "^1.6.1"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core>=1.0.0"]
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
|
Loading…
Reference in a new issue