Verified commit 3de87fc7, authored by Lukas Winkler
Browse files

support notes

parent 051dac54
from typing import List
from pathlib import Path
from typing import List, Dict
from requests import Session
from paperlibrary.api.models import Author, PDF, Keyword
from paperlibrary.api.models import Author, PDF, Keyword, PaperComplete
class PaperLibraryAPI:
......@@ -13,14 +14,35 @@ class PaperLibraryAPI:
self.s = Session()
self.s.headers.update({"Authorization": "Token " + auth_token})
def fetch_papers(self) -> List[PaperComplete]:
    """Fetch all papers from the ``papers/`` endpoint.

    Returns:
        The response body deserialised into ``PaperComplete`` objects.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "papers/")
    # Fail on HTTP errors here rather than handing an error payload to the
    # schema loader, where it would surface as a confusing parse error.
    r.raise_for_status()
    return PaperComplete.schema().loads(r.text, many=True)
def fetch_authors(self) -> List[Author]:
    """Fetch all authors from the ``authors/`` endpoint.

    Returns:
        The response body deserialised into ``Author`` objects.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "authors/")
    # Fail on HTTP errors here rather than handing an error payload to the
    # schema loader, where it would surface as a confusing parse error.
    r.raise_for_status()
    return Author.schema().loads(r.text, many=True)
def fetch_keywords(self) -> List[Keyword]:
    """Fetch all keywords from the ``keywords/`` endpoint.

    Returns:
        The response body deserialised into ``Keyword`` objects.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "keywords/")
    # Fail on HTTP errors here rather than handing an error payload to the
    # schema loader, where it would surface as a confusing parse error.
    r.raise_for_status()
    return Keyword.schema().loads(r.text, many=True)
def fetch_papers_by_year(self) -> Dict[int, List[PaperComplete]]:
    """Fetch all papers and group them by their ``year`` attribute.

    Returns:
        A plain dict mapping each year to its papers. Years appear in the
        order they are first seen; papers keep the order in which
        ``fetch_papers`` returned them.
    """
    years: Dict[int, List[PaperComplete]] = {}
    for paper in self.fetch_papers():
        # setdefault creates the bucket on first sight of a year, so no
        # separate membership test is needed.
        years.setdefault(paper.year, []).append(paper)
    return years
def fetch_pdfs(self) -> List[PDF]:
    """Fetch all PDF records from the ``pdfs/`` endpoint.

    Returns:
        The response body deserialised into ``PDF`` objects.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = self.s.get(self.baseURL + "pdfs/")
    # Fail on HTTP errors here rather than handing an error payload to the
    # schema loader, where it would surface as a confusing parse error.
    r.raise_for_status()
    return PDF.schema().loads(r.text, many=True)
def upload_pdf(self, pdf, file: Path) -> PDF:
    """Upload a local file as the new content of an existing PDF record.

    Args:
        pdf: The record to update; its ``url`` attribute is the PUT target.
        file: Path of the local file, sent as the multipart field ``file``.

    Returns:
        The ``PDF`` object parsed from the server's response body.

    Raises:
        requests.HTTPError: if the server rejects the upload (4xx/5xx).
    """
    with file.open("rb") as f:
        r = self.s.put(pdf.url, files={
            "file": f,
        })
    # A rejected upload must not be parsed as if it were the updated record.
    r.raise_for_status()
    return PDF.schema().loads(r.text)
......@@ -6,13 +6,23 @@ from dataclasses_json import DataClassJsonMixin, config
from marshmallow import fields
@dataclass
class Note(DataClassJsonMixin):
    """User-written notes attached to a paper (the ``Paper.note`` field)."""

    # presumably the id of the paper this note belongs to — TODO confirm
    paper: int
    recommended_by: List[str]
    # Used as the link name in the ``by_custom_title`` directory.
    custom_title: str
    # presumably the Markdown source and its rendered HTML — TODO confirm
    notes_md: str
    notes_html: str
@dataclass
class PDF(DataClassJsonMixin):
id: int
url: str
file: str
sha265: str
sha256: str
type: str
preview: str
preview: Optional[str]
updated_at: datetime = field(
metadata=config(
encoder=datetime.isoformat,
......@@ -24,29 +34,52 @@ class PDF(DataClassJsonMixin):
@dataclass
class Paper(DataClassJsonMixin):
    """A paper together with its attached PDFs and optional note."""

    # NOTE(review): the ``id`` field is disabled in this revision —
    # presumably the payload no longer carries it; confirm before re-enabling.
    # id: int
    url: str
    title: str
    pdfs: List[PDF]
    doi: Optional[str]
    note: Optional[Note]

    @property
    def main_pdf(self) -> Optional[PDF]:
        """Return the first attached PDF, or ``None`` if there are none."""
        if not self.pdfs:
            return None
        return self.pdfs[0]
@dataclass
class PaperComplete(Paper):
    """``Paper`` extended with bibliographic fields (type of ``fetch_papers`` results)."""

    keywords: List[str]
    authors: List[str]
    first_author: str
    publication: str
    doctype: str
    arxiv_id: str
    bibcode: str
    # Grouping key used by ``fetch_papers_by_year``.
    year: int
    # Both dates are kept as the raw strings from the payload for now.
    pubdate: str  # TODO: to datetime
    entry_date: str  # TODO: to datetime
    citation_count: int
@dataclass
class Author(DataClassJsonMixin):
    """An author record together with the papers listed for that author."""

    url: str
    papers: List[Paper]
    name: str
    pretty_name: Optional[str]
    affiliation: Optional[str]
    orcid_id: Optional[str]

    @property
    def display_name(self):
        """Name to show for this author: ``pretty_name`` when set, else ``name``."""
        # ``or`` falls back for both None and the empty string.
        return self.pretty_name or self.name
@dataclass
class Keyword(DataClassJsonMixin):
    """A keyword record together with the papers tagged with it."""

    url: str
    papers: List[Paper]
    name: str
    # Deliberately not called ``schema``: callers use the ``schema()``
    # classmethod (``Keyword.schema().loads(...)``), so a field of that
    # name would clash with it.
    kw_schema: str
import hashlib
import os
import shutil
import string
from datetime import datetime
from pathlib import Path
from alive_progress import alive_bar
from tzlocal import get_localzone
from paperlibrary.api import PaperLibraryAPI
......@@ -12,46 +12,94 @@ from paperlibrary.config import basedir
def format_filename(s: str) -> str:
    """Turn an arbitrary string into a usable single path component.

    Only characters that cannot appear inside a file name are removed (the
    path separator ``/`` and the NUL character); everything else, including
    spaces, punctuation and non-ASCII letters, is kept as is.

    Args:
        s: The text to derive the file name from.

    Returns:
        ``s`` without the invalid characters.

    Raises:
        ValueError: if nothing is left after removing the invalid characters.
    """
    invalid_chars = {"/", "\0"}
    filename = "".join(c for c in s if c not in invalid_chars)
    if not filename:
        raise ValueError("empty filename")
    return filename
def _symlink_main_pdf(pdf_dir: Path, target_dir: Path, paper, name: str) -> bool:
    """Create ``<target_dir>/<name>.pdf`` as a symlink to the paper's main PDF.

    Returns:
        ``False`` (and creates nothing) when the paper has no PDF, else ``True``.
    """
    main_pdf = paper.main_pdf
    if not main_pdf:
        return False
    sourcefile = pdf_dir / f"{main_pdf.id}.pdf"
    targetfile = target_dir / "{}.pdf".format(format_filename(name))
    targetfile.symlink_to(sourcefile)
    return True


def write_symlinks(api: PaperLibraryAPI):
    """Rebuild the browsable symlink trees below ``basedir``.

    The directories ``by_author``, ``by_keyword``, ``by_year``, ``by_title``
    and ``by_custom_title`` are deleted and recreated from scratch. Every
    link points at ``pdfs/<id>.pdf`` of the paper's main PDF; papers without
    a PDF are skipped.

    Args:
        api: Client used to fetch authors, keywords and papers.

    Raises:
        ValueError: from ``format_filename`` when a name ends up empty.
        FileExistsError: when two entries map to the same directory or
            link name.
    """
    pdf_dir = basedir / "pdfs"
    pdf_dir.mkdir(exist_ok=True)

    author_dir = basedir / "by_author"
    keyword_dir = basedir / "by_keyword"
    year_dir = basedir / "by_year"
    title_dir = basedir / "by_title"
    custom_title_dir = basedir / "by_custom_title"

    # Start from empty directories so links of removed papers disappear.
    for directory in [author_dir, keyword_dir, year_dir, title_dir, custom_title_dir]:
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir()

    for author in api.fetch_authors():
        author_subdir = author_dir / format_filename(author.display_name)
        author_subdir.mkdir()
        for paper in author.papers:
            _symlink_main_pdf(pdf_dir, author_subdir, paper, paper.title)

    for keyword in api.fetch_keywords():
        keyword_subdir = keyword_dir / format_filename(keyword.name)
        keyword_subdir.mkdir()
        for paper in keyword.papers:
            _symlink_main_pdf(pdf_dir, keyword_subdir, paper, paper.title)

    for paper in api.fetch_papers():
        if not _symlink_main_pdf(pdf_dir, title_dir, paper, paper.title):
            continue
        # Only papers that carry a note get a custom-title link.
        if paper.note:
            _symlink_main_pdf(pdf_dir, custom_title_dir, paper, paper.note.custom_title)

    for year, papers in api.fetch_papers_by_year().items():
        year_subdir = year_dir / str(year)
        year_subdir.mkdir()
        for paper in papers:
            _symlink_main_pdf(pdf_dir, year_subdir, paper, paper.title)
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
    """Download ``url`` to ``target_file`` while showing a progress bar.

    Args:
        api: Client whose session ``api.s`` is used for the request.
        url: Location of the file to download.
        target_file: Destination path; an existing file is overwritten.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # stream=True lets iter_content read from the socket chunk by chunk
    # instead of loading the whole body into memory first.
    with api.s.get(url, stream=True) as r:
        r.raise_for_status()
        # Without a Content-Length header the bar runs with no known total.
        length = r.headers.get("Content-Length")
        total = int(length) if length is not None else None
        with alive_bar(total) as bar:
            with target_file.open("wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
                    # The bar counts bytes; the last chunk is usually shorter
                    # than 1024, so advance by the real chunk size.
                    for _ in range(len(chunk)):
                        bar()
def hash_file(file: Path, buffer_size=65536) -> str:
    """Return the SHA-256 hex digest of a file's contents.

    Args:
        file: Path of the file to hash.
        buffer_size: Number of bytes read per iteration, so large files are
            never loaded into memory as a whole.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    sha256 = hashlib.sha256()
    with file.open("rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(buffer_size), b""):
            sha256.update(chunk)
    return sha256.hexdigest()
def update_pdfs(api: PaperLibraryAPI):
......@@ -63,16 +111,14 @@ def update_pdfs(api: PaperLibraryAPI):
if not pdf_file.exists():
download_file(api, pdf.file, pdf_file)
continue
if hash_file(pdf_file) != pdf.sha265:
if hash_file(pdf_file) != pdf.sha256:
modification_date = datetime.fromtimestamp(
os.path.getmtime(pdf_file),
get_localzone()
)
print(modification_date)
print(pdf.updated_at)
# print(modification_date - pdf.updated_at)
if modification_date > pdf.updated_at:
raise ValueError("local file is newer")
print("local file is newer")
api.upload_pdf(pdf, pdf_file)
else:
raise ValueError("remote file is newer")
# TODO: check if file should be uploaded or downloaded
print("remote file is newer")
download_file(api, pdf.file, pdf_file)
......@@ -9,12 +9,20 @@ from paperlibrary.library import write_symlinks, update_pdfs
def cli():
    """Root of the command line interface; it does no work of its own.

    NOTE(review): the commands below register via ``@cli.command()``, so this
    is presumably decorated as a command group outside this view — confirm.
    """
    pass
@cli.command()
def update():
    """Rebuild the symlink directories, then synchronise the PDF files."""
    api = PaperLibraryAPI(url, auth_token=auth_token)
    write_symlinks(api)
    update_pdfs(api)
@cli.command()
def test():
    """Fetch all papers from the API and print them (debugging aid)."""
    api = PaperLibraryAPI(url, auth_token=auth_token)
    print(api.fetch_papers())
# Run the command line interface when this module is executed as a script.
if __name__ == '__main__':
    cli()
[[package]]
name = "alive-progress"
version = "1.6.1"
description = "A new kind of Progress Bar, with real-time throughput, eta and very cool animations!"
category = "main"
optional = false
python-versions = ">=2.7, <4"
[[package]]
name = "certifi"
version = "2020.6.20"
......@@ -161,9 +169,13 @@ socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "55c1b4123bca6c85380c21f1e6f5faa129bb1171aa2c9a45497e5916e8c4ac2e"
content-hash = "8bb7dbfcc0d218d22ff3dcafae140daff4799bfeb988731a3759412a256d0e29"
[metadata.files]
alive-progress = [
{file = "alive-progress-1.6.1.tar.gz", hash = "sha256:2a0d7516ec0f596d5ce53755c0913a909eb1c91854e1d782e511ef5e1dd53218"},
{file = "alive_progress-1.6.1-py3-none-any.whl", hash = "sha256:9a0fae6b94fb4e4bcd9fb51760506d29a33358ebbfef2c6516dce3e359a661b5"},
]
certifi = [
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
......
......@@ -10,6 +10,7 @@ requests = "^2.24.0"
click = "^7.1.2"
dataclasses-json = "^0.5.2"
tzlocal = "^2.1"
alive-progress = "^1.6.1"
[build-system]
requires = ["poetry-core>=1.0.0"]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment