1
0
Fork 0
mirror of https://github.com/Findus23/PaperLibrary-cli.git synced 2024-09-20 17:03:46 +02:00

add library code

This commit is contained in:
Lukas Winkler 2020-10-19 17:43:08 +02:00
parent aa495bb0ca
commit 051dac5411
Signed by: lukas
GPG key ID: 54DE4D798D244853
5 changed files with 94 additions and 3 deletions

1
.gitignore vendored
View file

@ -1,4 +1,5 @@
.idea/ .idea/
__pycache__/ __pycache__/
library/ library/
!paperlibrary/library
paperlibrary/config.py paperlibrary/config.py

View file

@ -2,7 +2,7 @@ from typing import List
from requests import Session from requests import Session
from paperlibrary.api.models import Author, PDF from paperlibrary.api.models import Author, PDF, Keyword
class PaperLibraryAPI: class PaperLibraryAPI:
@ -17,6 +17,10 @@ class PaperLibraryAPI:
r = self.s.get(self.baseURL + "authors/") r = self.s.get(self.baseURL + "authors/")
return Author.schema().loads(r.text, many=True) return Author.schema().loads(r.text, many=True)
def fetch_keywords(self) -> List[Keyword]:
    """Fetch all keywords from the ``keywords/`` endpoint.

    Returns:
        The response body deserialized with ``Keyword.schema()``
        (``many=True``), i.e. a list of ``Keyword`` objects.
    """
    r = self.s.get(self.baseURL + "keywords/")
    return Keyword.schema().loads(r.text, many=True)
def fetch_pdfs(self) -> List[PDF]: def fetch_pdfs(self) -> List[PDF]:
r = self.s.get(self.baseURL + "pdfs/") r = self.s.get(self.baseURL + "pdfs/")
return PDF.schema().loads(r.text, many=True) return PDF.schema().loads(r.text, many=True)

View file

@ -2,7 +2,7 @@ from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
from typing import Optional, List from typing import Optional, List
from dataclasses_json import DataClassJsonMixin, dataclass_json, Undefined, config from dataclasses_json import DataClassJsonMixin, config
from marshmallow import fields from marshmallow import fields
@ -35,7 +35,6 @@ class Paper(DataClassJsonMixin):
return self.pdfs[0] return self.pdfs[0]
@dataclass_json()
@dataclass @dataclass
class Author(DataClassJsonMixin): class Author(DataClassJsonMixin):
url: str url: str
@ -43,3 +42,11 @@ class Author(DataClassJsonMixin):
name: str name: str
affiliation: Optional[str] affiliation: Optional[str]
orcid_id: Optional[str] orcid_id: Optional[str]
# Keyword record; the API client deserializes the "keywords/" response
# into a list of these via ``Keyword.schema().loads(..., many=True)``.
@dataclass
class Keyword(DataClassJsonMixin):
    # Presumably the API URL of this keyword resource -- TODO confirm.
    url: str
    # Papers attached to this keyword.
    papers: List[Paper]
    # Name of the keyword.
    name: str
    # NOTE(review): this field shares its name with the ``schema()``
    # classmethod that the API client calls on this class -- confirm the
    # two do not clash.
    schema: str

View file

@ -0,0 +1 @@
from .library import *

View file

@ -0,0 +1,78 @@
import hashlib
import os
import shutil
import string
from datetime import datetime
from pathlib import Path
from tzlocal import get_localzone
from paperlibrary.api import PaperLibraryAPI
from paperlibrary.config import basedir
def format_filename(s: str) -> str:
    """Return *s* with every character outside the filename whitelist removed.

    Kept characters are ASCII letters, digits, the punctuation ``-_.()``,
    the space, and the German umlauts. Spaces are deliberately preserved.
    Everything else is dropped without replacement, so the result can be
    shorter than the input or even empty.
    """
    umlauts = "äÄöÖüÜ"
    allowed = set("-_.() " + string.ascii_letters + string.digits + umlauts)
    kept = [char for char in s if char in allowed]
    return "".join(kept)
def write_symlinks(api: PaperLibraryAPI):
    """Rebuild the ``by_author`` tree of symlinks pointing into ``pdfs``.

    For every author returned by the API a subdirectory named after the
    sanitized author name is created below ``basedir / "by_author"``. It
    receives one symlink per paper, named after the sanitized paper title
    and pointing at ``basedir / "pdfs" / "<main pdf id>.pdf"``.

    Args:
        api: client used to fetch the author list.
    """
    pdf_dir = basedir / "pdfs"
    author_dir = basedir / "by_author"

    # Fetch before deleting anything: if the request fails, the existing
    # tree is left untouched instead of being wiped.
    authors = api.fetch_authors()

    shutil.rmtree(author_dir, ignore_errors=True)
    author_dir.mkdir()
    for author in authors:
        author_subdir = author_dir / format_filename(author.name)
        # Different names can sanitize to the same string; let them share
        # one directory rather than abort with FileExistsError.
        author_subdir.mkdir(exist_ok=True)
        for paper in author.papers:
            sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
            targetfile = author_subdir / f"{format_filename(paper.title)}.pdf"
            if targetfile.is_symlink() or targetfile.exists():
                # A paper with the same sanitized title is already linked
                # here; keep the first link instead of crashing.
                continue
            targetfile.symlink_to(sourcefile)
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
    """Download *url* through the API session and store it at *target_file*.

    The body is streamed into a temporary sibling file (``<name>.part``)
    and moved into place only after the download has completed, so an
    interrupted transfer never leaves a truncated file at *target_file*.

    Args:
        api: client whose session ``api.s`` performs the request.
        url: location of the file to download.
        target_file: destination path; replaced if it already exists.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error response
        (``requests.HTTPError``); nothing is written in that case.
    """
    partial_file = target_file.with_name(target_file.name + ".part")
    # stream=True defers reading the body so iter_content() really works
    # chunk by chunk; the ``with`` releases the connection afterwards.
    with api.s.get(url, stream=True) as r:
        r.raise_for_status()
        try:
            with partial_file.open("wb") as f:
                for chunk in r.iter_content(65536):
                    f.write(chunk)
        except BaseException:
            # Also covers KeyboardInterrupt: never leave a half-written
            # temporary file behind.
            if partial_file.exists():
                partial_file.unlink()
            raise
    partial_file.replace(target_file)
def hash_file(file: Path, buffer_size=65536) -> str:
    """Return the hexadecimal SHA-256 digest of the contents of *file*.

    The file is read in pieces of *buffer_size* bytes, so it is never held
    in memory as a whole.
    """
    digest = hashlib.sha256()
    with file.open("rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(buffer_size), b""):
            digest.update(block)
    return digest.hexdigest()
def update_pdfs(api: PaperLibraryAPI):
    """Bring the local ``pdfs`` directory in line with the PDFs listed by the API.

    PDFs that are missing locally are downloaded. A local file whose
    SHA-256 digest differs from the value reported by the API is a
    conflict: both timestamps are printed and a ``ValueError`` is raised
    saying which side is newer.

    Raises:
        ValueError: when an existing local file differs from the remote one.
    """
    storage = basedir / "pdfs"
    storage.mkdir(exist_ok=True)
    for remote in api.fetch_pdfs():
        local_copy = storage / f"{remote.id}.pdf"
        if not local_copy.exists():
            download_file(api, remote.file, local_copy)
        elif hash_file(local_copy) != remote.sha265:
            # Timezone-aware local mtime, printed next to the remote
            # timestamp before the comparison.
            local_mtime = datetime.fromtimestamp(
                local_copy.stat().st_mtime,
                tz=get_localzone(),
            )
            print(local_mtime)
            print(remote.updated_at)
            # TODO: check if file should be uploaded or downloaded
            if local_mtime > remote.updated_at:
                verdict = "local file is newer"
            else:
                verdict = "remote file is newer"
            raise ValueError(verdict)