mirror of
https://github.com/Findus23/PaperLibrary-cli.git
synced 2024-09-20 17:03:46 +02:00
add library code
This commit is contained in:
parent
aa495bb0ca
commit
051dac5411
5 changed files with 94 additions and 3 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,4 +1,5 @@
|
||||||
.idea/
|
.idea/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
library/
|
library/
|
||||||
|
!paperlibrary/library
|
||||||
paperlibrary/config.py
|
paperlibrary/config.py
|
||||||
|
|
|
@ -2,7 +2,7 @@ from typing import List
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
from paperlibrary.api.models import Author, PDF
|
from paperlibrary.api.models import Author, PDF, Keyword
|
||||||
|
|
||||||
|
|
||||||
class PaperLibraryAPI:
|
class PaperLibraryAPI:
|
||||||
|
@ -17,6 +17,10 @@ class PaperLibraryAPI:
|
||||||
r = self.s.get(self.baseURL + "authors/")
|
r = self.s.get(self.baseURL + "authors/")
|
||||||
return Author.schema().loads(r.text, many=True)
|
return Author.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
|
def fetch_keywords(self) -> List[Keyword]:
    """Fetch all keywords from the ``keywords/`` endpoint.

    :return: the response body deserialized with ``Keyword.schema()``
        (``many=True``), i.e. a list of ``Keyword`` objects
    """
    r = self.s.get(self.baseURL + "keywords/")
    return Keyword.schema().loads(r.text, many=True)
|
||||||
|
|
||||||
def fetch_pdfs(self) -> List[PDF]:
    """Fetch all PDFs from the ``pdfs/`` endpoint.

    :return: the response body deserialized with ``PDF.schema()``
        (``many=True``)
    """
    response = self.s.get(self.baseURL + "pdfs/")
    pdf_schema = PDF.schema()
    return pdf_schema.loads(response.text, many=True)
|
||||||
|
|
|
@ -2,7 +2,7 @@ from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
|
|
||||||
from dataclasses_json import DataClassJsonMixin, dataclass_json, Undefined, config
|
from dataclasses_json import DataClassJsonMixin, config
|
||||||
from marshmallow import fields
|
from marshmallow import fields
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,7 +35,6 @@ class Paper(DataClassJsonMixin):
|
||||||
return self.pdfs[0]
|
return self.pdfs[0]
|
||||||
|
|
||||||
|
|
||||||
@dataclass_json()
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Author(DataClassJsonMixin):
|
class Author(DataClassJsonMixin):
|
||||||
url: str
|
url: str
|
||||||
|
@ -43,3 +42,11 @@ class Author(DataClassJsonMixin):
|
||||||
name: str
|
name: str
|
||||||
affiliation: Optional[str]
|
affiliation: Optional[str]
|
||||||
orcid_id: Optional[str]
|
orcid_id: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Keyword(DataClassJsonMixin):
    """Keyword record, deserialized from JSON via ``Keyword.schema()``."""

    # presumably the API URL of this keyword resource -- TODO confirm
    url: str
    # papers attached to this keyword
    papers: List[Paper]
    name: str
    # NOTE(review): this field shares its name with the ``schema()``
    # classmethod that callers invoke as ``Keyword.schema()``.  On instances
    # the attribute shadows the method -- confirm nothing calls
    # ``instance.schema()``.
    schema: str
|
||||||
|
|
1
paperlibrary/library/__init__.py
Normal file
1
paperlibrary/library/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
from .library import *
|
78
paperlibrary/library/library.py
Normal file
78
paperlibrary/library/library.py
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import string
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tzlocal import get_localzone
|
||||||
|
|
||||||
|
from paperlibrary.api import PaperLibraryAPI
|
||||||
|
from paperlibrary.config import basedir
|
||||||
|
|
||||||
|
|
||||||
|
# Characters that survive sanitising: ASCII letters, digits, a little
# punctuation, the space and the German umlauts.  Built once at import time
# as a frozenset so each membership test is O(1) instead of a linear scan of
# a string that was rebuilt on every call.
_VALID_FILENAME_CHARS = frozenset(
    f"-_.() {string.ascii_letters}{string.digits}äÄöÖüÜ"
)


def format_filename(s: str) -> str:
    """Return *s* with every character that is not whitelisted removed.

    Kept characters are ASCII letters, digits, ``-_.()``, the space and the
    umlauts ``äÄöÖüÜ``.  Spaces are deliberately kept.  The result may be an
    empty string when *s* contains no whitelisted character at all.

    :param s: arbitrary text, e.g. an author name or a paper title
    :return: the filtered text, in the original character order
    """
    return "".join(c for c in s if c in _VALID_FILENAME_CHARS)
|
||||||
|
|
||||||
|
|
||||||
|
def write_symlinks(api: PaperLibraryAPI):
    """Rebuild the ``by_author`` tree of symlinks pointing at the stored PDFs.

    ``basedir / "by_author"`` is removed and recreated.  Every author
    returned by ``api.fetch_authors()`` gets a subdirectory named after the
    sanitised author name, containing one symlink per paper that points at
    ``basedir / "pdfs" / "<main pdf id>.pdf"``.

    Sanitising can map different names to the same string, so an already
    existing author directory is reused and a clashing link name is
    disambiguated with the PDF id instead of aborting with
    ``FileExistsError``.

    :param api: client used to fetch the list of authors
    """
    pdf_dir = basedir / "pdfs"
    author_dir = basedir / "by_author"
    shutil.rmtree(author_dir, ignore_errors=True)
    # rmtree errors are ignored above, so the directory may still exist.
    author_dir.mkdir(parents=True, exist_ok=True)

    for author in api.fetch_authors():
        author_subdir = author_dir / format_filename(author.name)
        author_subdir.mkdir(exist_ok=True)
        for paper in author.papers:
            pdf_id = paper.main_pdf.id
            sourcefile = pdf_dir / f"{pdf_id}.pdf"
            title = format_filename(paper.title)
            targetfile = author_subdir / f"{title}.pdf"
            # exists() follows symlinks and is False for a dangling link,
            # hence the additional is_symlink() check.
            if targetfile.is_symlink() or targetfile.exists():
                targetfile = author_subdir / f"{title} ({pdf_id}).pdf"
                if targetfile.is_symlink() or targetfile.exists():
                    # Same title and same PDF: the link is already there.
                    continue
            targetfile.symlink_to(sourcefile)
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
    """Download *url* with the API's session and store it at *target_file*.

    The body is streamed in chunks instead of being buffered in memory, and
    is first written to a sibling ``<name>.part`` file that is moved over
    *target_file* only after the download finished, so an interrupted
    transfer never leaves a truncated file at the final path.

    :param api: client whose session ``api.s`` performs the request
    :param url: address of the file to download
    :param target_file: destination path; replaced if it already exists
    :raises requests.HTTPError: via ``raise_for_status()`` on an error status
    """
    partial_file = target_file.with_name(target_file.name + ".part")
    # stream=True defers fetching the body until iter_content() is consumed;
    # the context manager releases the connection afterwards.
    with api.s.get(url, stream=True) as r:
        r.raise_for_status()
        try:
            with partial_file.open("wb") as f:
                for chunk in r.iter_content(65536):
                    f.write(chunk)
        except Exception:
            # Do not leave a half-written temporary file behind.
            if partial_file.exists():
                partial_file.unlink()
            raise
    partial_file.replace(target_file)
|
||||||
|
|
||||||
|
|
||||||
|
def hash_file(file: Path, buffer_size=65536) -> str:
    """Return the hexadecimal SHA-256 digest of the contents of *file*.

    The file is read in pieces of *buffer_size* bytes so large files are
    not loaded into memory at once.

    :param file: path of the file to hash
    :param buffer_size: number of bytes per read; must not be ``0``
    :return: the digest as a lowercase hex string
    :raises ValueError: if *buffer_size* is ``0``
    """
    if buffer_size == 0:
        # read(0) returns b"" immediately, which would end the loop at once
        # and silently yield the digest of empty input.
        raise ValueError("buffer_size must not be 0")
    sha256 = hashlib.sha256()
    with file.open("rb") as f:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF).
        for data in iter(lambda: f.read(buffer_size), b""):
            sha256.update(data)
    return sha256.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def update_pdfs(api: PaperLibraryAPI):
    """Download missing PDFs and detect local copies that differ from the server.

    For every PDF returned by ``api.fetch_pdfs()`` the file
    ``basedir / "pdfs" / "<id>.pdf"`` is downloaded if it does not exist
    yet.  An existing file is hashed and compared with ``pdf.sha265`` (the
    attribute name as used on the PDF object); on a mismatch the local
    modification time is compared with ``pdf.updated_at`` and an error is
    raised, because resolving the conflict is not implemented yet.

    :param api: client used to list and download the PDFs
    :raises ValueError: if an existing local file differs from the server's
        hash; the message says which side is newer and names the file
    """
    pdf_dir = basedir / "pdfs"
    pdf_dir.mkdir(parents=True, exist_ok=True)

    for pdf in api.fetch_pdfs():
        pdf_file = pdf_dir / f"{pdf.id}.pdf"
        if not pdf_file.exists():
            download_file(api, pdf.file, pdf_file)
            continue
        if hash_file(pdf_file) == pdf.sha265:
            continue

        # Timezone-aware local mtime.
        # NOTE(review): the comparison below assumes pdf.updated_at is
        # timezone-aware as well -- confirm against the PDF model.
        modification_date = datetime.fromtimestamp(
            pdf_file.stat().st_mtime,
            get_localzone()
        )
        details = f"{pdf_file}: local {modification_date}, remote {pdf.updated_at}"
        # TODO: check if file should be uploaded or downloaded
        if modification_date > pdf.updated_at:
            raise ValueError(f"local file is newer ({details})")
        raise ValueError(f"remote file is newer ({details})")
|
Loading…
Reference in a new issue