support notes

2024-09-20 17:03:46 +02:00 · 2020-11-01 13:48:40 +01:00 · 2020-11-01 13:48:40 +01:00 · 3de87fc735
commit 3de87fc735
parent 051dac5411
6 changed files with 153 additions and 31 deletions
--- a/paperlibrary/api/api.py
+++ b/paperlibrary/api/api.py
@@ -1,8 +1,9 @@
-from typing import List
+from pathlib import Path
+from typing import List, Dict
 from requests import Session
-from paperlibrary.api.models import Author, PDF, Keyword
+from paperlibrary.api.models import Author, PDF, Keyword, PaperComplete
 class PaperLibraryAPI:
@@ -13,14 +14,35 @@ class PaperLibraryAPI:
        self.s = Session()
        self.s.headers.update({"Authorization": "Token " + auth_token})
    def fetch_papers(self) -> List[PaperComplete]:
        r = self.s.get(self.baseURL + "papers/")
        return PaperComplete.schema().loads(r.text, many=True)
    def fetch_authors(self) -> List[Author]:
        r = self.s.get(self.baseURL + "authors/")
        return Author.schema().loads(r.text, many=True)
-    def fetch_keywords(self) -> List[Author]:
+    def fetch_keywords(self) -> List[Keyword]:
        r = self.s.get(self.baseURL + "keywords/")
        return Keyword.schema().loads(r.text, many=True)
    def fetch_papers_by_year(self) -> Dict[int, List[PaperComplete]]:
        papers = self.fetch_papers()
        years: Dict[int, List[PaperComplete]] = {}
        for paper in papers:
            if paper.year in years:
                years[paper.year].append(paper)
            else:
                years[paper.year] = [paper]
        return years
    def fetch_pdfs(self) -> List[PDF]:
        r = self.s.get(self.baseURL + "pdfs/")
        return PDF.schema().loads(r.text, many=True)
    def upload_pdf(self, pdf, file: Path) -> PDF:
        with file.open("rb") as f:
            r = self.s.put(pdf.url, files={
                "file": f,
            })
        return PDF.schema().loads(r.text)
--- a/paperlibrary/api/models.py
+++ b/paperlibrary/api/models.py
@@ -6,13 +6,23 @@ from dataclasses_json import DataClassJsonMixin, config
 from marshmallow import fields
@dataclass
 class Note(DataClassJsonMixin):
    paper: int
    recommended_by: List[str]
    custom_title: str
    notes_md: str
    notes_html: str
@dataclass
 class PDF(DataClassJsonMixin):
    id: int
    url: str
    file: str
-    sha265: str
+    sha256: str
    type: str
-    preview: str
+    preview: Optional[str]
    updated_at: datetime = field(
        metadata=config(
            encoder=datetime.isoformat,
@@ -24,29 +34,52 @@ class PDF(DataClassJsonMixin):
@dataclass
 class Paper(DataClassJsonMixin):
-    id: int
+    # id: int
    url: str
    title: str
    pdfs: List[PDF]
-    doi: str
+    doi: Optional[str]
    note: Optional[Note]
    @property
-    def main_pdf(self) -> PDF:
+    def main_pdf(self) -> Optional[PDF]:
        if not self.pdfs:
            return None
        return self.pdfs[0]
@dataclass
 class PaperComplete(Paper):
    keywords: List[str]
    authors: List[str]
    first_author: str
    publication: str
    doctype: str
    arxiv_id: str
    bibcode: str
    year: int
    pubdate: str  # TODO: to datetime
    entry_date: str  # TODO: to datetime
    citation_count: int
@dataclass
 class Author(DataClassJsonMixin):
    url: str
    papers: List[Paper]
    name: str
    pretty_name: Optional[str]
    affiliation: Optional[str]
    orcid_id: Optional[str]
    @property
    def display_name(self):
        return self.pretty_name if self.pretty_name else self.name
@dataclass
 class Keyword(DataClassJsonMixin):
    url: str
    papers: List[Paper]
    name: str
-    schema: str
+    kw_schema: str
--- a/paperlibrary/library/library.py
+++ b/paperlibrary/library/library.py
@@ -1,10 +1,10 @@
 import hashlib
 import os
 import shutil
 import string
 from datetime import datetime
 from pathlib import Path
 from alive_progress import alive_bar
 from tzlocal import get_localzone
 from paperlibrary.api import PaperLibraryAPI
@@ -12,46 +12,94 @@ from paperlibrary.config import basedir
 def format_filename(s: str) -> str:
-    additional_letters = ["ä", "Ä", "ö", "Ö", "ü", "Ü"]
+    invalid_chars = {"/"}
-    valid_chars = f"-_.() {string.ascii_letters}{string.digits}{''.join(additional_letters)}"
+    filename = ''.join(c for c in s if c not in invalid_chars)
-    filename = ''.join(c for c in s if c in valid_chars)
    # filename = filename.replace(' ', '_')  # I don't like spaces in filenames.
    if not filename:
        raise ValueError("empty filename")
    return filename
 def write_symlinks(api: PaperLibraryAPI):
    ...
    pdf_dir = basedir / "pdfs"
    pdf_dir.mkdir(exist_ok=True)
    author_dir = basedir / "by_author"
-    shutil.rmtree(author_dir, ignore_errors=True)
+    keyword_dir = basedir / "by_keyword"
-    author_dir.mkdir()
+    year_dir = basedir / "by_year"
    title_dir = basedir / "by_title"
    custom_title_dir = basedir / "by_custom_title"
    for directory in [author_dir, keyword_dir, year_dir, title_dir, custom_title_dir]:
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir()
    for author in api.fetch_authors():
-        author_subdir = author_dir / format_filename(author.name)
+        author_subdir = author_dir / format_filename(author.display_name)
        author_subdir.mkdir()
        for paper in author.papers:
            if not paper.main_pdf:
                continue
            sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
            targetfile = author_subdir / "{}.pdf".format(format_filename(paper.title))
            targetfile.symlink_to(sourcefile)
    for keyword in api.fetch_keywords():
        keyword_subdir = keyword_dir / format_filename(keyword.name)
        keyword_subdir.mkdir()
        for paper in keyword.papers:
            if not paper.main_pdf:
                continue
            sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
            targetfile = keyword_subdir / "{}.pdf".format(format_filename(paper.title))
            targetfile.symlink_to(sourcefile)
    for paper in api.fetch_papers():
        if not paper.main_pdf:
            continue
        sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
        targetfile = title_dir / "{}.pdf".format(format_filename(paper.title))
        targetfile.symlink_to(sourcefile)
        if not paper.note:
            continue
        sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
        targetfile = custom_title_dir / "{}.pdf".format(format_filename(paper.note.custom_title))
        targetfile.symlink_to(sourcefile)
    for year, papers in api.fetch_papers_by_year().items():
        year_subdir = year_dir / str(year)
        year_subdir.mkdir()
        for paper in papers:
            if not paper.main_pdf:
                continue
            sourcefile = pdf_dir / f"{paper.main_pdf.id}.pdf"
            targetfile = year_subdir / "{}.pdf".format(format_filename(paper.title))
            targetfile.symlink_to(sourcefile)
 def download_file(api: PaperLibraryAPI, url: str, target_file: Path):
    r = api.s.get(url)
    r.raise_for_status()
    with alive_bar(int(r.headers["Content-Length"])) as bar:
        with target_file.open("wb") as f:
            for chunk in r.iter_content(1024):
                for _ in range(1024):
                    bar()
                f.write(chunk)
 def hash_file(file: Path, buffer_size=65536) -> str:
-    sha265 = hashlib.sha256()
+    sha256 = hashlib.sha256()
    with file.open("rb") as f:
        while True:
            data = f.read(buffer_size)
            if not data:
                break
-            sha265.update(data)
+            sha256.update(data)
-    return sha265.hexdigest()
+    return sha256.hexdigest()
 def update_pdfs(api: PaperLibraryAPI):
@@ -63,16 +111,14 @@ def update_pdfs(api: PaperLibraryAPI):
        if not pdf_file.exists():
            download_file(api, pdf.file, pdf_file)
            continue
-        if hash_file(pdf_file) != pdf.sha265:
+        if hash_file(pdf_file) != pdf.sha256:
            modification_date = datetime.fromtimestamp(
                os.path.getmtime(pdf_file),
                get_localzone()
            )
            print(modification_date)
            print(pdf.updated_at)
            # print(modification_date - pdf.updated_at)
            if modification_date > pdf.updated_at:
-                raise ValueError("local file is newer")
+                print("local file is newer")
                api.upload_pdf(pdf, pdf_file)
            else:
-                raise ValueError("remote file is newer")
+                print("remote file is newer")
-            # TODO: check if file should be uploaded or downloaded
+                download_file(api, pdf.file, pdf_file)
--- a/paperlibrary/pap.py
+++ b/paperlibrary/pap.py
@@ -9,12 +9,20 @@ from paperlibrary.library import write_symlinks, update_pdfs
 def cli():
    pass
@cli.command()
 def update():
-    api=PaperLibraryAPI(url,auth_token=auth_token)
+    api = PaperLibraryAPI(url, auth_token=auth_token)
    write_symlinks(api)
    update_pdfs(api)
@cli.command()
 def test():
    api = PaperLibraryAPI(url, auth_token=auth_token)
    print(api.fetch_papers())
 if __name__ == '__main__':
    cli()
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,3 +1,11 @@
 [[package]]
 name = "alive-progress"
 version = "1.6.1"
 description = "A new kind of Progress Bar, with real-time throughput, eta and very cool animations!"
 category = "main"
 optional = false
 python-versions = ">=2.7, <4"
 [[package]]
 name = "certifi"
 version = "2020.6.20"
@@ -161,9 +169,13 @@ socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.8"
-content-hash = "55c1b4123bca6c85380c21f1e6f5faa129bb1171aa2c9a45497e5916e8c4ac2e"
+content-hash = "8bb7dbfcc0d218d22ff3dcafae140daff4799bfeb988731a3759412a256d0e29"
 [metadata.files]
 alive-progress = [
    {file = "alive-progress-1.6.1.tar.gz", hash = "sha256:2a0d7516ec0f596d5ce53755c0913a909eb1c91854e1d782e511ef5e1dd53218"},
    {file = "alive_progress-1.6.1-py3-none-any.whl", hash = "sha256:9a0fae6b94fb4e4bcd9fb51760506d29a33358ebbfef2c6516dce3e359a661b5"},
 ]
 certifi = [
    {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
    {file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ requests = "^2.24.0"
 click = "^7.1.2"
 dataclasses-json = "^0.5.2"
 tzlocal = "^2.1"
 alive-progress = "^1.6.1"
 [build-system]
 requires = ["poetry-core>=1.0.0"]