import os
import pathlib
import shutil
import string

# Characters that survive filename sanitisation: ASCII letters, digits,
# space and "-_.()".
_VALID_FILENAME_CHARS = frozenset("-_.() " + string.ascii_letters + string.digits)


def format_filename(s: str) -> str:
    """Return *s* with every character outside a small whitelist removed.

    Only ASCII letters, digits, spaces and ``-_.()`` are kept. Any other
    character (path separators, accented letters, punctuation, ...) is
    dropped, not replaced, so the result may be shorter than *s* or empty.
    """
    return "".join(c for c in s if c in _VALID_FILENAME_CHARS)


def _safe_name(raw, fallback: str) -> str:
    """Sanitise *raw* into a single path component, or return *fallback*.

    The fallback is used when nothing usable is left after sanitising
    (``None``, empty, only spaces, ``.`` or ``..``): joining such a component
    onto a directory would yield the directory itself or its parent.
    """
    name = format_filename(raw or "")
    return name if name.strip(" .") else fallback


def _link_paper(paper, librarydir: pathlib.Path, target_dir: pathlib.Path) -> None:
    """Symlink ``<librarydir>/<paper.id>.pdf`` into *target_dir* by title.

    The link is named ``<sanitised title>.pdf``. If that name is already
    occupied by a different paper, ``<sanitised title> (<id>).pdf`` is used
    instead. A paper that is already linked in *target_dir* is left alone.

    Raises:
        FileExistsError: if both candidate names are taken by other entries.
    """
    sourcefile = librarydir / "{}.pdf".format(paper.id)
    name = _safe_name(paper.title, str(paper.id))
    candidates = ("{}.pdf".format(name), "{} ({}).pdf".format(name, paper.id))
    for candidate in candidates:
        targetfile = target_dir / candidate
        # is_symlink() is checked as well because exists() follows the link
        # and is False for a link whose PDF is not present in the library.
        if not (targetfile.is_symlink() or targetfile.exists()):
            targetfile.symlink_to(sourcefile)
            return
        if targetfile.is_symlink() and os.readlink(str(targetfile)) == str(sourcefile):
            return  # this very paper is already linked here
    raise FileExistsError(
        "no free link name for paper {} in {}".format(paper.id, target_dir)
    )


def create_library(librarydir: pathlib.Path, browsedir: pathlib.Path) -> None:
    """Rebuild the browsable symlink tree under *browsedir* from scratch.

    Any existing *browsedir* is deleted and recreated with three views, each
    containing symlinks named after the paper title that point at
    ``<librarydir>/<paper id>.pdf``:

    * ``title/`` -- every paper,
    * ``authors/<author name>/`` -- the papers joined to each author,
    * ``keywords/<keyword>/`` -- the papers joined to each keyword.

    Args:
        librarydir: directory holding the ``<id>.pdf`` files to link to.
        browsedir: directory to (re)create; its previous content is removed.

    Raises:
        FileExistsError: if a paper cannot be given a free link name.
    """
    # Explicit, function-local import of the models queried below; this
    # replaces the former module-level ``from models import *``.
    from models import Author, Keyword, Paper, PaperAuthors, PaperKeywords

    # rmtree() raises on a missing directory, which is the normal state on
    # the very first run.
    if browsedir.exists():
        shutil.rmtree(browsedir)
    browsedir.mkdir(parents=True)

    print("title")
    title_dir = browsedir / "title"
    title_dir.mkdir(exist_ok=True)
    for paper in Paper.select():
        _link_paper(paper, librarydir, title_dir)

    print("author")
    author_dir = browsedir / "authors"
    author_dir.mkdir(exist_ok=True)
    for author in Author.select():
        author_subdir = author_dir / _safe_name(author.name, "unnamed")
        # Distinct names can sanitise to the same directory name.
        author_subdir.mkdir(exist_ok=True)
        for paper in Paper.select().join(PaperAuthors).where(PaperAuthors.author == author):
            _link_paper(paper, librarydir, author_subdir)

    print("keywords")
    keywords_dir = browsedir / "keywords"
    keywords_dir.mkdir(exist_ok=True)
    for keyword in Keyword.select():
        keyword_subdir = keywords_dir / _safe_name(keyword.keyword, "unnamed")
        keyword_subdir.mkdir(exist_ok=True)
        for paper in Paper.select().join(PaperKeywords).where(PaperKeywords.keyword == keyword):
            _link_paper(paper, librarydir, keyword_subdir)
@cli.command()
def update():
    # CLI command: rebuild the browse tree from the PDF library.
    # Both directories are taken relative to the current working directory
    # and made absolute before being handed to create_library().
    # (Plain comments on purpose: click would show a docstring as help text.)
    library_root = pathlib.Path('./library').resolve()
    browse_root = pathlib.Path('./browse').resolve()
    create_library(library_root, browse_root)