1
0
Fork 0

first kind of working library version

This commit is contained in:
Lukas Winkler 2018-12-20 12:55:48 +01:00
parent 61a247fa56
commit a588fc9835
3 changed files with 64 additions and 4 deletions

3
.gitignore vendored
View file

@ -3,4 +3,5 @@ library/
storage.*
.idea/
*.egg-info
__pycache__/
__pycache__/
browse/

46
create_library.py Normal file
View file

@ -0,0 +1,46 @@
import pathlib
import shutil
import string
from models import *
# Characters allowed to survive sanitizing; built once instead of per call.
_VALID_FILENAME_CHARS = frozenset("-_.() " + string.ascii_letters + string.digits)


def format_filename(s: str, fallback: str = "unnamed") -> str:
    """Return *s* reduced to characters that are safe in a file name.

    Only ASCII letters, digits, spaces and the characters ``-_.()`` are
    kept; every other character is dropped (not replaced).

    Args:
        s: Arbitrary text, e.g. a paper title or an author name.
        fallback: Name returned when nothing usable is left after
            filtering, i.e. the result would be empty or consist solely of
            dots and spaces (``""``, ``"."``, ``".."``). Such results are
            not valid names for a new file or directory.

    Returns:
        The filtered string, or *fallback* if the filtered string is not
        usable as a path component.
    """
    filename = "".join(c for c in s if c in _VALID_FILENAME_CHARS)
    # "", ".", ".." and whitespace-only names would point at the parent or
    # containing directory (or at nothing) when joined onto a path.
    if not filename.strip(". "):
        return fallback
    return filename
def _path_taken(path: pathlib.Path) -> bool:
    """Return True if something already occupies *path*."""
    # Path.exists() follows symlinks, so a link whose target PDF is missing
    # would look free without the explicit is_symlink() check.
    return path.is_symlink() or path.exists()


def _link_papers(librarydir: pathlib.Path, target_dir: pathlib.Path, papers) -> None:
    """Symlink the PDF of every paper in *papers* into *target_dir*.

    Each link is named after the sanitized paper title and points at
    ``<librarydir>/<paper.id>.pdf``. If that name is already taken, the
    paper id is appended to the name so the link can still be created.

    Args:
        librarydir: Directory holding the stored ``<id>.pdf`` files.
        target_dir: Existing directory that receives the symlinks.
        papers: Iterable of objects exposing ``id`` and ``title``.
    """
    linked_ids = set()
    for paper in papers:
        # A joined query can yield the same paper more than once; linking it
        # a second time would only collide with the first link.
        if paper.id in linked_ids:
            continue
        linked_ids.add(paper.id)

        sourcefile = librarydir / "{}.pdf".format(paper.id)
        title = format_filename(paper.title)
        targetfile = target_dir / "{}.pdf".format(title)
        if _path_taken(targetfile):
            # Different papers may share a sanitized title; disambiguate by id.
            targetfile = target_dir / "{} ({}).pdf".format(title, paper.id)
            if _path_taken(targetfile):
                continue
        targetfile.symlink_to(sourcefile)


def create_library(librarydir: pathlib.Path, browsedir: pathlib.Path) -> None:
    """Rebuild the tree of symlinks under *browsedir*.

    Any existing *browsedir* is deleted first. Three views are then created,
    each containing symlinks to ``<librarydir>/<paper.id>.pdf``:

    * ``title/<title>.pdf`` for every paper,
    * ``authors/<author name>/<title>.pdf`` for every author,
    * ``keywords/<keyword>/<title>.pdf`` for every keyword.

    Names are passed through ``format_filename``. Progress is reported by
    printing the name of each view as it is started.

    Args:
        librarydir: Directory holding the stored ``<id>.pdf`` files.
        browsedir: Directory to (re)create; its previous content is lost.
    """
    # On the very first run there is nothing to remove yet.
    if browsedir.exists():
        shutil.rmtree(browsedir)
    browsedir.mkdir(parents=True)

    print("title")
    title_dir = browsedir / "title"
    title_dir.mkdir(exist_ok=True)
    _link_papers(librarydir, title_dir, Paper.select())

    print("author")
    author_dir = browsedir / "authors"
    author_dir.mkdir(exist_ok=True)
    for author in Author.select():
        author_subdir = author_dir / format_filename(author.name)
        # exist_ok: two names can sanitize to the same directory name.
        author_subdir.mkdir(exist_ok=True)
        papers = Paper.select().join(PaperAuthors).where(PaperAuthors.author == author)
        _link_papers(librarydir, author_subdir, papers)

    print("keywords")
    keywords_dir = browsedir / "keywords"
    keywords_dir.mkdir(exist_ok=True)
    for keyword in Keyword.select():
        keyword_subdir = keywords_dir / format_filename(keyword.keyword)
        keyword_subdir.mkdir(exist_ok=True)
        papers = Paper.select().join(PaperKeywords).where(PaperKeywords.keyword == keyword)
        _link_papers(librarydir, keyword_subdir, papers)

19
main.py
View file

@ -1,4 +1,6 @@
import json
import math
import pathlib
import ads
import ads.config
@ -8,6 +10,7 @@ import requests
from peewee import Model
import config
from create_library import create_library
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
ads.config.token = config.ads_token
@ -45,7 +48,7 @@ def init():
@click.option("-t", "--title")
def add(search_query, author, title):
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
"doctype", "identifier"]
"keyword", "doctype", "identifier", "links_data"]
if author:
search_query += "author:" + author
if title:
@ -61,7 +64,7 @@ def add(search_query, author, title):
first_ten = papers[:10]
single_paper: ads.search.Article
for index, single_paper in enumerate(first_ten):
print(index, single_paper.title[0])
print(index, single_paper.title[0],single_paper.first_author)
selected_index = click.prompt('select paper', type=int)
selection = papers[selected_index] # type:ads.search.Article
@ -92,15 +95,20 @@ def add(search_query, author, title):
paper.year = selection.year
paper.pubdate = selection.pubdate
paper.pdf_downloaded = False
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
paper.publication = Publication.get_or_create(name=selection.pub)[0]
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
paper.bibtex = bibtex
links = [json.loads(string) for string in selection.links_data]
print(links)
paper.save()
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
for author in db.batch_commit(authors, 100):
PaperAuthors.create(author=author, paper=paper)
keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword]
for keyword in db.batch_commit(keywords, 100):
PaperKeywords.create(keyword=keyword, paper=paper)
print("fetching PDF")
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
r = requests.get(arxiv_url, stream=True)
@ -116,5 +124,10 @@ def add(search_query, author, title):
paper.save()
@cli.command()
def update():
    # Resolve both directories relative to the current working directory,
    # then hand them to create_library. (A comment rather than a docstring:
    # click would display a docstring as the command's help text.)
    library_root = pathlib.Path('./library').resolve()
    browse_root = pathlib.Path('./browse').resolve()
    create_library(library_root, browse_root)
if __name__ == '__main__':
cli()