From 92ce4c3859324d30ad60a550301ac56e2ba0110c Mon Sep 17 00:00:00 2001 From: Lukas Winkler Date: Sat, 7 Mar 2020 10:51:44 +0100 Subject: [PATCH] add comments --- stopwords.py | 5 +++++ typo.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/stopwords.py b/stopwords.py index 8a7c178..24aca87 100644 --- a/stopwords.py +++ b/stopwords.py @@ -1,3 +1,8 @@ +""" +from https://github.com/explosion/spaCy/blob/master/spacy/lang/en/stop_words.py + +without contractions +""" STOP_WORDS = set( """ a about above across after afterwards again against all almost alone along diff --git a/typo.py b/typo.py index fea36a8..88ef82c 100644 --- a/typo.py +++ b/typo.py @@ -1,3 +1,7 @@ +""" +replace common typos of names to unify them in the database +""" + typos = { "Matt": {"Mat", "Mattt", "\"Matt"}, "Sam": {"San", "Nott", "Sma", "Sasm", "Sm"},