diff --git a/stopwords.py b/stopwords.py index 8a7c178..24aca87 100644 --- a/stopwords.py +++ b/stopwords.py @@ -1,3 +1,8 @@ +""" +Stop words taken from https://github.com/explosion/spaCy/blob/master/spacy/lang/en/stop_words.py + +Contractions have been removed. +""" STOP_WORDS = set( """ a about above across after afterwards again against all almost alone along diff --git a/typo.py b/typo.py index fea36a8..88ef82c 100644 --- a/typo.py +++ b/typo.py @@ -1,3 +1,7 @@ +""" +Common typos of names, keyed by the correct spelling, used to unify names in the database. +""" + typos = { "Matt": {"Mat", "Mattt", "\"Matt"}, "Sam": {"San", "Nott", "Sma", "Sasm", "Sm"},