mirror of
https://github.com/Findus23/pyLanguagetool.git
synced 2024-09-09 04:13:46 +02:00
Add private wordlist features and spellcheck docs
This commit is contained in:
parent
93ed83e138
commit
fe9bd06522
7 changed files with 58 additions and 11 deletions
|
@ -14,7 +14,9 @@ tests:
|
|||
|
||||
docs:
|
||||
stage: docs
|
||||
script: python setup.py build_sphinx -W
|
||||
script:
|
||||
- python setup.py build_sphinx -W
|
||||
- pylanguagetool --pwl spelling.txt --disabled-rules=WHITESPACE_RULE README.rst
|
||||
|
||||
check:
|
||||
stage: check
|
||||
|
@ -31,4 +33,5 @@ bdist:
|
|||
stages:
|
||||
- check
|
||||
- tests
|
||||
- docs
|
||||
- build
|
||||
|
|
|
@ -18,3 +18,4 @@ script:
|
|||
- python -m pytest
|
||||
- python setup.py check -ms
|
||||
- python setup.py build_sphinx -W
|
||||
- pylanguagetool --pwl spelling.txt --disabled-rules=WHITESPACE_RULE README.rst
|
||||
|
|
25
README.rst
25
README.rst
|
@ -6,7 +6,7 @@ pyLanguagetool
|
|||
A python library and CLI for the LanguageTool_ `JSON API`_.
|
||||
|
||||
LanguageTool_ is an open source spellchecking platform. It supports a large
|
||||
variety of
|
||||
variety of languages and has advanced grammar support.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
@ -48,9 +48,10 @@ replacements.
|
|||
|
||||
Configuration
|
||||
-------------
|
||||
All `Languagetool API`_ parameters can be set via commandline arguments,
|
||||
environment variables or a configuration file (~/.config/pyLanguagetool.conf)
|
||||
For more information about the configuration file syntax, read the `ConfigArgParse documentation`_
|
||||
All `LanguageTool API`_ parameters can be set via command line arguments,
|
||||
environment variables or a configuration file
|
||||
(``~/.config/pyLanguagetool.conf``). For more information about the
|
||||
configuration file syntax, read the `ConfigArgParse documentation`_.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -64,7 +65,7 @@ Parameters
|
|||
[-e ENABLED_RULES] [-d DISABLED_RULES]
|
||||
[--enabled-categories ENABLED_CATEGORIES]
|
||||
[--disabled-categories DISABLED_CATEGORIES]
|
||||
[--enabled-only]
|
||||
[--enabled-only] [--pwl PWL]
|
||||
[input file]
|
||||
|
||||
Args that start with '--' (eg. -v) can also be set in a config file
|
||||
|
@ -121,6 +122,13 @@ Parameters
|
|||
--enabled-only enable only the rules and categories whose IDs are
|
||||
specified with --enabled-rules or --enabled-categories
|
||||
|
||||
--pwl PWL, --personal-word-list PWL
|
||||
File name of personal dictionary. A private dictionary
|
||||
can be used to add special words that would otherwise
|
||||
be marked as spelling errors. [env var:
|
||||
PERSONAL_WORD_LIST]
|
||||
|
||||
|
||||
|
||||
.. |license| image:: https://img.shields.io/badge/license-MIT-blue.svg
|
||||
:target: https://raw.githubusercontent.com/Findus23/pyLanguagetool/master/LICENSE
|
||||
|
@ -135,14 +143,15 @@ Parameters
|
|||
Privacy
|
||||
-------
|
||||
|
||||
By default pyLangugagetool sends all text via HTTPS to the languagetool.org server (see their `privacy policy`_).
|
||||
You can also `setup your own server`_ and use it by changing --api-url.
|
||||
By default pyLanguagetool sends all text via HTTPS to the `LanguageTool`_
|
||||
server (see their `privacy policy`_). You can also `setup your own server`_ and
|
||||
use it by changing the ``--api-url`` argument.
|
||||
|
||||
.. _LanguageTool: https://languagetool.org/
|
||||
|
||||
.. _JSON API: https://languagetool.org/http-api/swagger-ui/#/default
|
||||
|
||||
.. _Languagetool API: https://languagetool.org/http-api/swagger-ui/#/default
|
||||
.. _LanguageTool API: https://languagetool.org/http-api/swagger-ui/#/default
|
||||
|
||||
.. _ConfigArgParse documentation: https://github.com/bw2/ConfigArgParse#config-file-syntax
|
||||
|
||||
|
|
|
@ -49,6 +49,13 @@ def init_config():
|
|||
p.add_argument("--enabled-only", action='store_true', default=False,
|
||||
help="enable only the rules and categories whose IDs are specified with --enabled-rules or --enabled-categories"
|
||||
)
|
||||
p.add_argument(
|
||||
'--pwl', '--personal-word-list',
|
||||
env_var='PERSONAL_WORD_LIST', help=(
|
||||
'File name of personal dictionary. A private dictionary'
|
||||
' can be used to add special words that would otherwise'
|
||||
' be marked as spelling errors.'
|
||||
))
|
||||
|
||||
c = vars(p.parse_args())
|
||||
if c["enabled_only"] and (c["disabled_categories"] or c["disabled_rules"]):
|
||||
|
@ -166,6 +173,10 @@ def main():
|
|||
if config["verbose"]:
|
||||
print(sys.version)
|
||||
|
||||
if config['pwl']:
|
||||
with open(config['pwl'], 'r') as fs:
|
||||
config['pwl'] = [w.strip() for w in fs.readlines()]
|
||||
|
||||
input_text, inputtype = get_input_text(config)
|
||||
if not inputtype:
|
||||
inputtype = config["input_type"]
|
||||
|
|
|
@ -41,10 +41,18 @@ def get_languages(api_url):
|
|||
return r.json()
|
||||
|
||||
|
||||
def _is_in_pwl(match, pwl):
|
||||
start = match['context']['offset']
|
||||
end = start + match['context']['length']
|
||||
word = match['context']['text'][start:end]
|
||||
return word in pwl
|
||||
|
||||
|
||||
def check(input_text, api_url, lang, mother_tongue=None, preferred_variants=None,
|
||||
enabled_rules=None, disabled_rules=None,
|
||||
enabled_categories=None, disabled_categories=None,
|
||||
enabled_only=False, verbose=False,
|
||||
pwl=None,
|
||||
**kwargs):
|
||||
"""
|
||||
Check given text and return API response as a dictionary.
|
||||
|
@ -96,6 +104,10 @@ def check(input_text, api_url, lang, mother_tongue=None, preferred_variants=None
|
|||
If ``True``, a more verbose output will be printed. Defaults to
|
||||
``False``.
|
||||
|
||||
pwl (List[str]):
|
||||
Personal word list. A custom dictionary of words that should be
|
||||
excluded from spell checking errors.
|
||||
|
||||
Returns:
|
||||
dict:
|
||||
A dictionary representation of the JSON API response.
|
||||
|
@ -157,10 +169,18 @@ def check(input_text, api_url, lang, mother_tongue=None, preferred_variants=None
|
|||
post_parameters["disabledCategories"] = disabled_categories
|
||||
if enabled_only:
|
||||
post_parameters["enabledOnly"] = True
|
||||
|
||||
r = requests.post(api_url + "check", data=post_parameters)
|
||||
if r.status_code != 200:
|
||||
raise ValueError(r.text)
|
||||
if verbose:
|
||||
print(post_parameters)
|
||||
print(r.json())
|
||||
return r.json()
|
||||
data = r.json()
|
||||
if pwl:
|
||||
matches = data.pop('matches', [])
|
||||
data['matches'] = [
|
||||
match for match in matches
|
||||
if not _is_in_pwl(match, pwl)
|
||||
]
|
||||
return data
|
||||
|
|
|
@ -29,7 +29,7 @@ def html2text(html):
|
|||
sys.exit(4)
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
# remove scripts from html
|
||||
for script in soup(["script", "style", "code", "pre"]):
|
||||
for script in soup(["script", "style", "code", "pre"]) + soup("span", {"class": "literal"}):
|
||||
script.extract()
|
||||
return soup.get_text()
|
||||
|
||||
|
|
3
spelling.txt
Normal file
3
spelling.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
pyLanguagetool
|
||||
pipenv
|
||||
ConfigArgParse
|
Loading…
Reference in a new issue