mdevolde
diff --git a/language_tool_python/__main__.py b/language_tool_python/__main__.py
Lines changed: 57 additions & 2 deletions
diff --git a/language_tool_python/config_file.py b/language_tool_python/config_file.py
Lines changed: 23 additions & 1 deletion
diff --git a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py
Lines changed: 70 additions & 19 deletions
diff --git a/language_tool_python/language_tag.py b/language_tool_python/language_tag.py
Lines changed: 55 additions & 2 deletions
@@ -13,12 +13,18 @@
 
 try:
     __version__ = version("language_tool_python")
-except PackageNotFoundError:
+except PackageNotFoundError:  # Package not installed: fall back to the version in pyproject.toml (path is relative to the current working directory)
     with open("pyproject.toml", "rb") as f:
         __version__ = toml.loads(f.read().decode('utf-8'))["project"]["version"]
 
 
 def parse_args() -> argparse.Namespace:
+    """
+    Parse command line arguments.
+
+    :return: parsed arguments
+    :rtype: argparse.Namespace
+    """
     parser = argparse.ArgumentParser(
         description=__doc__.strip() if __doc__ else None,
         prog='language_tool_python')
@@ -70,22 +76,71 @@ def parse_args() -> argparse.Namespace:
 
 
 class RulesAction(argparse.Action):
-    def __call__(self, parser: argparse.ArgumentParser, namespace: Any, values: Any, option_string: Optional[str] = None) -> str:
+    """
+    Custom argparse action to update a set of rules in the namespace.
+    This action is used to modify the set of rules stored in the argparse
+    namespace when the action is triggered. It updates the attribute specified
+    by 'self.dest' with the provided values.
+
+    Attributes:
+        dest (str): the destination attribute to update
+    """
+    def __call__(self, parser: argparse.ArgumentParser, namespace: Any, values: Any, option_string: Optional[str] = None):
+        """
+        This method is called when the action is triggered. It updates the set of rules
+        in the namespace with the provided values. The method is invoked automatically
+        by argparse when the corresponding command-line argument is encountered.
+
+        :param parser: The ArgumentParser object which contains this action.
+        :type parser: argparse.ArgumentParser
+        :param namespace: The namespace object that will be returned by parse_args().
+        :type namespace: Any
+        :param values: The argument values associated with the action.
+        :type values: Any
+        :param option_string: The option string that was used to invoke this action.
+        :type option_string: Optional[str]
+        """
         getattr(namespace, self.dest).update(values)
 
 
 def get_rules(rules: str) -> Set[str]:
+    """
+    Parse a string of rules and return a set of rule IDs.
+
+    :param rules: A string containing rule IDs (runs of word characters and hyphens) separated by any other characters.
+    :type rules: str
+    :return: A set of rule IDs, converted to upper case.
+    :rtype: Set[str]
+    """
     return {rule.upper() for rule in re.findall(r"[\w\-]+", rules)}
 
 
 def get_text(filename: Union[str, int], encoding: Optional[str], ignore: Optional[str]) -> str:
+    """
+    Read the content of a file and return it as a string, optionally replacing lines that match a regular expression with empty lines.
+
+    :param filename: The name of the file to read or file descriptor.
+    :type filename: Union[str, int]
+    :param encoding: The encoding to use for reading the file.
+    :type encoding: Optional[str]
+    :param ignore: A regular expression pattern; each line it matches (from the start of the line, via re.match) is replaced by a bare newline, which keeps the line count unchanged.
+    :type ignore: Optional[str]
+    :return: The content of the file as a string.
+    :rtype: str
+    """
     with open(filename, encoding=encoding) as f:
         text = ''.join('\n' if (ignore and re.match(ignore, line)) else line
                        for line in f.readlines())
     return text
 
 
 def main() -> int:
+    """
+    Main function to parse arguments, process files, and check text using LanguageTool.
+
+    :return: Exit status code
+    :rtype: int
+    """
     args = parse_args()
 
     status = 0
 
@@ -4,6 +4,7 @@
 import os
 import tempfile
 
+# Allowed configuration keys for LanguageTool.
 ALLOWED_CONFIG_KEYS = { 
     'maxTextLength', 'maxTextHardLength', 'maxCheckTimeMillis', 'maxErrorsPerWordRate',
     'maxSpellingSuggestions', 'maxCheckThreads', 'cacheSize', 'cacheTTLSeconds', 'requestLimit',
@@ -12,10 +13,25 @@
     'blockedReferrers', 'premiumOnly', 'disabledRuleIds', 'pipelineCaching', 'maxPipelinePoolSize',
     'pipelineExpireTimeInSeconds', 'pipelinePrewarming'
 }
+
 class LanguageToolConfig:
+    """
+    Configuration class for LanguageTool.
+
+    :param config: Dictionary containing configuration keys and values.
+    :type config: Dict[str, Any]
+
+    Attributes:
+        config (Dict[str, Any]): Dictionary containing configuration keys and values.
+        path (str): Path to the temporary file storing the configuration.
+    """
     config: Dict[str, Any]
     path: str
+
     def __init__(self, config: Dict[str, Any]):
+        """
+        Initialize the LanguageToolConfig object.
+        """
         assert set(config.keys()) <= ALLOWED_CONFIG_KEYS, f"unexpected keys in config: {set(config.keys()) - ALLOWED_CONFIG_KEYS}"
         assert len(config), "config cannot be empty"
         self.config = config
@@ -31,9 +47,15 @@ def __init__(self, config: Dict[str, Any]):
         self.path = self._create_temp_file()
 
     def _create_temp_file(self) -> str:
+        """
+        Create a temporary file to store the configuration.
+
+        :return: Path to the temporary file.
+        :rtype: str
+        """
         tmp_file = tempfile.NamedTemporaryFile(delete=False)
 
-        # WRite key=value entries as lines in temporary file.
+        # Write key=value entries as lines in temporary file.
         for key, value in self.config.items():
             next_line = f'{key}={value}\n'
             tmp_file.write(next_line.encode())
 
@@ -3,7 +3,6 @@
 import re
 import requests
 import subprocess
-import sys
 import tempfile
 import tqdm
 from typing import IO, Dict, Optional, Tuple
@@ -39,21 +38,18 @@
     re.MULTILINE)
 
 def parse_java_version(version_text: str) -> Tuple[int, int]:
-    """Return Java version (major1, major2).
-
-    >>> parse_java_version('''java version "1.6.0_65"
-    ... Java(TM) SE Runtime Environment (build 1.6.0_65-b14-462-11M4609)
-    ... Java HotSpot(TM) 64-Bit Server VM (build 20.65-b04-462, mixed mode))
-    ... ''')
-    (1, 6)
+    """
+    Parse the Java version from a given version text.
 
-    >>> parse_java_version('''
-    ... openjdk version "1.8.0_60"
-    ... OpenJDK Runtime Environment (build 1.8.0_60-b27)
-    ... OpenJDK 64-Bit Server VM (build 25.60-b23, mixed mode))
-    ... ''')
-    (1, 8)
+    This function attempts to extract the major version numbers from the provided
+    Java version string using regular expressions. It supports two different
+    version formats defined by JAVA_VERSION_REGEX and JAVA_VERSION_REGEX_UPDATED.
 
+    :param version_text: The Java version string to parse.
+    :type version_text: str
+    :return: A tuple (major1, major2) of the first two version components, e.g. (1, 8) for version 1.8.0_60.
+    :rtype: Tuple[int, int]
+    :raises SystemExit: If the version string cannot be parsed.
     """
     match = (
         re.search(JAVA_VERSION_REGEX, version_text)
@@ -67,7 +63,17 @@ def parse_java_version(version_text: str) -> Tuple[int, int]:
 
 
 def confirm_java_compatibility() -> bool:
-    """ Confirms Java major version >= 8. """
+    """
+    Confirms if the installed Java version is compatible with language-tool-python.
+    This function checks if Java is installed and verifies that the major version is at least 8.
+    It raises an error if Java is not installed or if the version is incompatible.
+
+    :raises ModuleNotFoundError: If no Java installation is detected.
+    :raises SystemError: If the detected Java version is less than 8.
+    :return: True if the Java version is compatible.
+    :rtype: bool
+    """
+    
     java_path = which('java')
     if not java_path:
         raise ModuleNotFoundError(
@@ -94,15 +100,32 @@ def confirm_java_compatibility() -> bool:
 
 
 def get_common_prefix(z: zipfile.ZipFile) -> Optional[str]:
-    """Get common directory in a zip file if any."""
+    """
+    Return the first entry of a zip archive if every other entry name starts with it.
+
+    :param z: A ZipFile object representing the zip archive.
+    :type z: zipfile.ZipFile
+    :return: The first name in the archive when it is a prefix of all other names (e.g. a single top-level directory), or None if the archive is empty or the first name is not such a prefix.
+    :rtype: Optional[str]
+    """
+    
     name_list = z.namelist()
     if name_list and all(n.startswith(name_list[0]) for n in name_list[1:]):
         return name_list[0]
     return None
 
 
 def http_get(url: str, out_file: IO[bytes], proxies: Optional[Dict[str, str]] = None) -> None:
-    """ Get contents of a URL and save to a file.
+    """
+    Downloads a file from a given URL and writes it to the specified output file.
+
+    :param url: The URL to download the file from.
+    :type url: str
+    :param out_file: The file object to write the downloaded content to.
+    :type out_file: IO[bytes]
+    :param proxies: Optional dictionary of proxies to use for the request.
+    :type proxies: Optional[Dict[str, str]]
+    :raises Exception: If the file could not be found at the given URL (HTTP 403).
     """
     req = requests.get(url, stream=True, proxies=proxies)
     content_length = req.headers.get('Content-Length')
@@ -120,14 +143,29 @@ def http_get(url: str, out_file: IO[bytes], proxies: Optional[Dict[str, str]] =
 
 
 def unzip_file(temp_file: str, directory_to_extract_to: str) -> None:
-    """ Unzips a .zip file to folder path. """
+    """
+    Unzips a zip file to a specified directory.
+
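+    :param temp_file: The zip file to extract, given as a file object; only its ``name`` attribute (the path on disk) is used.
+    :type temp_file: file-like object with a ``name`` attribute (not a plain str, despite the annotation)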
+    :param directory_to_extract_to: The directory where the contents of the zip file will be extracted.
+    :type directory_to_extract_to: str
+    """
+    
     logger.info(f'Unzipping {temp_file.name} to {directory_to_extract_to}.')
     with zipfile.ZipFile(temp_file.name, 'r') as zip_ref:
         zip_ref.extractall(directory_to_extract_to)
 
 
 def download_zip(url: str, directory: str) -> None:
-    """ Downloads and unzips zip file from `url` to `directory`. """
+    """
+    Downloads a ZIP file from the given URL and extracts it to the specified directory.
+
+    :param url: The URL of the ZIP file to download.
+    :type url: str
+    :param directory: The directory where the ZIP file should be extracted.
+    :type directory: str
+    """
     # Download file.
     downloaded_file = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
     http_get(url, downloaded_file)
@@ -142,6 +180,19 @@ def download_zip(url: str, directory: str) -> None:
 
 
 def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION) -> None:
+    """
+    Downloads and extracts the specified version of LanguageTool.
+    This function checks for Java compatibility, creates the necessary download
+    directory if it does not exist, and downloads the specified version of 
+    LanguageTool if it is not already present.
+    
+    :param language_tool_version: The version of LanguageTool to download. If not 
+                                  specified, the default version defined by 
+                                  LTP_DOWNLOAD_VERSION is used.
+    :type language_tool_version: Optional[str]
+    :raises AssertionError: If the download folder is not a directory.
+    """
+
     confirm_java_compatibility()
 
     download_folder = get_language_tool_download_path()
 
@@ -4,27 +4,80 @@
 
 @total_ordering
 class LanguageTag:
-    """Language tag supported by LanguageTool."""
+    """
+    A class to represent and normalize language tags.
+
+    :param tag: The language tag.
+    :type tag: str
+    :param languages: An iterable of supported language tags.
+    :type languages: Iterable[str]
+
+    Attributes:
+        tag (str): The language tag to be normalized.
+        languages (Iterable[str]): An iterable of supported language tags.
+        normalized_tag (str): The normalized language tag.
+        _LANGUAGE_RE (re.Pattern): A regular expression to match language tags. 
+    """
     _LANGUAGE_RE = re.compile(r"^([a-z]{2,3})(?:[_-]([a-z]{2}))?$", re.I)
 
     def __init__(self, tag: str, languages: Iterable[str]) -> None:
+        """
+        Initialize a LanguageTag instance.
+        """
         self.tag = tag
         self.languages = languages
         self.normalized_tag = self._normalize(tag)
 
     def __eq__(self, other: Any) -> bool:
-        return self.normalized_tag == self._normalize(other_tag)
+        """
+        Compare this LanguageTag object with another for equality.
+
+        :param other: The other tag to compare with; it is normalized via _normalize() first, so an empty value raises ValueError.
+        :type other: Any
+        :return: True if the normalized tags are equal, False otherwise.
+        :rtype: bool
+        """
+        return self.normalized_tag == self._normalize(other)
 
     def __lt__(self, other: Any) -> bool:
+        """
+        Compare this object with another for less-than ordering.
+
+        :param other: The object to compare with.
+        :type other: Any
+        :return: True if this object is less than the other, False otherwise.
+        :rtype: bool
+        """
         return str(self) < self._normalize(other)
 
     def __str__(self) -> str:
+        """
+        Returns the string representation of the object.
+
+        :return: The normalized tag as a string.
+        :rtype: str
+        """
         return self.normalized_tag
 
     def __repr__(self) -> str:
+        """
+        Return a string representation of the LanguageTag instance.
+
+        :return: A string in the format '<LanguageTag "language_tag_string">'
+        :rtype: str
+        """
         return f'<LanguageTag "{str(self)}">'
 
     def _normalize(self, tag: str) -> str:
+        """
+        Normalize a language tag to a standard format.
+
+        :param tag: The language tag to normalize.
+        :type tag: str
+        :raises ValueError: If the tag is empty or unsupported.
+        :return: The normalized language tag.
+        :rtype: str
+        """
         if not tag:
             raise ValueError('empty language tag')
         languages = {language.lower().replace('-', '_'): language