opensearch-project · dblock · Jan 19, 2024 · Dec 19, 2023 · Dec 22, 2023 · Dec 22, 2023
@@ -3,6 +3,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 
 ## [Unreleased]
 ### Added
+- Added pylint `unspecified-encoding` and `missing-function-docstring` and ignored opensearchpy for lints (([#643](https://github.com/opensearch-project/opensearch-py/pull/643)))
 - Added pylint `line-too-long` and `invalid-name` ([#590](https://github.com/opensearch-project/opensearch-py/pull/590))
 - Added pylint `pointless-statement` ([#611](https://github.com/opensearch-project/opensearch-py/pull/611))
 - Added a log collection guide ([#579](https://github.com/opensearch-project/opensearch-py/pull/579))

@@ -36,8 +36,9 @@ async def index_records(client: Any, index_name: str, item_count: int) -> None:
 
 async def test_async(client_count: int = 1, item_count: int = 1) -> None:
     """
-    asynchronously index with item_count records and run client_count clients. This function can be used to
-    test balancing the number of items indexed with the number of documents.
+    asynchronously index with item_count records and run client_count
+    clients. This function can be used to test balancing the number of
+    items indexed with the number of documents.
     """
     host = "localhost"
     port = 9200

@@ -22,13 +22,12 @@
 
 def get_info(client: Any, request_count: int) -> float:
     """get info from client"""
-    tt: float = 0
-    for n in range(request_count):
+    total_time: float = 0
+    for request in range(request_count):
         start = time.time() * 1000
         client.info()
-        total_time = time.time() * 1000 - start
-        tt += total_time
-    return tt
+        total_time += time.time() * 1000 - start
+    return total_time
 
 
 def test(thread_count: int = 1, request_count: int = 1, client_count: int = 1) -> None:
@@ -71,8 +70,8 @@ def test(thread_count: int = 1, request_count: int = 1, client_count: int = 1) -
         thread.start()
 
     latency = 0
-    for t in threads:
-        latency += t.join()
+    for thread in threads:
+        latency += thread.join()
 
     print(f"latency={latency}")
 

@@ -23,29 +23,29 @@
 
 def index_records(client: Any, index_name: str, item_count: int) -> Any:
     """bulk index item_count records into index_name"""
-    tt = 0
-    for n in range(10):
+    total_time = 0
+    for iteration in range(10):
         data: Any = []
-        for i in range(item_count):
+        for item in range(item_count):
             data.append(
                 json.dumps({"index": {"_index": index_name, "_id": str(uuid.uuid4())}})
             )
-            data.append(json.dumps({"value": i}))
+            data.append(json.dumps({"value": item}))
         data = "\n".join(data)
 
         start = time.time() * 1000
-        rc = client.bulk(data)
-        if rc["errors"]:
-            raise Exception(rc["errors"])
+        response = client.bulk(data)
+        if response["errors"]:
+            raise Exception(response["errors"])
 
-        server_time = rc["took"]
-        total_time = time.time() * 1000 - start
+        server_time = response["took"]
+        this_time = time.time() * 1000 - start
 
-        if total_time < server_time:
-            raise Exception(f"total={total_time} < server={server_time}")
+        if this_time < server_time:
+            raise Exception(f"total={this_time} < server={server_time}")
 
-        tt += total_time - server_time
-    return tt
+        total_time += this_time - server_time
+    return total_time
 
 
 def test(thread_count: int = 1, item_count: int = 1, client_count: int = 1) -> None:
@@ -105,8 +105,8 @@ def test(thread_count: int = 1, item_count: int = 1, client_count: int = 1) -> N
         thread.start()
 
     latency = 0
-    for t in threads:
-        latency += t.join()
+    for thread in threads:
+        latency += thread.join()
 
     clients[0].indices.refresh(index=index_name)
     count = clients[0].count(index=index_name)

@@ -25,7 +25,7 @@
 #  under the License.
 
 
-from typing import Any
+from typing import Any, List
 
 import nox
 
@@ -43,6 +43,10 @@
 
 @nox.session(python=["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"])  # type: ignore
 def test(session: Any) -> None:
+    """
+    runs all tests with a fresh python environment using "python setup.py test"
+    :param session: current nox session
+    """
     session.install(".")
     # ensure client can be imported without aiohttp
     session.run("python", "-c", "import opensearchpy\nprint(opensearchpy.OpenSearch())")
@@ -59,6 +63,10 @@ def test(session: Any) -> None:
 
 @nox.session(python=["3.7"])  # type: ignore
 def format(session: Any) -> None:
+    """
+    runs black and isort to format the files accordingly
+    :param session: current nox session
+    """
     session.install(".")
     session.install("black", "isort")
 
@@ -71,6 +79,10 @@ def format(session: Any) -> None:
 
 @nox.session(python=["3.7"])  # type: ignore
 def lint(session: Any) -> None:
+    """
+    runs isort, black, flake8, pylint, and mypy to check the files according to each utility's function
+    :param session: current nox session
+    """
     session.install(
         "flake8",
         "black",
@@ -89,7 +101,9 @@ def lint(session: Any) -> None:
     session.run("isort", "--check", *SOURCE_FILES)
     session.run("black", "--check", *SOURCE_FILES)
     session.run("flake8", *SOURCE_FILES)
-    session.run("pylint", *SOURCE_FILES)
+
+    lint_per_folder(session)
+
     session.run("python", "utils/license_headers.py", "check", *SOURCE_FILES)
 
     # Workaround to make '-r' to still work despite uninstalling aiohttp below.
@@ -108,8 +122,68 @@ def lint(session: Any) -> None:
     session.run("mypy", "--strict", "test_opensearchpy/test_types/sync_types.py")
 
 
+def lint_per_folder(session: Any) -> None:
+    """
+    allows configuration of pylint rules per folder and runs a pylint command for each folder
+    :param session: the current nox session
+    """
+
+    # any paths that should not be run through pylint
+    exclude_path_from_linting: List[str] = []
+
+    # all paths not referenced in override_enable will run these lints
+    default_enable = [
+        "line-too-long",
+        "invalid-name",
+        "pointless-statement",
+        "unspecified-encoding",
+        "missing-function-docstring",
+        "missing-param-doc",
+        "differing-param-doc",
+    ]
+    override_enable = {
+        "test_opensearchpy/": [
+            "line-too-long",
+            # "invalid-name", lots of short functions with one or two character names
+            "pointless-statement",
+            "unspecified-encoding",
+            "missing-param-doc",
+            "differing-param-doc",
+            # "missing-function-docstring", test names usually are, self describing
+        ],
+        "opensearchpy/": [
+            "line-too-long",
+            "invalid-name",
+            "pointless-statement",
+            "unspecified-encoding",
+        ],
+    }
+
+    for source_file in SOURCE_FILES:
+        if source_file in exclude_path_from_linting:
+            continue
+
+        args = [
+            "--disable=all",
+            "--max-line-length=240",
+            "--good-names-rgxs=^[_a-z][_a-z0-9]?$",
+            "--load-plugins",
+            "pylint.extensions.docparams",
+        ]
+        if source_file in override_enable:
+            args.append(f"--enable={','.join(override_enable[source_file])}")
+        else:
+            args.append(f"--enable={','.join(default_enable)}")
+        args.append(source_file)
+        session.run("pylint", *args)
+
+
 @nox.session()  # type: ignore
 def docs(session: Any) -> None:
+    """
+    builds the html documentation for the client
+    :param session: current nox session
+    """
     session.install(".")
     session.install(".[docs]")
     with session.chdir("docs"):
@@ -118,6 +192,10 @@ def docs(session: Any) -> None:
 
 @nox.session()  # type: ignore
 def generate(session: Any) -> None:
+    """
+    generates the base API code
+    :param session: current nox session
+    """
     session.install("-rdev-requirements.txt")
     session.run("python", "utils/generate_api.py")
     format(session)
@@ -35,6 +35,12 @@
 
 
 def to_str(x: Union[str, bytes], encoding: str = "ascii") -> str:
+    """
+    returns x as a string encoded in "encoding" if it is not already a string
+    :param x: the value to convert to a str
+    :param encoding: the encoding to convert to - see https://docs.python.org/3/library/codecs.html#standard-encodings
+    :return: an encoded str
+    """
     if not isinstance(x, str):
         return x.decode(encoding)
     return x

@@ -19,8 +19,8 @@
 
 def main() -> None:
     """
-    demonstrates various functions to operate on the index (e.g. clear different levels of cache, refreshing the
-    index)
+    demonstrates various functions to operate on the index
+    (e.g. clear different levels of cache, refreshing the index)
     """
     # Set up
     client = OpenSearch(

@@ -21,10 +21,13 @@
 
 def main() -> None:
     """
-    connects to a cluster specified in environment variables, creates an index, inserts documents,
+    connects to a cluster specified in environment variables,
+    creates an index, inserts documents,
     searches the index, deletes the document, deletes the index.
-    the environment variables are "ENDPOINT" for the cluster endpoint, AWS_REGION for the region in which the cluster
-    is hosted, and SERVICE to indicate if this is an ES 7.10.2 compatible cluster
+    the environment variables are "ENDPOINT" for the cluster
+    endpoint, AWS_REGION for the region in which the cluster
+    is hosted, and SERVICE to indicate if this is an ES 7.10.2
+    compatible cluster
     """
     # verbose logging
     logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

@@ -21,9 +21,10 @@
 
 def main() -> None:
     """
-    1. connects to an OpenSearch cluster on AWS defined by environment variables (i.e. ENDPOINT - cluster endpoint like
-    my-test-domain.us-east-1.es.amazonaws.com; AWS_REGION like us-east-1, us-west-2; and SERVICE like es which
-    differentiates beteween serverless and the managed service.
+    1. connects to an OpenSearch cluster on AWS defined by environment variables
+    (i.e. ENDPOINT - cluster endpoint like my-test-domain.us-east-1.es.
+    amazonaws.com; AWS_REGION like us-east-1, us-west-2; and SERVICE like es which
+    differentiates between serverless and the managed service.
     2. creates an index called "movies" and adds a single document
     3. queries for that document
     4. deletes the document

@@ -53,7 +53,7 @@ def main() -> None:
         data.append({"index": {"_index": index_name, "_id": i}})
         data.append({"value": i})
 
-    rc = client.bulk(data)
+    rc = client.bulk(data)  # pylint: disable=invalid-name
     if rc["errors"]:
         print("There were errors:")
         for item in rc["items"]:

@@ -18,8 +18,8 @@
 
 def main() -> None:
     """
-    demonstrates how to bulk load data using opensearchpy.helpers including examples of serial, parallel, and streaming
-    bulk load
+    demonstrates how to bulk load data using opensearchpy.helpers
+    including examples of serial, parallel, and streaming bulk load
     """
     # connect to an instance of OpenSearch
 
@@ -56,7 +56,7 @@ def main() -> None:
         data.append({"_index": index_name, "_id": i, "value": i})
 
     # serialized bulk raising an exception on error
-    rc = helpers.bulk(client, data)
+    rc = helpers.bulk(client, data)  # pylint: disable=invalid-name
     print(f"Bulk-inserted {rc[0]} items (bulk).")
 
     # parallel bulk with explicit error checking

@@ -55,7 +55,7 @@ def main() -> None:
         data += json.dumps({"index": {"_index": index_name, "_id": i}}) + "\n"
         data += json.dumps({"value": i}) + "\n"
 
-    rc = client.bulk(data)
+    rc = client.bulk(data)  # pylint: disable=invalid-name
     if rc["errors"]:
         print("There were errors:")
         for item in rc["items"]:

@@ -18,7 +18,8 @@
 
 def main() -> None:
     """
-    provides samples for different ways to handle documents including indexing, searching, updating, and deleting
+    provides samples for different ways to handle documents
+    including indexing, searching, updating, and deleting
     """
     # Connect to OpenSearch
     client = OpenSearch(

@@ -17,8 +17,9 @@
 
 def main() -> None:
     """
-    an example showing how to create an synchronous connection to OpenSearch, create an index, index a document
-    and search to return the document
+    an example showing how to create an synchronous connection to
+    OpenSearch, create an index, index a document and search to
+    return the document
     """
     host = "localhost"
     port = 9200
@@ -49,19 +50,21 @@ def main() -> None:
 
     document = {"title": "Moneyball", "director": "Bennett Miller", "year": "2011"}
 
-    id = "1"
+    doc_id = "1"
 
-    response = client.index(index=index_name, body=document, id=id, refresh=True)
+    response = client.index(index=index_name, body=document, id=doc_id, refresh=True)
 
     print(response)
 
     # search for a document
 
-    q = "miller"
+    user_query = "miller"
 
     query = {
         "size": 5,
-        "query": {"multi_match": {"query": q, "fields": ["title^2", "director"]}},
+        "query": {
+            "multi_match": {"query": user_query, "fields": ["title^2", "director"]}
+        },
     }
 
     response = client.search(body=query, index=index_name)
@@ -70,7 +73,7 @@ def main() -> None:
 
     # delete the document
 
-    response = client.delete(index=index_name, id=id)
+    response = client.delete(index=index_name, id=doc_id)
 
     print(response)