continued renaming opensearch variables

opensearch-project · Aug 3, 2022 · 53df988 · 53df988
1 parent 396d7ec
commit 53df988
Show file tree

Hide file tree

Showing 33 changed files with 179 additions and 176 deletions.
diff --git a/.ci/jobs/defaults.yml b/.ci/jobs/defaults.yml
@@ -20,7 +20,7 @@
           <commitId>, etc.)
     properties:
     - github:
-        url: https://github.com/elastic/eland
+        url: https://github.com/elastic/opensearch_py_ml
     - inject:
         properties-content: HOME=$JENKINS_HOME
     concurrent: true
@@ -29,10 +29,10 @@
     - git:
         name: origin
         credentials-id: f6c7695a-671e-4f4f-a331-acdce44ff9ba
-        reference-repo: /var/lib/jenkins/.git-references/eland.git
+        reference-repo: /var/lib/jenkins/.git-references/opensearch_py_ml.git
         branches:
         - ${branch_specifier}
-        url: git@github.com:elastic/eland.git
+        url: git@github.com:elastic/opensearch_py_ml.git
         basedir: ''
         wipe-workspace: 'True'
     triggers:

diff --git a/.ci/jobs/elastic+eland+7.x.yml b/.ci/jobs/elastic+eland+7.x.yml
@@ -1,7 +1,7 @@
 ---
 - job:
-    name: elastic+eland+7.x
-    display-name: 'elastic / eland # 7.x'
+    name: elastic+opensearch_py_ml+7.x
+    display-name: 'elastic / opensearch_py_ml # 7.x'
     description: Eland is a data science client with a Pandas-like interface
     junit_results: "*-junit.xml"
     parameters:

diff --git a/.ci/jobs/elastic+eland+main.yml b/.ci/jobs/elastic+eland+main.yml
@@ -1,7 +1,7 @@
 ---
 - job:
-    name: elastic+eland+main
-    display-name: 'elastic / eland # main'
+    name: elastic+opensearch_py_ml+main
+    display-name: 'elastic / opensearch_py_ml # main'
     description: Eland is a data science client with a Pandas-like interface
     junit_results: "*-junit.xml"
     parameters:

diff --git a/.ci/jobs/elastic+eland+pull-request.yml b/.ci/jobs/elastic+eland+pull-request.yml
@@ -1,8 +1,8 @@
 ---
 - job:
-    name: elastic+eland+pull-request
-    display-name: 'elastic / eland # pull-request'
-    description: Testing of eland pull requests.
+    name: elastic+opensearch_py_ml+pull-request
+    display-name: 'elastic / opensearch_py_ml # pull-request'
+    description: Testing of opensearch_py_ml pull requests.
     scm:
     - git:
         branches:

diff --git a/.ci/run-repository.sh b/.ci/run-repository.sh
@@ -25,15 +25,15 @@ echo -e "\033[34;1mINFO:\033[0m PANDAS_VERSION ${PANDAS_VERSION}\033[0m"
 
 echo -e "\033[1m>>>>> Build [elastic/eland container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
 
-docker build --file .ci/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
+docker build --file .ci/Dockerfile --tag elastic/opensearch-py-ml --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
 
 echo -e "\033[1m>>>>> Run [elastic/eland container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
 
 docker run \
   --network=${NETWORK_NAME} \
   --env "ELASTICSEARCH_HOST=${ELASTICSEARCH_URL}" \
   --env "TEST_SUITE=${TEST_SUITE}" \
-  --name eland-test-runner \
+  --name opensearch-py-ml-test-runner \
   --rm \
-  elastic/eland \
+  elastic/opensearch-py-ml \
   nox -s "test-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')"
diff --git a/.gitignore b/.gitignore
@@ -18,7 +18,7 @@ result_images/
 
 # Python egg metadata, regenerated from source files by setuptools.
 /*.egg-info
-eland.egg-info/
+opensearch_py_ml.egg-info/
 
 # PyCharm files
 .idea/

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -46,7 +46,7 @@ Added
 * Added support for ``number_samples`` to LightGBM and Scikit-Learn models (`#397`_, contributed by `@V1NAY8`_)
 * Added ability to use datetime types for filtering dataframes (`284`_, contributed by `@Fju`_)
 * Added pandas ``datetime64`` type to use the Elasticsearch ``date`` type (`#425`_, contributed by `@Ashton-Sidhu`_)
-* Added ``es_verify_mapping_compatibility`` parameter to disable schema enforcement with ``pandas_to_eland`` (`#423`_, contributed by `@Ashton-Sidhu`_)
+* Added ``es_verify_mapping_compatibility`` parameter to disable schema enforcement with ``pandas_to_opensearch`` (`#423`_, contributed by `@Ashton-Sidhu`_)
 
 Changed
 ^^^^^^^
@@ -197,26 +197,26 @@ Added
 * Added support for type hints of the ``elasticsearch-py`` package (`#295`_)
 
 * Added support for passing dictionaries to ``es_type_overrides`` parameter
-  in the ``pandas_to_eland()`` function to directly control the field mapping
+  in the ``pandas_to_opensearch()`` function to directly control the field mapping
   generated in Elasticsearch (`#310`_)
 
 * Added ``es_dtypes`` property to ``DataFrame`` and ``Series`` (`#285`_) 
 
 Changed
 ^^^^^^^
 
-* Changed ``pandas_to_eland()`` to use the ``parallel_bulk()``
+* Changed ``pandas_to_opensearch()`` to use the ``parallel_bulk()``
   helper instead of single-threaded ``bulk()`` helper to improve
   performance (`#279`_, contributed by `@V1NAY8`_)
 
-* Changed the ``es_type_overrides`` parameter in ``pandas_to_eland()``
+* Changed the ``es_type_overrides`` parameter in ``pandas_to_opensearch()``
   to raise ``ValueError`` if an unknown column is given (`#302`_)
 
 * Changed ``DataFrame.filter()`` to preserve the order of items
   (`#283`_, contributed by `@V1NAY8`_)
 
 * Changed when setting ``es_type_overrides={"column": "text"}`` in
-  ``pandas_to_eland()`` will automatically add the ``column.keyword``
+  ``pandas_to_opensearch()`` will automatically add the ``column.keyword``
   sub-field so that aggregations are available for the field as well (`#310`_)
 
 Fixed
@@ -286,7 +286,7 @@ Added
 * Added support for LightGBM ``LGBMRegressor`` and ``LGBMClassifier`` to ``ImportedMLModel`` (`#247`_, `#252`_)
 * Added support for ``multi:softmax`` and ``multi:softprob`` XGBoost operators to ``ImportedMLModel`` (`#246`_)
 * Added column names to ``DataFrame.__dir__()`` for better auto-completion support (`#223`_, contributed by `@leonardbinet`_)
-* Added support for ``es_if_exists='append'`` to ``pandas_to_eland()`` (`#217`_)
+* Added support for ``es_if_exists='append'`` to ``pandas_to_opensearch()`` (`#217`_)
 * Added support for aggregating datetimes with ``nunique`` and ``mean`` (`#253`_)
 * Added ``es_compress_model_definition`` parameter to ``ImportedMLModel`` constructor (`#220`_)
 * Added ``.size`` and ``.ndim`` properties to ``DataFrame`` and ``Series`` (`#231`_ and `#233`_)
@@ -350,7 +350,7 @@ Deprecated
 ^^^^^^^^^^
 
 * Deprecated ``info_es()`` in favor of ``es_info()`` (`#208`_)
-* Deprecated ``opensearch_py_ml.read_csv()`` in favor of ``opensearch_py_ml.csv_to_eland()`` (`#208`_)
+* Deprecated ``opensearch_py_ml.read_csv()`` in favor of ``opensearch_py_ml.csv_to_opensearch()`` (`#208`_)
 * Deprecated ``opensearch_py_ml.read_es()`` in favor of ``opensearch_py_ml.DataFrame()`` (`#208`_)
 
 Changed
@@ -398,8 +398,8 @@ Added
 ^^^^^
 
 * Added support for Pandas v1.0.0 (`#141`_, contributed by `@mesejo`_)
-* Added ``use_pandas_index_for_es_ids`` parameter to ``pandas_to_eland()`` (`#154`_)
-* Added ``es_type_overrides`` parameter to ``pandas_to_eland()`` (`#181`_)
+* Added ``use_pandas_index_for_os_ids`` parameter to ``pandas_to_opensearch()`` (`#154`_)
+* Added ``es_type_overrides`` parameter to ``pandas_to_opensearch()`` (`#181`_)
 * Added ``NDFrame.var()``, ``.std()`` and ``.median()`` aggregations (`#175`_, `#176`_, contributed by `@mesejo`_)
 * Added ``DataFrame.es_query()`` to allow modifying ES queries directly (`#156`_)
 * Added ``opensearch_py_ml.__version__`` (`#153`_, contributed by `@mesejo`_)
@@ -411,7 +411,7 @@ Removed
 * Removed ``opensearch_py_ml.Client()`` interface, use
   ``elasticsearch.Elasticsearch()`` client instead (`#166`_)
 * Removed all private objects from top-level ``eland`` namespace (`#170`_)
-* Removed ``geo_points`` from ``pandas_to_eland()`` in favor of ``es_type_overrides`` (`#181`_)
+* Removed ``geo_points`` from ``pandas_to_opensearch()`` in favor of ``es_type_overrides`` (`#181`_)
 
 Changed
 ^^^^^^^

diff --git a/docs/sphinx/examples/demo_notebook.ipynb b/docs/sphinx/examples/demo_notebook.ipynb
@@ -120,7 +120,7 @@
    },
    "outputs": [],
    "source": [
-    "pd_flights = ed.eland_to_pandas(ed_flights)"
+    "pd_flights = ed.opensearch_to_pandas(ed_flights)"
    ]
   },
   {
@@ -968,7 +968,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "This method would scan/scroll the entire Elasticsearch index(s) into memory. If this is explicitly required, and there is sufficient memory, call `ed.eland_to_pandas(ed_df).values`\n"
+      "This method would scan/scroll the entire Elasticsearch index(s) into memory. If this is explicitly required, and there is sufficient memory, call `ed.opensearch_to_pandas(ed_df).values`\n"
      ]
     }
    ],

diff --git a/docs/sphinx/examples/introduction_to_eland_webinar.ipynb b/docs/sphinx/examples/introduction_to_eland_webinar.ipynb
@@ -246,12 +246,12 @@
     }
    ],
    "source": [
-    "df = ed.pandas_to_eland(\n",
+    "df = ed.pandas_to_opensearch(\n",
     "    pd_df=pd_df,\n",
     "    es_client=es,\n",
     "\n",
     "    # Where the data will live in Elasticsearch\n",
-    "    es_dest_index=\"nyc-restaurants\",\n",
+    "    os_dest_index=\"nyc-restaurants\",\n",
     "    \n",
     "    # Type overrides for certain columns, 'location' detected\n",
     "    # automatically as 'keyword' but we want these interpreted as 'geo_point'.\n",

diff --git a/docs/sphinx/examples/online_retail_analysis.ipynb b/docs/sphinx/examples/online_retail_analysis.ipynb
@@ -52,11 +52,11 @@
    },
    "outputs": [],
    "source": [
-    "df = ed.csv_to_eland(\"data/online-retail.csv.gz\",\n",
+    "df = ed.csv_to_opensearch(\"data/online-retail.csv.gz\",\n",
     "                     es_client='http://localhost:9200', \n",
-    "                     es_dest_index='online-retail', \n",
+    "                     os_dest_index='online-retail', \n",
     "                     es_if_exists='replace', \n",
-    "                     es_dropna=True,\n",
+    "                     os_dropna=True,\n",
     "                     es_refresh=True,\n",
     "                     compression='gzip',\n",
     "                     index_col=0)"

diff --git a/docs/sphinx/reference/api/eland.csv_to_eland.rst b/docs/sphinx/reference/api/eland.csv_to_eland.rst
@@ -1,6 +1,6 @@
-opensearch_py_ml.csv_to_eland
+opensearch_py_ml.csv_to_opensearch
 ==================
 
 .. currentmodule:: eland
 
-.. autofunction:: csv_to_eland
+.. autofunction:: csv_to_opensearch
diff --git a/docs/sphinx/reference/api/eland.eland_to_pandas.rst b/docs/sphinx/reference/api/eland.eland_to_pandas.rst
@@ -1,6 +1,6 @@
-opensearch_py_ml.eland_to_pandas
+opensearch_py_ml.opensearch_to_pandas
 =====================
 
 .. currentmodule:: eland
 
-.. autofunction:: eland_to_pandas
+.. autofunction:: opensearch_to_pandas
diff --git a/docs/sphinx/reference/api/eland.pandas_to_eland.rst b/docs/sphinx/reference/api/eland.pandas_to_eland.rst
@@ -1,6 +1,6 @@
-opensearch_py_ml.pandas_to_eland
+opensearch_py_ml.pandas_to_opensearch
 =====================
 
 .. currentmodule:: eland
 
-.. autofunction:: pandas_to_eland
+.. autofunction:: pandas_to_opensearch
diff --git a/docs/sphinx/reference/general_utility_functions.rst b/docs/sphinx/reference/general_utility_functions.rst
@@ -10,5 +10,5 @@ Pandas and Eland
 .. autosummary::
    :toctree: api/
 
-    pandas_to_eland
-    eland_to_pandas
+    pandas_to_opensearch
+    opensearch_to_pandas
diff --git a/docs/sphinx/reference/io.rst b/docs/sphinx/reference/io.rst
@@ -10,4 +10,4 @@ Flat File
 .. autosummary::
    :toctree: api/
 
-    csv_to_eland
+    csv_to_opensearch
diff --git a/opensearch_py_ml/__init__.py b/opensearch_py_ml/__init__.py
@@ -27,7 +27,7 @@
 )
 from .common import SortOrder
 from .dataframe import DataFrame
-from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland
+from .etl import csv_to_opensearch, opensearch_to_pandas, pandas_to_opensearch
 from .index import Index
 from .ndframe import NDFrame
 from .series import Series
@@ -38,9 +38,9 @@
     "Series",
     "NDFrame",
     "Index",
-    "pandas_to_eland",
-    "eland_to_pandas",
-    "csv_to_eland",
+    "pandas_to_opensearch",
+    "opensearch_to_pandas",
+    "csv_to_opensearch",
     "SortOrder",
     "make_sagemaker_prediction"
 ]
diff --git a/opensearch_py_ml/common.py b/opensearch_py_ml/common.py
@@ -32,8 +32,10 @@
 
 import pandas as pd  # type: ignore
 from elasticsearch import Elasticsearch
+from opensearchpy import OpenSearch
 
-from ._version import __version__ as _eland_version
+from ._version import __version__ as _opensearch_py_ml_version
+from warnings import warn
 
 if TYPE_CHECKING:
     from numpy.typing import DTypeLike
@@ -50,7 +52,7 @@
     int(part) for part in pd.__version__.split(".") if part.isdigit()
 )[:2]
 
-_ELAND_MAJOR_VERSION = int(_eland_version.split(".")[0])
+_OPENSEARCH_PY_ML_MAJOR_VERSION = int(_opensearch_py_ml_version.split(".")[0])
 
 
 with warnings.catch_warnings():
@@ -308,42 +310,43 @@ def elasticsearch_date_to_pandas_date(
 def ensure_es_client(
     es_client: Union[str, List[str], Tuple[str, ...], Elasticsearch]
 ) -> Elasticsearch:
+    warn('function uses Elasticsearch and has been deprecated', DeprecationWarning, stacklevel=2)
     if isinstance(es_client, tuple):
         es_client = list(es_client)
     if not isinstance(es_client, Elasticsearch):
         es_client = Elasticsearch(es_client)  # type: ignore[arg-type]
     return es_client
 
 
-def es_version(es_client: Elasticsearch) -> Tuple[int, int, int]:
+def os_version(os_client: OpenSearch) -> Tuple[int, int, int]:
     """Tags the current ES client with a cached '_eland_es_version'
     property if one doesn't exist yet for the current Elasticsearch version.
     """
-    eland_es_version: Tuple[int, int, int]
-    if not hasattr(es_client, "_eland_es_version"):
-        version_info = es_client.info()["version"]["number"]
+    opensearch_py_ml_os_version: Tuple[int, int, int]
+    if not hasattr(os_client, "_eland_es_version"):
+        version_info = os_client.info()["version"]["number"]
         match = re.match(r"^(\d+)\.(\d+)\.(\d+)", version_info)
         if match is None:
             raise ValueError(
-                f"Unable to determine Elasticsearch version. "
+                f"Unable to determine version. "
                 f"Received: {version_info}"
             )
-        eland_es_version = cast(
+        opensearch_py_ml_os_version = cast(
             Tuple[int, int, int], tuple(int(x) for x in match.groups())
         )
-        es_client._eland_es_version = eland_es_version  # type: ignore
+        os_client._eland_es_version = opensearch_py_ml_os_version  # type: ignore
 
         # Raise a warning if the major version of the library doesn't match the
         # Elasticsearch server major version.
-        if eland_es_version[0] != _ELAND_MAJOR_VERSION:
+        if opensearch_py_ml_os_version[0] != _OPENSEARCH_PY_ML_MAJOR_VERSION:
             warnings.warn(
-                f"Eland major version ({_eland_version}) doesn't match the major "
-                f"version of the Elasticsearch server ({version_info}) which can lead "
-                f"to compatibility issues. Your Eland major version should be the same "
+                f"OpenSearch major version ({_opensearch_py_ml_version}) doesn't match the major "
+                f"version of the OpenSearch server ({version_info}) which can lead "
+                f"to compatibility issues. Your major version should be the same "
                 "as your cluster major version.",
                 stacklevel=2,
             )
 
     else:
-        eland_es_version = es_client._eland_es_version  # type: ignore
-    return eland_es_version
+        opensearch_py_ml_os_version = os_client._eland_es_version  # type: ignore
+    return opensearch_py_ml_os_version
diff --git a/opensearch_py_ml/dataframe.py b/opensearch_py_ml/dataframe.py
@@ -2114,13 +2114,13 @@ def values(self) -> None:
         In pandas this returns a Numpy representation of the DataFrame. This would involve scan/scrolling the
         entire index.
 
-        If this is required, call ``ed.eland_to_pandas(ed_df).values``, *but beware this will scan/scroll the entire
+        If this is required, call ``ed.opensearch_to_pandas(ed_df).values``, *but beware this will scan/scroll the entire
         Elasticsearch index(s) into memory.*
 
         See Also
         --------
         :pandas_api_docs:`pandas.DataFrame.values`
-        eland_to_pandas
+        opensearch_to_pandas
         to_numpy
         """
         return self.to_numpy()
@@ -2143,7 +2143,7 @@ def to_numpy(self) -> None:
         Examples
         --------
         >>> ed_df = ed.DataFrame('http://localhost:9200', 'flights', columns=['AvgTicketPrice', 'Carrier']).head(5)
-        >>> pd_df = ed.eland_to_pandas(ed_df)
+        >>> pd_df = ed.opensearch_to_pandas(ed_df)
         >>> print(f"type(ed_df)={type(ed_df)}\\ntype(pd_df)={type(pd_df)}")
         type(ed_df)=<class 'opensearch_py_ml.dataframe.DataFrame'>
         type(pd_df)=<class 'pandas.core.frame.DataFrame'>
@@ -2165,5 +2165,5 @@ def to_numpy(self) -> None:
         """
         raise AttributeError(
             "This method would scan/scroll the entire Elasticsearch index(s) into memory. "
-            "If this is explicitly required, and there is sufficient memory, call `ed.eland_to_pandas(ed_df).values`"
+            "If this is explicitly required, and there is sufficient memory, call `ed.opensearch_to_pandas(ed_df).values`"
         )