Skip to content

Commit

Permalink
debugging indexing issue
Browse files Browse the repository at this point in the history
  • Loading branch information
LEFTA98 committed Jul 25, 2022
1 parent 55b2f4f commit 5f2d9aa
Showing 1 changed file with 31 additions and 31 deletions.
62 changes: 31 additions & 31 deletions eland/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,14 +1279,14 @@ def search_yield_pandas_dataframes(
if sort_params:
body["sort"] = [sort_params]

# i = 1
i = 1
for hits in _search_yield_hits(
query_compiler=query_compiler, body=body, max_number_of_hits=result_size, sort_index=sort_index
):
df = query_compiler._es_results_to_pandas(hits)
df = self._apply_df_post_processing(df, post_processing)
# df.to_csv(f'debug_{i}.csv')
# i += 1
df.to_csv(f'debug_{i}.csv')
i += 1
yield df

def index_count(self, query_compiler: "QueryCompiler", field: str) -> int:
Expand Down Expand Up @@ -1566,31 +1566,31 @@ def _search_yield_hits(
# to be the last sort value for this set of hits.
body["search_after"] = hits[-1]["sort"]

# if __name__ == "__main__":
# import eland as ed
# from opensearchpy import OpenSearch
#
#
# # try connecting to an actual cluster at some point
# def get_os_client(cluster_url='https://localhost:9200',
# username='admin',
# password='admin'):
# '''
# Get OpenSearch client
# :param cluster_url: cluster URL like https://ml-te-netwo-1s12ba42br23v-ff1736fa7db98ff2.elb.us-west-2.amazonaws.com:443
# :return: OpenSearch client
# '''
# client = OpenSearch(
# hosts=[cluster_url],
# http_auth=(username, password),
# verify_certs=False
# )
# return client
#
# client = get_os_client()
# ed_df = ed.DataFrame(client, 'sagemaker_demo_data')
#
# indices = [index for index, _ in ed_df.iterrows('_doc')]
# print(len(set(indices)))
#
# pass
if __name__ == "__main__":
    # Ad-hoc debugging harness: iterate over every row of an index and report
    # how many distinct index labels were seen.
    import eland as ed
    from opensearchpy import OpenSearch

    def get_os_client(
        cluster_url="https://localhost:9200",
        username="admin",
        password="admin",
    ):
        """Return an OpenSearch client for ``cluster_url``.

        The client authenticates with HTTP basic auth built from ``username``
        and ``password``.

        :param cluster_url: URL of the cluster, scheme and port included
            (for example ``https://localhost:9200``).
        :param username: user name for HTTP basic auth.
        :param password: password for HTTP basic auth.
        :return: the constructed ``OpenSearch`` client.
        """
        # NOTE(review): certificate verification is switched off and the
        # default credentials are admin/admin -- presumably only meant for a
        # local debugging cluster; confirm before pointing at anything else.
        return OpenSearch(
            hosts=[cluster_url],
            http_auth=(username, password),
            verify_certs=False,
        )

    os_client = get_os_client()
    frame = ed.DataFrame(os_client, "sagemaker_demo_data")

    # Collect the index label of every row yielded by iterrows; the set
    # discards duplicates, so its size is the number of distinct labels.
    unique_indices = {row_index for row_index, _ in frame.iterrows("_doc")}
    print(len(unique_indices))

0 comments on commit 5f2d9aa

Please sign in to comment.