Skip to content

Commit

Permalink
Merge pull request #3 from LEFTA98/data-setup-fix
Browse files (browse the repository at this point in the history)
reverted many erroneous "fixes" to tests
  • Loading branch information
LEFTA98 authored Jul 8, 2022
2 parents 983b43d + 60927c1 commit 83a6230
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 46 deletions.
2 changes: 1 addition & 1 deletion eland/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,4 +543,4 @@ def csv_to_eland( # type: ignore
first_write = False

# Now create an eland.DataFrame that references the new index
return DataFrame(es_client, es_index_pattern=es_dest_index)
return DataFrame(es_client, os_index_pattern=es_dest_index)
2 changes: 1 addition & 1 deletion eland/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def __init__(self, field: str, value: Any) -> None:
class Equal(BooleanFilter):
def __init__(self, field: str, value: Any) -> None:
super().__init__()
self._filter = {"match": {field: value}}
self._filter = {"term": {field: value}}


class IsIn(BooleanFilter):
Expand Down
18 changes: 2 additions & 16 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,6 @@
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
# need to unpack the dictionaries in the original df to accommodate OpenSearch client read procedure
_pd_flights['DestLocation.lon'] = _pd_flights['DestLocation'].apply(pd.Series)['lon']
_pd_flights['DestLocation.lat'] = _pd_flights['DestLocation'].apply(pd.Series)['lat']
_pd_flights['OriginLocation.lon'] = _pd_flights['OriginLocation'].apply(pd.Series)['lon']
_pd_flights['OriginLocation.lat'] = _pd_flights['OriginLocation'].apply(pd.Series)['lat']
_pd_flights = _pd_flights.drop(columns=['DestLocation','OriginLocation'])
_pd_flights = _pd_flights.reindex(sorted(_pd_flights.columns), axis=1)

_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)

_pd_flights_small = _pd_flights.head(48)
Expand All @@ -56,17 +48,11 @@
_pd_ecommerce["products.created_on"] = _pd_ecommerce["products.created_on"].apply(
lambda x: pd.to_datetime(x)
)
_pd_ecommerce.insert(2, "customer_birth_date", None)
_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
# need to unpack the dictionaries in the original df to accommodate OpenSearch client read procedure
_pd_ecommerce['geoip.location.lon'] = _pd_ecommerce['geoip.location'].apply(pd.Series)['lon']
_pd_ecommerce['geoip.location.lat'] = _pd_ecommerce['geoip.location'].apply(pd.Series)['lat']
_pd_ecommerce = _pd_ecommerce.drop(columns=['geoip.location'])
_pd_ecommerce = _pd_ecommerce.reindex(sorted(_pd_ecommerce.columns), axis=1)


_pd_ecommerce["customer_birth_date"].astype("datetime64")
_ed_ecommerce = ed.DataFrame(ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME)


class TestData:
client = ES_TEST_CLIENT

Expand Down
6 changes: 2 additions & 4 deletions tests/dataframe/test_dtypes_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def test_es_dtypes(self, testdata):
"DestAirportID": "keyword",
"DestCityName": "keyword",
"DestCountry": "keyword",
"DestLocation.lat": "float",
"DestLocation.lon": "float",
"DestLocation": "geo_point",
"DestRegion": "keyword",
"DestWeather": "keyword",
"DistanceKilometers": "float",
Expand All @@ -63,8 +62,7 @@ def test_es_dtypes(self, testdata):
"OriginAirportID": "keyword",
"OriginCityName": "keyword",
"OriginCountry": "keyword",
"OriginLocation.lat": "float",
"OriginLocation.lon": "float",
"OriginLocation": "geo_point",
"OriginRegion": "keyword",
"OriginWeather": "keyword",
"dayOfWeek": "byte",
Expand Down
2 changes: 1 addition & 1 deletion tests/dataframe/test_es_query_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_es_query_allows_query_in_dict(self):
assert len(left) > 0
assert_eland_frame_equal(left, right)

@pytest.mark.skip(reason="OpenSearch currently does not support geosearch")
# @pytest.mark.skip(reason="OpenSearch currently does not support geosearch")
def test_es_query_geo_location(self):
df = self.ed_ecommerce()
cur_nearby = df.es_query(
Expand Down
2 changes: 1 addition & 1 deletion tests/dataframe/test_getitem_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_getitem_one_argument(self, df):
print(df.OriginAirportID)

def test_getitem_multiple_calls(self, df):
df = df[["DestCityName", "DestCountry", "DestLocation.lat", "DestLocation.lon", "DestRegion"]]
df = df[["DestCityName", "DestCountry", "DestLocation", "DestRegion"]]
with pytest.raises(KeyError):
df["Carrier"]

Expand Down
2 changes: 1 addition & 1 deletion tests/etl/test_pandas_to_eland.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import pandas as pd
import pytest
from elasticsearch.helpers import BulkIndexError
from opensearchpy.helpers import BulkIndexError

from eland import DataFrame, pandas_to_eland
from tests.common import (
Expand Down
42 changes: 21 additions & 21 deletions tests/field_mappings/test_aggregatables_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,21 @@ def test_ecommerce_all_aggregatables(self):

expected = {
"category.keyword": "category",
'currency.keyword': "currency",
"currency": "currency",
"customer_birth_date": "customer_birth_date",
"customer_first_name.keyword": "customer_first_name",
"customer_full_name.keyword": "customer_full_name",
'customer_gender.keyword': "customer_gender",
"customer_id": "customer_id",
"customer_last_name.keyword": "customer_last_name",
"customer_phone.keyword": "customer_phone",
"day_of_week.keyword": "day_of_week",
"customer_phone": "customer_phone",
"day_of_week": "day_of_week",
"day_of_week_i": "day_of_week_i",
"email.keyword": "email",
"geoip.city_name.keyword": "geoip.city_name",
"geoip.continent_name.keyword": "geoip.continent_name",
"geoip.country_iso_code.keyword": "geoip.country_iso_code",
"geoip.location.lat": "geoip.location.lat",
"geoip.location.lon": "geoip.location.lon",
"geoip.region_name.keyword": "geoip.region_name",
"email": "email",
"geoip.city_name": "geoip.city_name",
"geoip.continent_name": "geoip.continent_name",
"geoip.country_iso_code": "geoip.country_iso_code",
"geoip.location": "geoip.location",
"geoip.region_name": "geoip.region_name",
"manufacturer.keyword": "manufacturer",
"order_date": "order_date",
"order_id": "order_id",
Expand All @@ -66,29 +65,30 @@ def test_ecommerce_all_aggregatables(self):
"products.product_id": "products.product_id",
"products.product_name.keyword": "products.product_name",
"products.quantity": "products.quantity",
"products.sku.keyword": "products.sku",
"products.sku": "products.sku",
"products.tax_amount": "products.tax_amount",
"products.taxful_price": "products.taxful_price",
"products.taxless_price": "products.taxless_price",
"products.unit_discount_amount": "products.unit_discount_amount",
"sku.keyword": "sku",
"sku": "sku",
"taxful_total_price": "taxful_total_price",
"taxless_total_price": "taxless_total_price",
"total_quantity": "total_quantity",
"total_unique_products": "total_unique_products",
"type.keyword": "type",
"user.keyword": "user",
"type": "type",
"user": "user",
}

assert expected == aggregatables

def test_ecommerce_selected_aggregatables(self):
expected = {
"category.keyword": "category",
"currency.keyword": "currency",
"currency": "currency",
"customer_birth_date": "customer_birth_date",
"customer_first_name.keyword": "customer_first_name",
"type.keyword": "type",
"user.keyword": "user",
"type": "type",
"user": "user",
}

ed_field_mappings = FieldMappings(
Expand All @@ -106,15 +106,15 @@ def test_ecommerce_single_aggregatable_field(self):
client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
)

assert "user.keyword" == ed_field_mappings.aggregatable_field_name("user")
assert "user" == ed_field_mappings.aggregatable_field_name("user")

def test_ecommerce_single_keyword_aggregatable_field(self):
ed_field_mappings = FieldMappings(
client=ES_TEST_CLIENT, index_pattern=ECOMMERCE_INDEX_NAME
)

assert (
"customer_first_name.keyword"
"customer_first_name"
== ed_field_mappings.aggregatable_field_name("customer_first_name")
)

Expand All @@ -126,7 +126,7 @@ def test_ecommerce_single_non_existant_field(self):
with pytest.raises(KeyError):
ed_field_mappings.aggregatable_field_name("non_existant")

@pytest.mark.skip(reason="opensearch treats all fields in ecommerce df as aggregatable")
# @pytest.mark.skip(reason="opensearch treats all fields in ecommerce df as aggregatable")
@pytest.mark.filterwarnings("ignore:Aggregations not supported")
def test_ecommerce_single_non_aggregatable_field(self):
ed_field_mappings = FieldMappings(
Expand Down

0 comments on commit 83a6230

Please sign in to comment.