Merged

64 commits
a721d83
[DEV-13657] - WIP award search dataframe
zachflanders-frb Oct 22, 2025
76d7377
[DEV-13551] - Adding dataframe approach for location index
zachflanders-frb Oct 28, 2025
c838004
[DEV-13551] - Updating test for check for error with location index
zachflanders-frb Oct 29, 2025
8463bad
[DEV-13551] - WIP testing location elasticsearch index loader
zachflanders-frb Oct 30, 2025
6df23e3
Merge branch 'qat' into ftr/dev-13551-location-index-world-cities
zachflanders-frb Oct 30, 2025
bfa15cd
[DEV-13551] - Updating test location elasticsearch index loader
zachflanders-frb Oct 30, 2025
21a9f4d
[DEV-13551] - delete location loader sql file
zachflanders-frb Oct 30, 2025
0dd25a5
[DEV-13551] - Updating test location elasticsearch index loader
zachflanders-frb Oct 30, 2025
b27dcb3
[DEV-13551] - removing unused import
zachflanders-frb Oct 30, 2025
41bebae
[DEV-13551] - WIP fixing tests, updating location dataframe
zachflanders-frb Oct 30, 2025
a384a27
[DEV-13551] - fixing tests, updating location dataframe
zachflanders-frb Oct 31, 2025
1ece91c
[DEV-13551] - fixing conftest
zachflanders-frb Oct 31, 2025
b0d692f
[DEV-13551] - fixing conftest
zachflanders-frb Oct 31, 2025
774223e
[DEV-13551] - fix issue with test not having guaranteed order
zachflanders-frb Oct 31, 2025
a38de9f
Merge branch 'qat' into ftr/dev-13551-location-index-world-cities
zachflanders-frb Nov 3, 2025
9926543
Merge branch 'qat' into ftr/dev-13657-award-search-improvements
zachflanders-frb Nov 3, 2025
a98327d
[DEV-13091] update query to use total_outlay from award_search
loreleitrimberger Nov 3, 2025
ff44f98
[DEV-13091] fix typo
loreleitrimberger Nov 3, 2025
d5dc2f2
Update usaspending_api/etl/management/commands/elasticsearch_indexer_…
zachflanders-frb Nov 3, 2025
cb4c577
Update README.md
zachflanders-frb Nov 3, 2025
df40422
[DEV-13657] - WIP adding award search dataframe
zachflanders-frb Nov 3, 2025
943e2a1
[DEV-13091] update sql
loreleitrimberger Nov 4, 2025
73776e4
[DEV-13091] update sql
loreleitrimberger Nov 4, 2025
673ccfc
[DEV-13657] - WIP award search query
zachflanders-frb Nov 5, 2025
6a1211b
[DEV-13091] update sql
loreleitrimberger Nov 5, 2025
c4c6adf
[DEV-13091] update sql
loreleitrimberger Nov 5, 2025
bced306
Merge branch 'qat' into ftr/dev-13551-location-index-world-cities
zachflanders-frb Nov 5, 2025
8a77dbf
[DEV-13091] update sql
loreleitrimberger Nov 6, 2025
c61e7ab
[DEV-13657] - WIP
zachflanders-frb Nov 6, 2025
7216ddb
[DEV-13091] format
loreleitrimberger Nov 7, 2025
41dd137
[DEV-13091] fix test
loreleitrimberger Nov 7, 2025
2fc2023
[DEV-13657] - WIP
zachflanders-frb Nov 7, 2025
afee601
Merge pull request #4540 from fedspendingtransparency/mod/dev-13706-t…
sethstoudenmier Nov 10, 2025
43eacf5
[DEV-13091] pr comments
loreleitrimberger Nov 13, 2025
4407d57
Merge branch 'qat' into ftr/dev-13091-fix-award-outlays-on-idv
loreleitrimberger Nov 13, 2025
521c6ec
Merge pull request #4534 from fedspendingtransparency/ftr/dev-13091-f…
loreleitrimberger Nov 13, 2025
dd1927f
Merge branch 'qat' into fix/dev-13988-filter-by-prefix-on-s3-retrieval
sethstoudenmier Nov 13, 2025
d2b0458
[DEV-13551] - remove named_struct for spark 3.4.1
zachflanders-frb Nov 14, 2025
1d4ea5c
Merge branch 'ftr/dev-13551-location-index-world-cities' of https://g…
zachflanders-frb Nov 14, 2025
a1ffd23
[DEV-13551] - remove rlike for spark 3.4.1
zachflanders-frb Nov 14, 2025
cdc717a
[DEV-13551] - fixing struct
zachflanders-frb Nov 14, 2025
ab930fa
Merge branch 'qat' into ftr/dev-13551-location-index-world-cities
zachflanders-frb Nov 17, 2025
691c1b5
[DEV-13657] - Refactoring transaction/awards to own files
zachflanders-frb Nov 17, 2025
4f222ef
[DEV-13657] - Remove award search sql string
zachflanders-frb Nov 17, 2025
ba67839
[DEV-13657] - clean up imports
zachflanders-frb Nov 17, 2025
eb9640a
Merge branch 'qat' into ftr/dev-13657-award-search-improvements
zachflanders-frb Nov 17, 2025
64ff724
[DEV-13657] - clean up imports
zachflanders-frb Nov 17, 2025
6ad0041
Merge branch 'ftr/dev-13657-award-search-improvements' of https://git…
zachflanders-frb Nov 17, 2025
f54b1fe
Merge pull request #4545 from fedspendingtransparency/fix/dev-13988-f…
sethstoudenmier Nov 19, 2025
4a89371
Merge branch 'qat' into ftr/dev-13551-location-index-world-cities
zachflanders-frb Nov 19, 2025
cfd73a3
[DEV-13551] - adding county fips to location results
zachflanders-frb Nov 19, 2025
25fbb0a
Merge branch 'ftr/dev-13551-location-index-world-cities' of https://g…
zachflanders-frb Nov 19, 2025
998d1d0
[DEV-13551] - make max year programmatic
zachflanders-frb Nov 19, 2025
9004d21
[DEV-13657] - fix join issues
zachflanders-frb Nov 19, 2025
69aba86
Merge branch 'qat' into ftr/dev-13657-award-search-improvements
zachflanders-frb Nov 19, 2025
45167c2
Merge pull request #4529 from fedspendingtransparency/ftr/dev-13551-l…
zachflanders-frb Nov 20, 2025
7539c97
Merge branch 'qat' into ftr/dev-13657-award-search-improvements
zachflanders-frb Nov 20, 2025
36e0ce9
[DEV-13657] - remove named_struct function
zachflanders-frb Nov 20, 2025
9e64708
[DEV-13657] - remove now function
zachflanders-frb Nov 20, 2025
b0e5572
[DEV-13657] - Fix recipient hash result when all col are null
zachflanders-frb Nov 20, 2025
0d46249
[DEV-13926] Add transaction download delta table schema
sethstoudenmier Nov 24, 2025
5e8da11
Merge pull request #4546 from fedspendingtransparency/ftr/dev-13657-a…
zachflanders-frb Nov 25, 2025
f00e5bc
Merge branch 'qat' into ftr/dev-13926-transaction-download-delta-table
sethstoudenmier Nov 25, 2025
5058dbf
Merge pull request #4549 from fedspendingtransparency/ftr/dev-13926-t…
sethstoudenmier Nov 25, 2025
1 change: 0 additions & 1 deletion README.md
@@ -166,7 +166,6 @@ CURR_DATE=$(date '+%Y-%m-%d-%H-%M-%S')
docker compose run --rm usaspending-manage python3 -u manage.py elasticsearch_indexer --create-new-index --index-name "$CURR_DATE-transactions" --load-type transaction
docker compose run --rm usaspending-manage python3 -u manage.py elasticsearch_indexer --create-new-index --index-name "$CURR_DATE-awards" --load-type award
docker compose run --rm usaspending-manage python3 -u manage.py elasticsearch_indexer --create-new-index --index-name "$CURR_DATE-recipients" --load-type recipient
docker compose run --rm usaspending-manage python3 -u manage.py elasticsearch_indexer --create-new-index --index-name "$CURR_DATE-locations" --load-type location
docker compose run --rm usaspending-manage python3 -u manage.py elasticsearch_indexer --create-new-index --index-name "$CURR_DATE-subaward" --load-type subaward
```
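
(For context: the single deleted line above is the `--load-type location` indexer invocation. Judging from the new `elasticsearch_location_index` fixture added to `usaspending_api/conftest.py` later in this diff, the locations index now appears to be built by the Spark-based `elasticsearch_indexer_for_spark` management command instead; the sketch below simply mirrors that fixture's call, and the index name is hypothetical.)

```python
# Sketch only, mirroring the call made in the new elasticsearch_location_index fixture;
# the index name here is hypothetical.
from django.core.management import call_command

call_command(
    "elasticsearch_indexer_for_spark",
    create_new_index=True,
    load_type="location",
    index_name="2025-11-25-locations",
)
```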

@@ -89,9 +89,9 @@ This route sends a request to the backend to retrieve locations matching the specified…
+ `country_name` (required, string)

## CountyMatch (object)
+ `county_name` (required, string)
+ `county_fips` (required, string)
    The 5-digit county FIPS code (2-digit state FIPS code + 3-digit county FIPS code)
+ `county_name` (required, string)
+ `state_name` (required, string)
+ `country_name` (required, string)
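
(As a hedged illustration of the shape documented above, a single CountyMatch entry might look like the following; only the field names come from the object definition, and the values are made up.)

```python
# Hypothetical CountyMatch entry; field names follow the object definition above,
# values are illustrative only.
county_match = {
    "county_fips": "29189",  # 2-digit state FIPS ("29", Missouri) + 3-digit county FIPS ("189")
    "county_name": "St. Louis",
    "state_name": "Missouri",
    "country_name": "UNITED STATES",
}
```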

286 changes: 271 additions & 15 deletions usaspending_api/conftest.py
@@ -1,14 +1,17 @@
import logging
import os
import uuid
import sys
import tempfile
from decimal import Decimal
from pathlib import Path
from typing import List

import docker
import pytest
from django.conf import settings
from django.db import connections
from django.core.management import call_command
from django.db import connections, IntegrityError
from django.test import override_settings
from model_bakery import baker
from pytest_django.fixtures import _set_suffix_to_test_databases
@@ -19,10 +22,12 @@
pytest_xdist_auto_num_workers,
)

from usaspending_api.common.elasticsearch.client import instantiate_elasticsearch_client
from usaspending_api.common.elasticsearch.elasticsearch_sql_helpers import (
ensure_business_categories_functions_exist,
ensure_view_exists,
)
from usaspending_api.common.etl.spark import create_ref_temp_views, _USAS_RDS_REF_TABLES
from usaspending_api.common.helpers.generic_helper import generate_matviews
from usaspending_api.common.helpers.sql_helpers import (
build_dsn_string,
@@ -44,6 +49,7 @@
transform_xdist_worker_id_to_django_test_db_id,
)


# Compose ALL fixtures from conftest_spark
from usaspending_api.tests.conftest_spark import * # noqa
from usaspending_api.tests.integration.test_setup_of_test_dbs import TEST_DB_SETUP_TEST_NAME
@@ -404,21 +410,271 @@ def elasticsearch_recipient_index(db):


@pytest.fixture
def elasticsearch_location_index(db):
    """
    Add this fixture to your test if you intend to use the Elasticsearch
    location index. To use, create some mock database data then call
    elasticsearch_location_index.update_index to populate Elasticsearch.

    See test_demo_elasticsearch_tests.py for sample usage.
    """
    elastic_search_index = TestElasticSearchIndex("location")
    with override_settings(
        ES_LOCATIONS_QUERY_ALIAS_PREFIX=elastic_search_index.alias_prefix,
        ES_LOCATIONS_WRITE_ALIAS=elastic_search_index.etl_config["write_alias"],
    ):
        yield elastic_search_index
        elastic_search_index.delete_index()


@pytest.fixture
def location_data_fixture(db):
baker.make("references.RefCountryCode", country_code="DNK", country_name="DENMARK")
baker.make("references.RefCountryCode", country_code="FRA", country_name="FRANCE")
baker.make("references.RefCountryCode", country_code="TST", country_name="TEST COUNTRY")
baker.make("references.RefCountryCode", country_code="USA", country_name="UNITED STATES")

baker.make("recipient.StateData", id="1", code="CO", name="Colorado", year=2017)
baker.make("recipient.StateData", id="2", code="CA", name="California", year=2017)
baker.make("recipient.StateData", id="3", code="TX", name="Texas", year=2017)
baker.make("recipient.StateData", id="4", code="IL", name="Illinois", year=2017)
baker.make("recipient.StateData", id="5", code="OK", name="Oklahoma", year=2017)
baker.make("recipient.StateData", id="6", name="MISSOURI", code="MO", year=2017)
baker.make("recipient.StateData", id="7", name="KANSAS", code="KS", year=2017)

baker.make(
"references.CityCountyStateCode",
id=1,
feature_name="Denver",
state_alpha="CO",
state_numeric="08",
county_numeric="100",
)
baker.make(
"references.CityCountyStateCode",
id=2,
feature_name="Texas A City",
state_alpha="TX",
state_numeric="48",
county_numeric="101",
)
baker.make(
"references.CityCountyStateCode",
id=3,
feature_name="Texas B City",
state_alpha="TX",
state_numeric="48",
county_numeric="102",
)
baker.make(
"references.CityCountyStateCode",
id=4,
feature_name="Texas C City",
state_alpha="IL",
state_numeric="17",
county_numeric="103",
)
baker.make(
"references.CityCountyStateCode",
id=5,
feature_name="Texas D City",
state_alpha="OK",
state_numeric="40",
county_numeric="104",
)
baker.make(
"references.CityCountyStateCode",
id=6,
feature_name="Texas E City",
state_alpha="TX",
state_numeric="48",
county_numeric="105",
)
baker.make(
"references.CityCountyStateCode",
id=7,
feature_name="Texas F City",
state_alpha="TX",
state_numeric="48",
county_numeric="106",
)
baker.make(
"references.CityCountyStateCode",
id=8,
county_name="Los Angeles",
state_alpha="CA",
state_numeric="06",
county_numeric="107",
)

baker.make("references.ZipsGrouped", zips_grouped_id=1, zip5="90210", state_abbreviation="CA")
baker.make("references.ZipsGrouped", zips_grouped_id=2, zip5="90211", state_abbreviation="CA")


@pytest.fixture
def world_cities_delta_table(s3_unittest_data_bucket):
test_data = [
[
"city",
"city_ascii",
"city_alt",
"city_local",
"city_local_lang",
"lat",
"lng",
"country",
"iso2",
"iso3",
"admin_name",
"admin_name_ascii",
"admin_code",
"admin_type",
"capital",
"density",
"population",
"population_proper",
"ranking",
"timezone",
"same_name",
"id",
],
[
"test_city",
"test_city_ascii",
"test_city_alt|another test city|hello world",
"test_city_local",
"test_city_local_lang",
Decimal("90.0000"),
Decimal("180.0000"),
"test_country",
"test_iso2",
"TST",
"test_admin_name",
"test_admin_name_ascii",
"test_admin_code",
"test_admin_type",
"test_capital",
10.0,
100_000,
100_000,
"test_ranking",
"test_timezone",
False,
1234,
],
[
"COPENHAGEN",
"test_city_ascii",
"",
"test_city_local",
"test_city_local_lang",
Decimal("90.0000"),
Decimal("180.0000"),
"test_country",
"test_iso2",
"DNK",
"test_admin_name",
"test_admin_name_ascii",
"test_admin_code",
"test_admin_type",
"test_capital",
10.0,
100_000,
100_000,
"test_ranking",
"test_timezone",
False,
1234,
],
[
"PARIS",
"test_city_ascii",
"",
"test_city_local",
"test_city_local_lang",
Decimal("90.0000"),
Decimal("180.0000"),
"test_country",
"test_iso2",
"FRA",
"test_admin_name",
"test_admin_name_ascii",
"test_admin_code",
"test_admin_type",
"test_capital",
10.0,
100_000,
100_000,
"test_ranking",
"test_timezone",
False,
1234,
],
]

with tempfile.NamedTemporaryFile(mode="w") as f:
for row in test_data:
f.write(",".join(str(v) for v in row) + "\n")
f.flush()
f.seek(0)
call_command(
"load_csv_to_delta",
"--destination-table=world_cities",
f"--source-path={f.name}",
f"--spark-s3-bucket={s3_unittest_data_bucket}",
)


@pytest.fixture
def transaction_search_delta_table(spark):
spark.sql("CREATE DATABASE IF NOT EXISTS rpt")
transaction_search_df = spark.createDataFrame(
[
{
"pop_state_code": "MO",
"pop_congressional_code_current": "01",
"recipient_location_state_code": "MO",
"recipient_location_congressional_code_current": "01",
"pop_congressional_code": "01",
"recipient_location_congressional_code": "01",
},
{
"pop_state_code": "KS",
"pop_congressional_code_current": "01",
"recipient_location_state_code": "KS",
"recipient_location_congressional_code_current": "01",
"pop_congressional_code": "01",
"recipient_location_congressional_code": "01",
},
{
"pop_state_code": "CA",
"pop_congressional_code_current": "34",
"recipient_location_state_code": "CA",
"recipient_location_congressional_code_current": "34",
"pop_congressional_code": "34",
"recipient_location_congressional_code": "34",
},
{
"pop_state_code": "CA",
"pop_congressional_code_current": "34",
"recipient_location_state_code": "CA",
"recipient_location_congressional_code_current": "34",
"pop_congressional_code": "34",
"recipient_location_congressional_code": "34",
},
]
)
transaction_search_df.write.saveAsTable("rpt.transaction_search")


@pytest.fixture
def elasticsearch_location_index(
location_data_fixture,
world_cities_delta_table,
transaction_search_delta_table,
spark,
s3_unittest_data_bucket,
hive_unittest_metastore_db,
):
for rds_ref_table in _USAS_RDS_REF_TABLES:
try:
baker.make(rds_ref_table)
except IntegrityError:
pass
create_ref_temp_views(spark)
index_name = f"{uuid.uuid4()}-test-locations"
client = instantiate_elasticsearch_client()
try:
call_command(
"elasticsearch_indexer_for_spark", create_new_index=True, load_type="location", index_name=index_name
)
yield client
except Exception as e:
raise e
finally:
client.indices.delete(index_name, ignore_unavailable=True)


@pytest.fixture(scope="session")
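
(A minimal sketch of how a test might consume the new `elasticsearch_location_index` fixture, assuming the yielded object is the Elasticsearch client and that the freshly built index is the only one matching `*-test-locations`; the test name, queried field, and expected hit count are assumptions, not taken from this PR.)

```python
# Hypothetical usage of the fixture above; the queried field name is an assumption.
def test_location_index_contains_copenhagen(elasticsearch_location_index):
    client = elasticsearch_location_index  # the fixture yields an Elasticsearch client
    response = client.search(
        index="*-test-locations",  # matches the f"{uuid.uuid4()}-test-locations" pattern used by the fixture
        body={"query": {"match": {"city_name": "COPENHAGEN"}}},
    )
    assert response["hits"]["total"]["value"] >= 1
```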
5 changes: 0 additions & 5 deletions usaspending_api/conftest_helpers.py
@@ -23,7 +23,6 @@
create_award_type_aliases,
execute_sql_statement,
transform_award_data,
transform_location_data,
transform_subaward_data,
transform_transaction_data,
)
@@ -205,8 +204,6 @@ def _add_contents(self, **options):
records = transform_transaction_data(self.worker, records)
elif self.index_type == "subaward":
records = transform_subaward_data(self.worker, records)
elif self.index_type == "location":
records = transform_location_data(self.worker, records)

for record in records:
# Special cases where we convert array of JSON to an array of strings to avoid nested types
@@ -237,8 +234,6 @@ def _generate_index_name(self):
required_suffix = "-" + settings.ES_SUBAWARD_NAME_SUFFIX
elif self.index_type == "recipient":
required_suffix = "-" + settings.ES_RECIPIENTS_NAME_SUFFIX
elif self.index_type == "location":
required_suffix = "-" + settings.ES_LOCATIONS_NAME_SUFFIX
return (
f"test-{datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-%f')}"
f"-{generate_random_string()}"