Skip to content

Commit

Permalink
fix: replace xlrd with pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
soofstad committed Sep 20, 2024
1 parent db7a90b commit bf07951
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 76 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
135 changes: 118 additions & 17 deletions api/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ license = "MIT"
python = "^3.12"
azure-cosmosdb-table = "^1.0.6"
numpy = "^1.26.2"
xlrd = "^2.0.1"
azure-storage-blob = "^12.19.0"
PyJWT = "^2.8.0"
cachetools = "^5.3.2"
matplotlib = "^3.8.2"
gunicorn = "^21.2.0"
Flask = "^3.0.0"
pandas = "^2.2.2"
openpyxl = "^3.1.5"

[tool.poetry.dev-dependencies]
pytest = "^7.1.3"
Expand Down
12 changes: 4 additions & 8 deletions api/src/tests/create_product_row_test.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
import unittest
from unittest import skip

from tests.utils import read_file
from util.azure_table import process_meta_blob
from util.excel import excel_raw_file_to_sheet, sheet_to_bridge_dict
from util.excel import excel_bytes_to_dataframe


@skip("No time to fix")
class CreateProductTableRow(unittest.TestCase):
@staticmethod
def test_create_row():
with open("src/test_data/metadata.csv") as meta_file:
productdata = process_meta_blob(meta_file)
process_meta_blob(meta_file)

product_bridge_file = read_file("src/test_data/flow-carb10.xlsx")
product_sheet = excel_raw_file_to_sheet(product_bridge_file)
product_data = sheet_to_bridge_dict(product_sheet)

productdata[0]["cumulative"] = product_data["cumulative"]
product_df = excel_bytes_to_dataframe(product_bridge_file)
product_df.Cumulative.to_list()
28 changes: 15 additions & 13 deletions api/src/util/azure_blobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from pathlib import Path

from azure.storage.blob import BlobProperties, ContainerClient
from xlrd.sheet import Sheet
from pandas import DataFrame, read_excel

from config import Config
from util.azure_table import process_meta_blob, sanitize_row_key
from util.excel import excel_raw_file_to_sheet, sheet_to_bridge_dict


def get_container_client() -> ContainerClient:
Expand All @@ -18,17 +17,25 @@ def get_container_client() -> ContainerClient:
)


def from_blobs_to_excel(blobs: Iterator[BlobProperties], container_client: ContainerClient) -> dict[str, Sheet]:
sheets = {}
def excel_bytes_to_dataframe(file: bytes) -> DataFrame:
file_io = io.BytesIO(file)
df = read_excel(file_io)
return df


def from_excel_blobs_to_data_frame(
blobs: Iterator[BlobProperties], container_client: ContainerClient
) -> dict[str, DataFrame]:
products: dict[str, DataFrame] = {}
for blob in blobs:
if Path(blob.name).suffix != ".xlsx":
continue
blob_client = container_client.get_blob_client(blob)
raw_blob = blob_client.download_blob().readall()
product_id = sanitize_row_key(Path(blob.name).stem)
sheets[product_id] = excel_raw_file_to_sheet(raw_blob)
products[product_id] = excel_bytes_to_dataframe(raw_blob)

return sheets
return products


def get_metadata_blob_data() -> list[dict]:
Expand All @@ -41,10 +48,5 @@ def get_metadata_blob_data() -> list[dict]:
def get_product_blobs_data() -> dict[str, dict]:
container_client = get_container_client()
all_blobs = container_client.list_blobs()
sheets = from_blobs_to_excel(all_blobs, container_client)

table_data = {}
for filename, sheet in sheets.items():
table_data[filename] = sheet_to_bridge_dict(sheet)

return table_data
dfs = from_excel_blobs_to_data_frame(all_blobs, container_client)
return {filename: {"cumulative": data_frame.Cumulative.to_list()} for filename, data_frame in dfs.items()}
37 changes: 0 additions & 37 deletions api/src/util/excel.py

This file was deleted.

0 comments on commit bf07951

Please sign in to comment.