more examples, e.g. Top10 and Excel upload

entorb · entorb · commit 6109f04b4659 · 2025-07-04T20:53:08.000+02:00
diff --git a/src/reports/r01_text.py b/src/reports/r01_text.py
@@ -22,8 +22,8 @@
 cols[2].subheader("Col3")
 cols[2].write(DUMMY_TEXT)
 
-st.subheader("1/3 and 3/4 columns with only the first used")
-cols = st.columns([1, 3])  # 1/3 and 3/4 columns
+st.subheader("1/3 and 2/3 columns with only the first used")
+cols = st.columns((1, 2))  # 1/3 and 2/3 columns
 cols[0].subheader("Col1")
 cols[0].write(DUMMY_TEXT)
 
@@ -36,9 +36,25 @@
 
 cols[1].subheader("Markdown")
 cols[1].markdown("""
-This is a **markdown** text with *italics* and `code` and [link](https://streamlit.io).
+This is a markdown text with **bold**,
+*italics*, `code`, [link](https://streamlit.io).
 """)
 
+st.columns(1)  # NOTE(review): return value is discarded - confirm this call is needed
+
+st.subheader("Code")
+cols = st.columns(3)  # one column per rendering: plain, Python, CSV
+
+s = """# Some comment
+print("Hello, world!")
+"""
+cols[0].write("Plain format")
+cols[0].code(s, language=None)
+cols[1].write("Python highlighting")
+cols[1].code(s, language="python")
+cols[2].write("CSV")
+cols[2].code("Column 1,Column 2\n12.3,23.4", language="csv")
+
 
 st.subheader("LaTeX")
 # 2 equals 1
diff --git a/src/reports/r04_tables.py b/src/reports/r04_tables.py
@@ -1,5 +1,7 @@
 """Tables."""
 
+import datetime as dt
+
 import numpy as np
 import pandas as pd
 import streamlit as st
@@ -20,7 +22,7 @@
     }
 )
 
-st.header("simple tables")
+st.header("Simple Tables")
 st.subheader("st.write")
 st.write(df)
 
@@ -45,3 +47,56 @@
     column_order=["url", "date", "value"],
     use_container_width=False,
 )
+
+st.header("From raw data to Top 10")
+
+
+def gen_random_data(rows: int = 100) -> pd.DataFrame:
+    """Generate random rows sorted by datetime (an empty frame for rows=0)."""
+    rng = np.random.default_rng()
+    dt_offset = dt.datetime(2025, 1, 1, 0, 0, 0, tzinfo=None)  # noqa: DTZ001
+    names = ["cat 1", "cat 2", "cat 3"]
+    data = [
+        {
+            "datetime": dt_offset
+            + dt.timedelta(seconds=int(rng.integers(0, 86400) * 31)),  # < 31 d
+            "category": rng.choice(names),
+            "value": int(rng.integers(0, 100)),
+        }
+        for _ in range(rows)
+    ]
+    df = pd.DataFrame(data, columns=["datetime", "category", "value"])
+    return df.sort_values("datetime").reset_index(drop=True)
+
+
+def enrich_data(df: pd.DataFrame) -> pd.DataFrame:
+    """Add calendar-part columns and a tens-rounded value; mutates `df`."""
+    stamps = df["datetime"].dt
+    # Each calendar part becomes a column named after the accessor attribute.
+    for part in ("year", "month", "day", "hour"):
+        df[part] = getattr(stamps, part)
+    df["value_rounded"] = df["value"].round(-1)  # rounded to the tens
+    return df
+
+
+df = gen_random_data()
+st.subheader("Raw data")
+st.dataframe(df, hide_index=True)
+
+df = enrich_data(df)
+
+st.subheader("Top10")
+rel_cols = st.multiselect(
+    label="column",
+    options=["category", "day", "hour", "value_rounded"],
+    default=["category", "hour", "value_rounded"],
+)
+
+if rel_cols:
+    cols = st.columns(len(rel_cols))
+    for i, col in enumerate(rel_cols):
+        cols[i].write(col.title().replace("_", " "))
+        df2 = df.groupby(col).size().to_frame("count")
+        # Rank before truncating: head(10) first keeps the 10 smallest keys.
+        df2 = df2.sort_values(["count", col], ascending=[False, True])
+        cols[i].dataframe(df2.head(10))
diff --git a/src/reports/r05_data_editor.py b/src/reports/r05_data_editor.py
@@ -31,6 +31,9 @@
-    df2["Lat"] = df2["Lat"].clip(lower=-180, upper=180).round(4)
-    df2["Lng"] = df2["Lng"].clip(lower=-90, upper=90).round(4)
+    # Latitude is bounded by +/-90 degrees, longitude by +/-180.
+    df2["Lat"] = df2["Lat"].clip(lower=-90, upper=90).round(4)
+    df2["Lng"] = df2["Lng"].clip(lower=-180, upper=180).round(4)
     df2 = df2.sort_values("Name")
-    p = Path("filename.txt")
+    p = Path("out/filename.csv")
+    # to_csv does not create missing parent directories.
+    p.parent.mkdir(parents=True, exist_ok=True)
     df2.to_csv(p, sep="\t", index=False, header=True, lineterminator="\n")
-    st.rerun()
+    st.write(f"written to `{p}`")
diff --git a/src/reports/r11_file_download.py b/src/reports/r11_file_download.py
@@ -8,8 +8,6 @@
 
 from helper import filename_to_title, get_logger_from_filename
 
-# pip install xlsxwriter
-
 logger = get_logger_from_filename(__file__)
 
 st.title(filename_to_title(__file__))
@@ -19,7 +17,9 @@
 st.write("Prepare step makes sense if calculations are needed.")
 cols = st.columns((1, 1, 6))
 if cols[0].button(label="File Prepare"):
-    cont = """This is the content of the text file."""
+    cont = """This is the content
+of the text file."""
+    st.code(cont, language=None)
     buffer = io.BytesIO()
     buffer.write(cont.encode("utf-8"))
     buffer.seek(0)
@@ -44,6 +44,7 @@
 )
 cols = st.columns((1, 1, 6))
 if cols[0].button(label="Excel Prepare"):
+    st.dataframe(df, hide_index=True)
     buffer = io.BytesIO()
     with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
         df.to_excel(writer, sheet_name="Sheet1", index=False)
diff --git a/src/reports/r12_file_upload.py b/src/reports/r12_file_upload.py
@@ -3,7 +3,8 @@
 import re
 from io import StringIO
 
-import chardet  # pip install chardet
+import chardet
+import pandas as pd
 import streamlit as st
 
 from helper import filename_to_title, get_logger_from_filename
@@ -20,17 +21,38 @@ def guess_encoding(raw_data: bytes) -> str:  # noqa: D103
     return "utf-8"  # as fallback
 
 
-st.header("Upload a text file (and guess encoding)")
+st.header("Upload text file (and guess encoding)")
 
 st.write("set `maxUploadSize` in config.toml")
 
-uploaded_file = st.file_uploader("Upload file", type="txt")
+uploaded_txt = st.file_uploader("Upload file", type="txt")
 
-if uploaded_file:
-    raw_data = uploaded_file.getvalue()
+if uploaded_txt:
+    raw_data = uploaded_txt.getvalue()
     encoding = guess_encoding(raw_data)
     s = StringIO(raw_data.decode(encoding)).read()
     # some cleanup
     s = s.replace("\r", "")
     s = re.sub(r"[\n\s]+$", "", s)
-    st.write(s.split("\n"))
+    # language=None: render the cleaned text verbatim, without highlighting.
+    st.code(s, language=None)
+
+
+st.header("Upload Excel file")
+st.write("this requires `pip install openpyxl`")
+uploaded_xlsx = st.file_uploader("Upload Excel file", type="xlsx")
+
+if uploaded_xlsx:
+    df = pd.read_excel(uploaded_xlsx, engine="openpyxl")
+    # Optional read_excel arguments: sheet_name= picks the sheet,
+    # usecols= limits which columns are loaded.
+    st.dataframe(df, hide_index=True)
+
+st.subheader("Alternative: read Excel from file")
+st.code(  # display-only snippet; the string is shown, not executed
+    """
+p = Path("path/to/local.xlsx")
+df = pd.read_excel(p, engine="openpyxl")
+""",
+    language="python",
+)