All the updates
sfc-gh-jcarroll committed Feb 3, 2023
1 parent 31c04df commit 7949cde
Showing 11 changed files with 1,294 additions and 175 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
-.streamlit/secrets.toml
+cli/.streamlit/secrets.toml
 *.db
 *.db
+/test*
3 changes: 1 addition & 2 deletions Home.py
@@ -10,8 +10,7 @@
"""
**Landing page and demo for the st.connection Private Preview.**
* This app is running on [this commit](https://github.com/streamlit/streamlit/tree/f4f6c110f7b571ff122f7f7602603c8b324623f8).
The full PR is [here](https://github.com/streamlit/streamlit/pull/6035).
* The full PR is [here](https://github.com/streamlit/streamlit/pull/6035).
* Install the latest .whl file from [here](https://core-previews.s3-us-west-2.amazonaws.com/pr-6035/streamlit-1.17.0-py2.py3-none-any.whl).
"""

5 changes: 4 additions & 1 deletion Pipfile
@@ -4,12 +4,15 @@ verify_ssl = true
name = "pypi"

[packages]
psycopg2-binary = "*"
toml = "*"
watchdog = "*"
pyarrow = {version = "==8.0.0", extras = ["pandas"]}
sqlalchemy = "==1.4"
duckdb = "*"
fsspec = "*"
s3fs = "*"
gcsfs = "*"
snowflake-snowpark-python = {extras = ["pandas"], version = "*"}
streamlit = {path = "./streamlit-1.17.0-py2.py3-none-any.whl"}

[dev-packages]
1,090 changes: 921 additions & 169 deletions Pipfile.lock

Large diffs are not rendered by default.

File renamed without changes.
64 changes: 64 additions & 0 deletions pages/02_❄️_Snowpark.py
@@ -0,0 +1,64 @@
import streamlit as st

from snowflake.snowpark.functions import col
import pandas as pd

st.set_page_config(
    page_title='st.connection PrPr - Snowpark',
    page_icon='πŸ”Œ'
)

st.title('πŸ”Œ st.connection PrPr - Snowpark')

st.markdown("""
See the <a href='/Detailed_Docs#snowpark-connection' target='_self'>Detailed Docs</a> for quickstart, install instructions and the full API reference.
""", unsafe_allow_html=True)

"Snowpark connection API is shown here, but won't work in the Cloud app since it needs local Snowflake credentials."

"If you have Snowflake credentials, you can clone the app locally and use it with your existing creds."

run_the_code = st.checkbox("Try running the code (requires local snowflake creds)")

st.subheader("Initialize a connection")
with st.echo():
    if run_the_code:
        conn = st.connection('snowpark')

        conn

st.subheader("read_sql() for convenience")

"`conn.read_sql()` will cache by default and return a pandas dataframe."

with st.expander("⚠️ **NOTE:** On read_sql and native Snowpark dataframes"):
    """
    If you do further calculations on a dataframe returned this way, they run in the app
    itself, which may be slower than performing them natively in a Snowpark DataFrame.
    This approach is good for prototyping and fast app building, but consider moving to
    the session() API below for high-traffic apps or apps that work with a lot of data.
    """

with st.echo():
    if run_the_code:
        query = """
            select 50 as high_fives, 25 as fist_bumps, 'Q1' as quarter
            union
            select 20 as high_fives, 35 as fist_bumps, 'Q2' as quarter
            union
            select 60 as high_fives, 30 as fist_bumps, 'Q3' as quarter
        """
        df = conn.read_sql(query)
        st.dataframe(df)

st.subheader("session() for full operations")
"Use `conn.session()` to get the underlying Snowpark Session for more advanced (and often faster) operations."

"You may want to wrap this in a function with `@st.cache_data` to be even faster!"
with st.echo():
    if run_the_code:
        with conn.session() as session:
            local_df = pd.DataFrame({"OWNER": ["jerry", "barbara", "alex"], "PET": ["fish", "cat", "puppy"], "COUNT": [4, 2, 1]})
            snow_df = session.create_dataframe(local_df)
            snow_df = snow_df.filter(col('COUNT') > 1)
            st.dataframe(snow_df)
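
st.subheader("Caching session results")
"""
A hedged sketch (not part of the original demo): wrap the session work in a function
decorated with `@st.cache_data` so reruns reuse the computed result. The function name
`get_popular_pets` is illustrative.
"""
with st.echo():
    if run_the_code:
        @st.cache_data
        def get_popular_pets():
            # Run the Snowpark work once and return pandas, so the result is cacheable.
            with conn.session() as session:
                snow_df = session.create_dataframe(local_df).filter(col('COUNT') > 1)
                return snow_df.to_pandas()

        st.dataframe(get_popular_pets())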
249 changes: 249 additions & 0 deletions pages/03_πŸ—‚οΈ_Files.py
@@ -0,0 +1,249 @@
import streamlit as st

import os
import json
from tempfile import NamedTemporaryFile
import pandas as pd

st.set_page_config(
    page_title='st.connection PrPr - Files',
    page_icon='πŸ”Œ'
)

st.title('πŸ”Œ st.connection PrPr - Files')

st.markdown("""
See the <a href='/Detailed_Docs#file-connection' target='_self'>Detailed Docs</a> for quickstart, install instructions and the full API reference.
""", unsafe_allow_html=True)

df = pd.DataFrame({"Owner": ["jerry", "barbara", "alex"], "Pet": ["fish", "cat", "puppy"], "Count": [4, 2, 1]})

local, s3, s3_other, gcs, gcs_other = st.tabs(
    [
        "Local files",
        "S3 files",
        "S3 files (other credentials)",
        "GCS files",
        "GCS files (other credentials)",
    ]
)
with local:
    st.write("## Working with local files")
    with st.echo():
        conn = st.connection('files')
        text_file = "test.txt"
        csv_file = "test.csv"
        parquet_file = "test.parquet"

        st.write("## Text files")
        with conn.open(text_file, "wt") as f:
            f.write("This is a test")

        st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        with conn.open(csv_file, "wt") as f:
            df.to_csv(f, index=False)

        st.write(conn.read_csv(csv_file))

        st.write("## Parquet files")
        with conn.open(parquet_file, "wb") as f:
            df.to_parquet(f)

        st.write(conn.read_parquet(parquet_file))


with s3:
    st.write("## Working with S3 files")
    st.write("Credentials are stored in secrets.toml")

    st.code(
        """
        # In secrets.toml
        [connections.s3]
        protocol = "s3"
        key = "..."
        secret = "..."
        """,
        language="toml",
    )

    with st.echo():
        conn = st.connection('s3')

        text_file = "st-connection-test/test.txt"
        csv_file = "st-connection-test/test.csv"
        parquet_file = "st-connection-test/test.parquet"

        st.write("## Text files")

        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))

with s3_other:
    st.write("## Working with S3 files")

    # HACK: copy the s3 secrets into environment variables so they are picked up
    # as "ambient" credentials for this demo.
    secrets = st.secrets["connections"]["s3"]

    os.environ["AWS_ACCESS_KEY_ID"] = secrets["key"]
    os.environ["AWS_SECRET_ACCESS_KEY"] = secrets["secret"]

    st.write(
        "Credentials stored in `~/.aws/config` or `AWS_ACCESS_KEY_ID` & "
        "`AWS_SECRET_ACCESS_KEY` environment variables"
    )

    with st.echo():
        conn = st.connection('s3', name="s3-other")

        text_file = "st-connection-test/test2.txt"
        csv_file = "st-connection-test/test2.csv"
        parquet_file = "st-connection-test/test2.parquet"

        st.write("## Text files")
        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))


with gcs:
    st.write("## Working with Google Cloud Storage files")
    st.write("Credentials are set in secrets.toml")

    st.code(
        """
        # In secrets.toml
        [connections.gcs]
        protocol = "gcs"
        type = "..."
        project_id = "..."
        private_key_id = "..."
        private_key = "-----BEGIN PRIVATE KEY-----\n..."
        client_email = "..."
        client_id = "..."
        auth_uri = "https://accounts.google.com/o/oauth2/auth"
        token_uri = "https://oauth2.googleapis.com/token"
        auth_provider_x509_cert_url = "https://www.googleapis.com/oauth2/v1/certs"
        client_x509_cert_url = "..."
        """,
        language="toml",
    )

    with st.echo():
        conn = st.connection('gcs')

        text_file = "st-connection-test/test.txt"
        csv_file = "st-connection-test/test.csv"
        parquet_file = "st-connection-test/test.parquet"

        st.write("## Text files")

        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))

with gcs_other:
    "## Working with Google Cloud Storage files"
    st.write("Credentials are provided by a path to a service account JSON file")

    connection_details = dict(st.secrets["connections"]["gcs"])
    del connection_details["protocol"]

    # Dump the service account info to a temporary file, and stay inside this block
    # so the file still exists when the connection below reads it.
    with NamedTemporaryFile("w+", suffix=".json") as f:
        json.dump(connection_details, f)
        f.flush()
        credentials_file_name = f.name

        with st.echo():
            conn = st.connection('gcs', name="gcs-other", token=credentials_file_name)

            text_file = "st-connection-test/test4.txt"
            csv_file = "st-connection-test/test4.csv"
            parquet_file = "st-connection-test/test4.parquet"

            st.write("## Text files")
            try:
                st.write(conn.read_text(text_file))
            except FileNotFoundError:
                with conn.open(text_file, "wt") as f:
                    f.write("This is a test")
                st.write(conn.read_text(text_file))

            st.write("## CSV Files")
            try:
                st.write(conn.read_csv(csv_file))
            except FileNotFoundError:
                with conn.open(csv_file, "wt") as f:
                    df.to_csv(f, index=False)
                st.write(conn.read_csv(csv_file))

            st.write("## Parquet Files")
            try:
                st.write(conn.read_parquet(parquet_file))
            except FileNotFoundError:
                with conn.open(parquet_file, "wb") as f:
                    df.to_parquet(f)
                st.write(conn.read_parquet(parquet_file))
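
# A hedged refactor sketch (not in the original commit): every tab above repeats the
# same read-or-create pattern; it could collapse into one helper like this.
# `read_or_create` is an illustrative name, not part of the st.connection API.
def read_or_create(conn, path, reader, create, mode="wt"):
    """Read `path` with `reader`; if it doesn't exist, create it via `create(f)` and retry."""
    try:
        return reader(path)
    except FileNotFoundError:
        with conn.open(path, mode) as f:
            create(f)
        return reader(path)

# Example usage with the names from the tabs above:
# st.write(read_or_create(conn, csv_file, conn.read_csv, lambda f: df.to_csv(f, index=False)))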
File renamed without changes.
