All the updates
sfc-gh-jcarroll committed Feb 3, 2023
1 parent 31c04df commit 7949cde
Showing 11 changed files with 1,294 additions and 175 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
-.streamlit/secrets.toml
+cli/.streamlit/secrets.toml
 *.db
 *.db
+/test*
3 changes: 1 addition & 2 deletions Home.py
@@ -10,8 +10,7 @@
"""
**Landing page and demo for the st.connection Private Preview.**
* This app is running on [this commit](https://github.com/streamlit/streamlit/tree/f4f6c110f7b571ff122f7f7602603c8b324623f8).
The full PR is [here](https://github.com/streamlit/streamlit/pull/6035).
* The full PR is [here](https://github.com/streamlit/streamlit/pull/6035).
* Install the latest .whl file from [here](https://core-previews.s3-us-west-2.amazonaws.com/pr-6035/streamlit-1.17.0-py2.py3-none-any.whl).
"""

5 changes: 4 additions & 1 deletion Pipfile
@@ -4,12 +4,15 @@ verify_ssl = true
name = "pypi"

[packages]
psycopg2-binary = "*"
toml = "*"
watchdog = "*"
pyarrow = {version = "==8.0.0", extras = ["pandas"]}
sqlalchemy = "==1.4"
duckdb = "*"
fsspec = "*"
s3fs = "*"
gcsfs = "*"
snowflake-snowpark-python = {extras = ["pandas"], version = "*"}
streamlit = {path = "./streamlit-1.17.0-py2.py3-none-any.whl"}

[dev-packages]
1,090 changes: 921 additions & 169 deletions Pipfile.lock

Large diffs are not rendered by default.

File renamed without changes.
64 changes: 64 additions & 0 deletions pages/02_❄️_Snowpark.py
@@ -0,0 +1,64 @@
import streamlit as st

from snowflake.snowpark.functions import col
import pandas as pd

st.set_page_config(
    page_title='st.connection PrPr - Snowpark',
    page_icon='πŸ”Œ'
)

st.title('πŸ”Œ st.connection PrPr - Snowpark')

st.markdown("""
See the <a href='/Detailed_Docs#snowpark-connection' target='_self'>Detailed Docs</a> for quickstart, install instructions and the full API reference.
""", unsafe_allow_html=True)

"Snowpark connection API is shown here, but won't work in the Cloud app since it needs local Snowflake credentials."

"If you have Snowflake credentials, you can clone the app locally and use it with your existing creds."

run_the_code = st.checkbox("Try running the code (requires local snowflake creds)")

st.subheader("Initialize a connection")
with st.echo():
    if run_the_code:
        conn = st.connection('snowpark')

        conn

st.subheader("read_sql() for convenience")

"`conn.read_sql()` will cache by default and return a pandas dataframe."

with st.expander("⚠️ **NOTE:** On read_sql and native Snowpark dataframes"):
    """
    If you do further calculations on a dataframe returned this way, they run in the app
    itself, which may be slower than performing them natively in a Snowpark DataFrame.
    This approach is good for prototyping and fast app building, but consider moving to
    the session() API below for high-traffic apps or apps that work with a lot of data.
    """

with st.echo():
    if run_the_code:
        query = """
            select 50 as high_fives, 25 as fist_bumps, 'Q1' as quarter
            union
            select 20 as high_fives, 35 as fist_bumps, 'Q2' as quarter
            union
            select 60 as high_fives, 30 as fist_bumps, 'Q3' as quarter
        """
        df = conn.read_sql(query)
        st.dataframe(df)

st.subheader("session() for full operations")
"Use `conn.session()` to get the underlying Snowpark Session for more advanced (and often faster) operations."

"You may want to wrap this in a function with `@st.cache_data` to be even faster!"
with st.echo():
    if run_the_code:
        with conn.session() as session:
            local_df = pd.DataFrame({"OWNER": ["jerry", "barbara", "alex"], "PET": ["fish", "cat", "puppy"], "COUNT": [4, 2, 1]})
            snow_df = session.create_dataframe(local_df)
            snow_df = snow_df.filter(col('COUNT') > 1)
            st.dataframe(snow_df)
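
st.subheader("Caching session results")
"""
A hedged sketch (not part of the original demo): wrap the session work in a function
decorated with `@st.cache_data` so reruns reuse the computed result. The function name
`get_popular_pets` is illustrative.
"""
with st.echo():
    if run_the_code:
        @st.cache_data
        def get_popular_pets():
            # Run the Snowpark work once and return pandas, so the result is cacheable.
            with conn.session() as session:
                snow_df = session.create_dataframe(local_df).filter(col('COUNT') > 1)
                return snow_df.to_pandas()

        st.dataframe(get_popular_pets())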
249 changes: 249 additions & 0 deletions pages/03_πŸ—‚οΈ_Files.py
@@ -0,0 +1,249 @@
import streamlit as st

import os
import json
from tempfile import NamedTemporaryFile
import pandas as pd

st.set_page_config(
    page_title='st.connection PrPr - Files',
    page_icon='πŸ”Œ'
)

st.title('πŸ”Œ st.connection PrPr - Files')

st.markdown("""
See the <a href='/Detailed_Docs#file-connection' target='_self'>Detailed Docs</a> for quickstart, install instructions and the full API reference.
""", unsafe_allow_html=True)

df = pd.DataFrame({"Owner": ["jerry", "barbara", "alex"], "Pet": ["fish", "cat", "puppy"], "Count": [4, 2, 1]})

local, s3, s3_other, gcs, gcs_other = st.tabs(
    [
        "Local files",
        "S3 files",
        "S3 files (other credentials)",
        "GCS files",
        "GCS files (other credentials)",
    ]
)
with local:
    st.write("## Working with local files")
    with st.echo():
        conn = st.connection('files')
        text_file = "test.txt"
        csv_file = "test.csv"
        parquet_file = "test.parquet"

        st.write("## Text files")
        with conn.open(text_file, "wt") as f:
            f.write("This is a test")

        st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        with conn.open(csv_file, "wt") as f:
            df.to_csv(f, index=False)

        st.write(conn.read_csv(csv_file))

        st.write("## Parquet files")
        with conn.open(parquet_file, "wb") as f:
            df.to_parquet(f)

        st.write(conn.read_parquet(parquet_file))


with s3:
    st.write("## Working with S3 files")
    st.write("Credentials are stored in secrets.toml")

    st.code(
        """
        # In secrets.toml
        [connections.s3]
        protocol = "s3"
        key = "..."
        secret = "..."
        """,
        language="toml",
    )

    with st.echo():
        conn = st.connection('s3')

        text_file = "st-connection-test/test.txt"
        csv_file = "st-connection-test/test.csv"
        parquet_file = "st-connection-test/test.parquet"

        st.write("## Text files")

        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))

with s3_other:
    st.write("## Working with S3 files")

    # HACK: copy the s3 secrets into environment variables so they are picked up
    # as "ambient" credentials for this demo.
    secrets = st.secrets["connections"]["s3"]

    os.environ["AWS_ACCESS_KEY_ID"] = secrets["key"]
    os.environ["AWS_SECRET_ACCESS_KEY"] = secrets["secret"]

    st.write(
        "Credentials stored in `~/.aws/config` or `AWS_ACCESS_KEY_ID` & "
        "`AWS_SECRET_ACCESS_KEY` environment variables"
    )

    with st.echo():
        conn = st.connection('s3', name="s3-other")

        text_file = "st-connection-test/test2.txt"
        csv_file = "st-connection-test/test2.csv"
        parquet_file = "st-connection-test/test2.parquet"

        st.write("## Text files")
        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))


with gcs:
    st.write("## Working with Google Cloud Storage files")
    st.write("Credentials are set in secrets.toml")

    st.code(
        """
        # In secrets.toml
        [connections.gcs]
        protocol = "gcs"
        type = "..."
        project_id = "..."
        private_key_id = "..."
        private_key = "-----BEGIN PRIVATE KEY-----\n..."
        client_email = "..."
        client_id = "..."
        auth_uri = "https://accounts.google.com/o/oauth2/auth"
        token_uri = "https://oauth2.googleapis.com/token"
        auth_provider_x509_cert_url = "https://www.googleapis.com/oauth2/v1/certs"
        client_x509_cert_url = "..."
        """,
        language="toml",
    )

    with st.echo():
        conn = st.connection('gcs')

        text_file = "st-connection-test/test.txt"
        csv_file = "st-connection-test/test.csv"
        parquet_file = "st-connection-test/test.parquet"

        st.write("## Text files")

        try:
            st.write(conn.read_text(text_file))
        except FileNotFoundError:
            with conn.open(text_file, "wt") as f:
                f.write("This is a test")
            st.write(conn.read_text(text_file))

        st.write("## CSV Files")
        try:
            st.write(conn.read_csv(csv_file))
        except FileNotFoundError:
            with conn.open(csv_file, "wt") as f:
                df.to_csv(f, index=False)
            st.write(conn.read_csv(csv_file))

        st.write("## Parquet Files")
        try:
            st.write(conn.read_parquet(parquet_file))
        except FileNotFoundError:
            with conn.open(parquet_file, "wb") as f:
                df.to_parquet(f)
            st.write(conn.read_parquet(parquet_file))

with gcs_other:
    "## Working with Google Cloud Storage files"
    st.write("Credentials are provided by a path to a service account JSON file")

    connection_details = dict(st.secrets["connections"]["gcs"])
    del connection_details["protocol"]

    # Dump the service account info to a temporary file, and stay inside this block
    # so the file still exists when the connection below reads it.
    with NamedTemporaryFile("w+", suffix=".json") as f:
        json.dump(connection_details, f)
        f.flush()
        credentials_file_name = f.name

        with st.echo():
            conn = st.connection('gcs', name="gcs-other", token=credentials_file_name)

            text_file = "st-connection-test/test4.txt"
            csv_file = "st-connection-test/test4.csv"
            parquet_file = "st-connection-test/test4.parquet"

            st.write("## Text files")
            try:
                st.write(conn.read_text(text_file))
            except FileNotFoundError:
                with conn.open(text_file, "wt") as f:
                    f.write("This is a test")
                st.write(conn.read_text(text_file))

            st.write("## CSV Files")
            try:
                st.write(conn.read_csv(csv_file))
            except FileNotFoundError:
                with conn.open(csv_file, "wt") as f:
                    df.to_csv(f, index=False)
                st.write(conn.read_csv(csv_file))

            st.write("## Parquet Files")
            try:
                st.write(conn.read_parquet(parquet_file))
            except FileNotFoundError:
                with conn.open(parquet_file, "wb") as f:
                    df.to_parquet(f)
                st.write(conn.read_parquet(parquet_file))
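
# A hedged refactor sketch (not in the original commit): every tab above repeats the
# same read-or-create pattern; it could collapse into one helper like this.
# `read_or_create` is an illustrative name, not part of the st.connection API.
def read_or_create(conn, path, reader, create, mode="wt"):
    """Read `path` with `reader`; if it doesn't exist, create it via `create(f)` and retry."""
    try:
        return reader(path)
    except FileNotFoundError:
        with conn.open(path, mode) as f:
            create(f)
        return reader(path)

# Example usage with the names from the tabs above:
# st.write(read_or_create(conn, csv_file, conn.read_csv, lambda f: df.to_csv(f, index=False)))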
File renamed without changes.
