Skip to content

Commit

Permalink
ux update
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangpengHao committed Dec 1, 2024
1 parent f49f0a0 commit b6402a7
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 81 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
# Parquet Viewer

Online at: https://parquet-viewer.haoxp.xyz
Online at: https://parquet-viewer.xiangpeng.systems

It compiles [parquet-rs](https://github.com/apache/arrow-rs) to WebAssembly and uses it to explore Parquet files, [more details](https://blog.haoxp.xyz/posts/parquet-viewer/).
### Features

- View Parquet metadata ✅
- Explore Parquet data with SQL ✅
- Ask questions about Parquet data with natural language ✅
- View Parquet files from local file system, S3, and URLs ✅
- Everything runs in the browser, no data upload ✅

### Demo

![screenshot](doc/parquet-viewer.gif)



## Development

It compiles [parquet-rs](https://github.com/apache/arrow-rs) to WebAssembly and uses it to explore Parquet files; see [more details](https://blog.haoxp.xyz/posts/parquet-viewer/).


Check out the awesome [Leptos](https://github.com/leptos-rs/leptos) framework.

```bash
Expand Down
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<meta property="og:title" content="Parquet Viewer - View and Query Parquet Files" />
<meta property="og:description" content="A web-based Parquet file viewer and query tool." />
<meta property="og:type" content="website" />
<meta property="og:url" content="https://parquet-viewer.haoxp.xyz" />
<meta property="og:url" content="https://parquet-viewer.xiangpeng.systems" />

<script src="https://cdn.tailwindcss.com"></script>
<script>
Expand Down
14 changes: 9 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ fn App() -> impl IntoView {
let (file_content, set_file_content) = create_signal(None::<ParquetInfo>);
let (error_message, set_error_message) = create_signal(Option::<String>::None);
let (file_bytes, set_file_bytes) = create_signal(None::<Bytes>);
let (user_query, set_user_query) = create_signal(String::new());
let (sql_query, set_sql_query) = create_signal(String::new());
let (query_result, set_query_result) = create_signal(Vec::<arrow::array::RecordBatch>::new());
let (file_name, set_file_name) = create_signal(String::from("uploaded"));
Expand All @@ -187,10 +188,11 @@ fn App() -> impl IntoView {
};

wasm_bindgen_futures::spawn_local(async move {
match execute_query_async(query, bytes, table_name, parquet_info).await {
match execute_query_async(query.clone(), bytes, table_name, parquet_info).await {
Ok((results, physical_plan)) => {
set_physical_plan.set(Some(physical_plan));
set_query_result.set(results);
set_sql_query.set(query);
}
Err(e) => set_error_message.set(Some(e)),
}
Expand All @@ -211,6 +213,7 @@ fn App() -> impl IntoView {
set_file_bytes.set(Some(bytes.clone()));
let default_query =
format!("select * from \"{}\" limit 10", file_name.get_untracked());
set_user_query.set(default_query.clone());
set_sql_query.set(default_query.clone());
execute_query(default_query);
}
Expand Down Expand Up @@ -263,7 +266,7 @@ fn App() -> impl IntoView {
"Tips:" <ul class="list-disc ml-6 mt-2 space-y-1">
<li>"Make sure the URL has CORS enabled."</li>
<li>
"If query with natural language, make sure to set the Gemini API key (free tier is enough)."
"If query with natural language, make sure to set the Anthropic API key."
</li>
<li>
"I usually download the file and use the file picker above."
Expand All @@ -284,8 +287,8 @@ fn App() -> impl IntoView {

view! {
<QueryInput
sql_query=sql_query
set_sql_query=set_sql_query
user_query=user_query
set_user_query=set_user_query
file_name=file_name
execute_query=Arc::new(execute_query)
schema=info.schema
Expand All @@ -307,7 +310,8 @@ fn App() -> impl IntoView {
let physical_plan = physical_plan.get().unwrap();
view! {
<QueryResults
sql_query=sql_query.get_untracked()
sql_query=sql_query.get()
set_user_query=set_user_query
query_result=result
physical_plan=physical_plan
/>
Expand Down
112 changes: 41 additions & 71 deletions src/query_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ pub(crate) async fn execute_query_inner(

#[component]
pub fn QueryInput(
sql_query: ReadSignal<String>,
set_sql_query: WriteSignal<String>,
user_query: ReadSignal<String>,
set_user_query: WriteSignal<String>,
error_message: WriteSignal<Option<String>>,
file_name: ReadSignal<String>,
execute_query: Arc<dyn Fn(String)>,
Expand All @@ -95,15 +95,15 @@ pub fn QueryInput(
.local_storage()
.unwrap()
.unwrap()
.get_item("gemini_api_key")
.get_item("claude_api_key")
.unwrap()
.unwrap_or_default()
});

create_effect(move |_| {
if let Some(window) = web_sys::window() {
if let Ok(Some(storage)) = window.local_storage() {
let _ = storage.set_item("gemini_api_key", &api_key.get());
let _ = storage.set_item("claude_api_key", &api_key.get());
}
}
});
Expand All @@ -115,13 +115,12 @@ pub fn QueryInput(
let file_name_s = file_name.get_untracked();
let key_down = move |ev: web_sys::KeyboardEvent| {
if ev.key() == "Enter" {
let input = sql_query.get_untracked();
let input = user_query.get_untracked();
process_user_input(
input,
key_down_schema.clone(),
file_name_s.clone(),
key_down_exec.clone(),
set_sql_query.clone(),
api_key.get_untracked(),
error_message.clone(),
);
Expand All @@ -132,13 +131,12 @@ pub fn QueryInput(
let button_press_schema = schema.clone();
let file_name_s = file_name.get_untracked();
let button_press = move |_ev: web_sys::MouseEvent| {
let input = sql_query.get_untracked();
let input = user_query.get_untracked();
process_user_input(
input,
button_press_schema.clone(),
file_name_s.clone(),
key_down_exec.clone(),
set_sql_query.clone(),
api_key.get_untracked(),
error_message.clone(),
);
Expand All @@ -157,10 +155,10 @@ pub fn QueryInput(
<div class="flex flex-col gap-2">
<label class="text-sm text-gray-600">
<a
href="https://aistudio.google.com/app/apikey"
href="https://console.anthropic.com/settings/keys"
class="text-blue-500 hover:text-blue-700 underline"
>
Gemini API
Anthropic API
</a>
Key
</label>
Expand All @@ -179,7 +177,8 @@ pub fn QueryInput(
<input
type="text"
placeholder=default_query
on:input=move |ev| set_sql_query(event_target_value(&ev))
on:input=move |ev| set_user_query(event_target_value(&ev))
prop:value=user_query
on:keydown=key_down
class="flex-1 px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500"
/>
Expand All @@ -206,14 +205,12 @@ fn process_user_input(
schema: SchemaRef,
file_name: String,
exec: Arc<dyn Fn(String)>,
set_sql_query: WriteSignal<String>,
api_key: String,
error_message: WriteSignal<Option<String>>,
) {
// if the input seems to be a SQL query, return it as is
if input.starts_with("select") || input.starts_with("SELECT") {
exec(input.clone());
set_sql_query(input);
return;
}

Expand All @@ -232,17 +229,16 @@ fn process_user_input(
let prompt = prompt.clone();
let api_key = api_key.clone();
async move {
let sql = match generate_sql_via_gemini(prompt, api_key).await {
let sql = match generate_sql_via_claude(prompt, api_key).await {
Ok(response) => response,
Err(e) => {
web_sys::console::log_1(&e.clone().into());
let gemini_error = format!("Failed to generate SQL through Gemini: {}", e);
error_message.set(Some(gemini_error));
let claude_error = format!("Failed to generate SQL through Claude: {}", e);
error_message.set(Some(claude_error));
return;
}
};
web_sys::console::log_1(&sql.clone().into());
set_sql_query(sql.clone());
exec(sql);
}
});
Expand All @@ -256,53 +252,38 @@ fn schema_to_brief_str(schema: SchemaRef) -> String {
field_strs.collect::<Vec<_>>().join(", ")
}

// Asynchronous function to call the Gemini API
async fn generate_sql_via_gemini(prompt: String, api_key: String) -> Result<String, String> {
// Remove the hardcoded key
let url = format!(
"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={}",
api_key
);
// Asynchronous function to call the Claude API
async fn generate_sql_via_claude(prompt: String, api_key: String) -> Result<String, String> {
let url = "https://api.anthropic.com/v1/messages";

// Build the JSON payload
let payload = json!({
"contents": [
{
"role": "user",
"parts": [
{
"text": prompt
}
]
}
],
"generationConfig": {
"temperature": 1,
"topK": 40,
"topP": 0.95,
"maxOutputTokens": 8192,
"responseMimeType": "application/json",
"responseSchema": {
"type": "object",
"properties": {
"sql": {
"type": "string"
}
}
}
}
"model": "claude-3-haiku-20240307",
"max_tokens": 1024,
"messages": [{
"role": "user",
"content": prompt
}],
"system": "You are a SQL query generator. You should only respond with the generated SQL query. Do not include any explanation, JSON wrapping, or additional text."
});

// Initialize Request
let opts = RequestInit::new();
opts.set_method("POST");
opts.set_mode(RequestMode::Cors);

// Set headers
// Update headers according to docs
let headers = Headers::new().map_err(|e| format!("Failed to create headers: {:?}", e))?;
headers
.set("Content-Type", "application/json")
.set("content-type", "application/json")
.map_err(|e| format!("Failed to set Content-Type: {:?}", e))?;
headers
.set("anthropic-version", "2023-06-01")
.map_err(|e| format!("Failed to set Anthropic version: {:?}", e))?;
headers
.set("x-api-key", &api_key)
.map_err(|e| format!("Failed to set API key: {:?}", e))?;
headers
.set("anthropic-dangerous-direct-browser-access", "true")
.map_err(|e| format!("Failed to set browser access header: {:?}", e))?;
opts.set_headers(&headers);

// Set body
Expand Down Expand Up @@ -341,7 +322,7 @@ async fn generate_sql_via_gemini(prompt: String, api_key: String) -> Result<Stri
.await
.map_err(|e| format!("JSON parsing error: {:?}", e))?;

// Parse the response to extract just the SQL query
// Simplified response parsing
let json_value: serde_json::Value = serde_json::from_str(
&js_sys::JSON::stringify(&json)
.map_err(|e| format!("Failed to stringify JSON: {:?}", e))?
Expand All @@ -350,26 +331,15 @@ async fn generate_sql_via_gemini(prompt: String, api_key: String) -> Result<Stri
)
.map_err(|e| format!("Failed to parse JSON value: {:?}", e))?;

// Navigate the JSON structure to extract the SQL
// Extract the SQL directly from the content
let sql = json_value
.get("candidates")
.get("content")
.and_then(|c| c.get(0))
.and_then(|c| c.get("content"))
.and_then(|c| c.get("parts"))
.and_then(|p| p.get(0))
.and_then(|p| p.get("text"))
.and_then(|c| c.get("text"))
.and_then(|t| t.as_str())
.ok_or("Failed to extract SQL from response")?;

// Parse the inner JSON string to get the final SQL
let sql_obj: serde_json::Value =
serde_json::from_str(sql).map_err(|e| format!("Failed to parse SQL JSON: {:?}", e))?;

let final_sql = sql_obj
.get("sql")
.and_then(|s| s.as_str())
.ok_or("Failed to extract SQL field")?
.ok_or("Failed to extract SQL from response")?
.trim()
.to_string();

Ok(final_sql)
Ok(sql)
}
7 changes: 5 additions & 2 deletions src/query_results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@ use leptos::*;
#[component]
pub fn QueryResults(
sql_query: String,
set_user_query: WriteSignal<String>,
query_result: Vec<RecordBatch>,
physical_plan: Arc<dyn ExecutionPlan>,
) -> impl IntoView {
let (active_tab, set_active_tab) = create_signal("results".to_string());

let sql = sql_query.clone();
view! {
<div class="mt-4 p-4 bg-white border border-gray-300 rounded-md">
<div class="mb-4 p-3 bg-gray-50 rounded border border-gray-200 font-mono text-sm overflow-x-auto">
{sql_query}
<div class="mb-4 p-3 bg-gray-50 rounded border border-gray-200 font-mono text-sm overflow-x-auto cursor-pointer"
on:click=move |_| set_user_query(sql_query.to_string())>
{sql}
</div>
<div class="mb-4 border-b border-gray-300">
<button
Expand Down

0 comments on commit b6402a7

Please sign in to comment.