Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 264: Update qviz for multiblock files #437

Merged
Merged
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
42e11d6
initial commit
jorgeMarin1 Sep 6, 2024
b0fda5d
Merge branch 'Qbeast-io:main' into qviz-bug
jorgeMarin1 Sep 10, 2024
0fe8b08
initial commit
jorgeMarin1 Sep 6, 2024
5fb0956
added process table function using delta tables
jorgeMarin1 Sep 10, 2024
5f60426
create qviz using Delta tables
jorgeMarin1 Sep 10, 2024
0ee8232
added custom table
jorgeMarin1 Sep 10, 2024
30256f8
added custom table
jorgeMarin1 Sep 10, 2024
cf9cfc3
removed ecommerce300k_2019
jorgeMarin1 Sep 10, 2024
cbcbaed
fixed visualization
jorgeMarin1 Sep 12, 2024
92cf732
deleted parquet files and added a new folder for table test
jorgeMarin1 Sep 12, 2024
e9559f7
added comments on the code
jorgeMarin1 Sep 12, 2024
ee95b87
added comments to the code
jorgeMarin1 Sep 12, 2024
2f0aea8
Merge branch 'main' into qviz-bug
jorgeMarin1 Sep 12, 2024
a267f87
deleted code and files that won't be used
jorgeMarin1 Sep 12, 2024
032077c
added unit test
jorgeMarin1 Sep 13, 2024
398494d
addressed changes
jorgeMarin1 Sep 17, 2024
353011f
addressed changes
jorgeMarin1 Sep 18, 2024
13e95a7
snake case
jorgeMarin1 Sep 18, 2024
31ac8e1
edited example of json log file
jorgeMarin1 Sep 19, 2024
8ecfdae
raised exception when failed to create delta table
jorgeMarin1 Sep 19, 2024
6f824d4
edited name of the method that extracted cubes from delta tables
jorgeMarin1 Sep 19, 2024
cb0f1a5
made sure blocks_str is indeed a string
jorgeMarin1 Sep 19, 2024
5f06c45
put definition of cube_string before if statement
jorgeMarin1 Sep 19, 2024
d565fff
added else statement for block_string
jorgeMarin1 Sep 25, 2024
0178d1e
deleted non-necessary comments
jorgeMarin1 Sep 25, 2024
a1d75a7
changed path to test table in README
jorgeMarin1 Sep 25, 2024
512dd67
default value of sampling fraction is 0.02 and that value can be modi…
jorgeMarin1 Sep 25, 2024
22ff6b5
deleted unnecessary files
jorgeMarin1 Sep 25, 2024
1faa7c2
switched to version 3.2.0 of Delta Lake, which is compatible with 3.5…
jorgeMarin1 Sep 25, 2024
cda8c6d
update size of cubes using path of blocks
jorgeMarin1 Sep 26, 2024
b407ada
WIP
Jiaweihu08 Oct 14, 2024
1b5ee6b
Remove large file
Jiaweihu08 Oct 14, 2024
aa486ca
WIP, first working version
Jiaweihu08 Oct 16, 2024
4bce299
Organize code, add tests
Jiaweihu08 Oct 16, 2024
b6855b0
Update README
Jiaweihu08 Oct 17, 2024
db3ebe9
Add comments
Jiaweihu08 Oct 17, 2024
a6b88a7
Update poetry lock and toml
Jiaweihu08 Oct 17, 2024
bc4ef3c
Update README
Jiaweihu08 Oct 17, 2024
2727e64
Correct stats computation
Jiaweihu08 Oct 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add comments
  • Loading branch information
Jiaweihu08 committed Oct 17, 2024
commit db3ebe91361da9af736d67b3595666345a80941e
20 changes: 20 additions & 0 deletions utils/visualizer/qviz/content_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@


def process_table(table_path: str, revision_id: int) -> tuple[dict, list[dict]]:
"""
Load the table and revision, and populate the tree with cubes.
The elements to be displayed are the cube nodes and edges.
"""
delta_table = DeltaTable(table_path)
revision = load_revision(delta_table, revision_id)
cubes = load_revision_cubes(delta_table, revision)
Expand All @@ -17,6 +21,9 @@ def process_table(table_path: str, revision_id: int) -> tuple[dict, list[dict]]:


def load_revision(delta_table: DeltaTable, revision_id: int) -> (dict, int):
"""
Load revision metadata from the given revision ID.
"""
config = delta_table.metadata().configuration
revision_key = f"qbeast.revision.{revision_id}"
try:
Expand All @@ -34,6 +41,10 @@ def load_revision(delta_table: DeltaTable, revision_id: int) -> (dict, int):


def load_revision_cubes(delta_table: DeltaTable, revision: dict) -> dict:
"""
Load cubes from the given revision. Each cube contains a list of blocks, and one file
can contain multiple blocks.
"""
revision_id_str = str(revision["revisionID"])
dimension_count = len(revision["columnTransformers"])
symbol_count = (dimension_count + 5) // 6
Expand All @@ -56,6 +67,9 @@ def load_revision_cubes(delta_table: DeltaTable, revision: dict) -> dict:


def populate_tree(all_cubes: dict) -> None:
"""
Establish parent-child relationships between cubes.
"""
max_level = 0
level_cubes = defaultdict(list)
for cube in all_cubes.values():
Expand All @@ -69,6 +83,9 @@ def populate_tree(all_cubes: dict) -> None:


def get_nodes_and_edges(all_cubes: dict, fraction: float = -1.0) -> list[dict]:
"""
Create nodes and edges for the tree. If fraction is provided, highlight sampled cubes and print sampling details.
"""
nodes = []
edges = []
sampling_info = SamplingInfo(fraction)
Expand All @@ -84,6 +101,9 @@ def get_nodes_and_edges(all_cubes: dict, fraction: float = -1.0) -> list[dict]:


def get_node_and_edges_from_cube(cube: Cube, fraction: float) -> (dict, list[dict]):
"""
Create a node and edges for a given cube. If fraction is provided, highlight the cube when it is sampled.
"""
selected = cube.is_sampled(fraction)
name = cube.cube_id or "root"
label = (name + " " if name == "root" else "") + str(cube.max_weight)
Expand Down