Issue 264: Update qviz for multiblock files (#437)
* Update Qbeast Visualiser (qviz) with multiblock files

---------

Co-authored-by: Jorge Marín <jorge.marin.rodenas@estudiantat.upc.edu>
Co-authored-by: Jorge Marín <100561030+jorgeMarin1@users.noreply.github.com>
3 people authored Oct 18, 2024
1 parent c70670f commit 1f3c4bb
Showing 28 changed files with 853 additions and 1,218 deletions.
8 changes: 0 additions & 8 deletions utils/visualizer/Makefile

This file was deleted.

22 changes: 12 additions & 10 deletions utils/visualizer/README.md
````diff
@@ -17,8 +17,7 @@ cd qbeast-spark/utils/visualizer
 # Pyhton version should be 3.12
 python3 --version
 
-# Install tool and required dependencies
-brew install poetry
+# Install required dependencies
 poetry install
 ```
@@ -28,7 +27,7 @@ Launch a `Flask` serve with the following command and open the link with a brows
 
 ```bash
 # Run the tool on the test table
-poetry run qviz docs/test_table/
+poetry run qviz tests/resources/test_table/
 
 # optionally, specify the index revision(defaulted to 1)
 # qviz <table-path> --revision-id=2
@@ -39,17 +38,20 @@ poetry run qviz docs/test_table/
 - Sampling details: when a valid value for sampling fraction is given, a set of sampling metrics are displayed.
 **Only the chosen revision is taken into account for computing sampling metrics.**
 ```
-Sampling Info:
+Sampling Info:
 Disclaimer:
-The displayed sampling metrics are valid only for single revision indexes(excluding revision 0):
+The displayed sampling metrics are only for the chosen revisionId.
 The values will be different if the table contains multiple revisions.
-sample fraction: 0.3
-number of cubes read:8/20, 40.00%
-number of rows: 41858/100000, 41.86%
-sample size: 0.00141/0.00324GB, 43.49%
+sample fraction: 0.02
+number of rows: 751130/8000000, 9.39%
+sample size: 0.04273/2.09944GB, 2.04%
 ```
 - To visualize any table, point to the folder in which the target `_delta_log/` is contained.
 - To visualize a remote table, download its `_delta_log/` and point to the folder location.
 
 ### Tests
-- To run all tests: `make test`
+- To run all tests:
+```bash
+python -m unittest discover -s tests -p "*_test.py"
+```
````
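The sampling metrics above are, at a high level, a roll-up over the per-file blocks of the chosen revision: a block is read by a sample with fraction `f` whenever its minimum normalized weight is at most `f`, and the row and byte counters are accumulated from the blocks and files that pass that filter. Below is a minimal sketch of that aggregation using the `Block` and `File` classes added in `qviz/block.py` later in this commit; the `estimate_sampling_metrics` helper and its exact output format are illustrative assumptions, not the tool's actual implementation.

```python
from qviz.block import Block


def estimate_sampling_metrics(blocks: list[Block], fraction: float) -> dict:
    """Illustrative roll-up of the 'Sampling Info' shown above."""
    selected = [b for b in blocks if b.is_sampled(fraction)]
    total_rows = sum(b.element_count for b in blocks)
    sampled_rows = sum(b.element_count for b in selected)
    # Bytes are tracked per file; count a file once if any of its blocks is selected.
    total_bytes = sum(f.bytes for f in {b.file for b in blocks})
    sampled_bytes = sum(f.bytes for f in {b.file for b in selected})
    return {
        "sample fraction": fraction,
        "number of rows": f"{sampled_rows}/{total_rows}, "
        f"{100 * sampled_rows / total_rows:.2f}%",
        "sample size": f"{sampled_bytes / 1e9:.5f}/{total_bytes / 1e9:.5f}GB, "
        f"{100 * sampled_bytes / total_bytes:.2f}%",
    }
```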

This file was deleted.

701 changes: 348 additions & 353 deletions utils/visualizer/poetry.lock

Large diffs are not rendered by default.

52 changes: 32 additions & 20 deletions utils/visualizer/pyproject.toml
```diff
@@ -1,30 +1,42 @@
 [tool.poetry]
 name = "qviz"
-version = "0.0.1"
-description = "OTree Index Visualization dependencies"
-authors = ["qbeast"]
+version = "0.1.0"
+description = "CI tool for OTree index visualization"
+authors = ["Qbeast"]
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "^3.12"
-Brotli = "1.0.9"
-click = "8.1.3"
-dash = "2.17.1"
+python = "^3.12.5"
+blinker = "1.8.2"
+certifi = "2024.8.30"
+charset-normalizer = "3.4.0"
+click = "8.1.7"
+dash = "2.18.1"
 dash-core-components = "2.0.0"
-dash-cytoscape = "0.3.0"
 dash-html-components = "2.0.0"
 dash-table = "5.0.0"
-Flask = "2.2.5"
-Flask-Compress= "1.12"
-importlib-metadata = "4.11.4"
-itsdangerous = "2.1.2"
-Jinja2 = "3.1.2"
-MarkupSafe = "2.1.1"
-plotly = "5.8.1"
-tenacity = "8.0.1"
-Werkzeug = "2.2.3"
-zipp = "3.8.0"
-setuptools = "65.5.1"
-pyarrow = "~17.0.0"
+dash-cytoscape = "1.0.2"
+deltalake = "0.20.2"
+flask = "3.0.3"
+idna = "3.10"
+importlib-metadata = "8.5.0"
+itsdangerous = "2.2.0"
+jinja2 = "3.1.4"
+markupsafe = "3.0.1"
+nest-asyncio = "1.6.0"
+numpy = "2.1.2"
+packaging = "24.1"
+plotly = "5.24.1"
+pyarrow = "17.0.0"
+requests = "2.32.3"
+retrying = "1.3.4"
+setuptools = "75.2.0"
+six = "1.16.0"
+tenacity = "9.0.0"
+typing-extensions = "4.12.2"
+urllib3 = "2.2.3"
+werkzeug = "3.0.4"
+zipp = "3.20.2"
 
 [tool.poetry.scripts]
 qviz = "qviz:show_tree"
```
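The `[tool.poetry.scripts]` entry is what backs the `poetry run qviz <table-path>` command shown in the README: Poetry exposes a `qviz` console script that invokes the `show_tree` callable exported by the `qviz` package (re-exported from `qviz.qviz` in `__init__.py`, next diff).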
2 changes: 1 addition & 1 deletion utils/visualizer/qviz/__init__.py
```diff
@@ -1 +1 @@
-from qviz.qviz import show_tree
+from qviz.qviz import show_tree as show_tree
```
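The import is functionally unchanged; re-importing `show_tree` under its own name is the conventional way to mark an explicit re-export, which likely keeps strict linters and type checkers from flagging the name as unused.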
73 changes: 73 additions & 0 deletions utils/visualizer/qviz/block.py
@@ -0,0 +1,73 @@
```python
from __future__ import annotations
from dataclasses import dataclass

OFFSET = -2147483648.0  # Scala Int.MinValue
RANGE = 2147483647.0 - OFFSET


@dataclass
class File:
    path: str
    bytes: int
    num_rows: int

    @classmethod
    def from_add_file_json(cls, add_file: dict) -> File:
        _path = add_file["path"]
        _bytes = add_file["size_bytes"]
        _num_rows = add_file["num_records"]
        return cls(_path, _bytes, _num_rows)

    def __hash__(self) -> int:
        return hash(self.path)


class Block:
    def __init__(
        self,
        cube_id: str,
        element_count: int,
        min_weight: int,
        max_weight: int,
        file: File,
    ) -> None:
        self.cube_id = cube_id
        self.element_count = element_count
        self.min_weight = self.normalize_weight(min_weight)
        self.max_weight = self.normalize_weight(max_weight)
        self.file = file

    @staticmethod
    def normalize_weight(weight: int) -> float:
        """
        Map Weight to NormalizedWeight
        :param weight: Weight
        :return: A Weight's corresponding NormalizedWeight
        """
        fraction = (weight - OFFSET) / RANGE
        # We make sure fraction is within [0, 1]
        normalized = max(0.0, min(1.0, fraction))
        return float("{:.3f}".format(normalized))

    @classmethod
    def from_block_and_file(cls, block_json: dict, add_file_json: dict) -> Block:
        cube_id = block_json["cubeId"]
        element_count = block_json["elementCount"]
        min_weight = block_json["minWeight"]
        max_weight = block_json["maxWeight"]
        file = File.from_add_file_json(add_file_json)
        return cls(
            cube_id=cube_id,
            element_count=element_count,
            min_weight=min_weight,
            max_weight=max_weight,
            file=file,
        )

    def is_sampled(self, fraction: float) -> bool:
        """
        Determine if the cube is to be included in sampling for a given fraction
        :param fraction: sampling fraction between 0 and 1
        :return: boolean determining if the cube is selected
        """
        return self.min_weight <= fraction
```
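For reference, `normalize_weight` maps the Scala `Int` weight range onto `[0, 1]`: a raw weight of `0`, for example, normalizes to `(0 - (-2147483648)) / (2147483647 - (-2147483648)) ≈ 0.5`. A small usage sketch follows; the JSON values are made up for illustration, only the field names match what `from_block_and_file` and `from_add_file_json` expect.

```python
from qviz.block import Block

# Hypothetical per-block metadata and add-file entry, shaped like the
# dictionaries block.py expects to read from the Delta log.
block_json = {
    "cubeId": "A",
    "elementCount": 10_000,
    "minWeight": -2147483648,  # Int.MinValue -> normalizes to 0.0
    "maxWeight": 0,            # mid-range    -> normalizes to 0.5
}
add_file_json = {
    "path": "part-00000.parquet",
    "size_bytes": 1_048_576,
    "num_records": 10_000,
}

block = Block.from_block_and_file(block_json, add_file_json)

print(block.min_weight)       # 0.0
print(block.max_weight)       # 0.5
print(block.is_sampled(0.3))  # True: min_weight (0.0) <= 0.3
print(block.file.bytes)       # 1048576
```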
