Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions squeeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
optimized for CPU performance with SIMD vectorization and Rust backends.

Implemented algorithms:
- UMAP: Uniform Manifold Approximation and Projection
- UMAP: Uniform Manifold Approximation and Projection (Rust backend)
- UMAPRust: Rust-backed UMAP implementation (internal)
- PCA: Principal Component Analysis
- TSNE: t-Distributed Stochastic Neighbor Embedding
- MDS: Multidimensional Scaling
Expand All @@ -18,7 +19,7 @@

from warnings import catch_warnings, simplefilter, warn

from .umap_ import UMAP
from .umap import UMAP

# Import Rust-based algorithms
try:
Expand Down
Binary file modified squeeze/_hnsw_backend.abi3.so
Binary file not shown.
15 changes: 14 additions & 1 deletion squeeze/aligned_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@
from squeeze.sparse import arr_intersect as intersect1d
from squeeze.sparse import arr_union as union1d
from squeeze.spectral import spectral_layout
from squeeze.umap_ import UMAP, make_epochs_per_sample
from squeeze.umap import UMAP


def make_epochs_per_sample(weights, n_epochs):
    """Convert edge weights into a per-edge sampling period measured in epochs.

    Every weight is scaled by the largest weight and multiplied by
    ``n_epochs`` to give the number of times that edge should be sampled.
    The returned value for an edge is ``n_epochs`` divided by that count.
    Edges whose scaled count is not strictly positive keep the sentinel
    value ``-1.0``.

    Parameters
    ----------
    weights : array-like
        One-dimensional collection of edge weights.
    n_epochs : int or float
        Total number of optimisation epochs.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with one entry per weight.  An empty input yields
        an empty array instead of raising from ``max()`` on a zero-size array.
    """
    weights = np.asarray(weights)
    result = np.full(weights.shape[0], -1.0, dtype=np.float64)

    # ``ndarray.max`` raises ValueError on zero-size input; nothing to scale.
    if weights.size == 0:
        return result

    # When the largest weight is 0 the division produces NaN/inf.  Those
    # entries never satisfy ``> 0`` below, so the result is still correct;
    # the errstate block only silences the spurious RuntimeWarnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        n_samples = n_epochs * (weights / weights.max())

    sampled = n_samples > 0
    # Cast to float64 before dividing so low-precision inputs (e.g. float32)
    # are divided at full precision.
    result[sampled] = float(n_epochs) / n_samples[sampled].astype(np.float64)
    return result


if TYPE_CHECKING:
import scipy.sparse
Expand Down Expand Up @@ -557,6 +565,11 @@ def fit(
y: list[np.ndarray] | tuple[np.ndarray, ...] | np.ndarray | None = None,
**fit_params: Any,
) -> AlignedUMAP:
if getattr(UMAP, "_BACKEND", "") == "rust":
raise NotImplementedError(
"AlignedUMAP requires the removed Python UMAP backend; "
"Rust-only UMAP does not yet expose the graph state needed for alignment."
)
"""Fit aligned UMAP on multiple related datasets.

Parameters
Expand Down
16 changes: 12 additions & 4 deletions squeeze/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def trustworthiness(
trust_sum += max(0, original_rank - k)

# Normalize
trustworthiness_score = 1 - (2 / (n_samples * k * (2 * n_samples - 3 * k - 1))) * trust_sum
trustworthiness_score = (
1 - (2 / (n_samples * k * (2 * n_samples - 3 * k - 1))) * trust_sum
)

return max(0, min(1, trustworthiness_score))

Expand Down Expand Up @@ -184,7 +186,9 @@ def continuity(
cont_sum += max(0, embedding_rank - k)

# Normalize
continuity_score = 1 - (2 / (n_samples * k * (2 * n_samples - 3 * k - 1))) * cont_sum
continuity_score = (
1 - (2 / (n_samples * k * (2 * n_samples - 3 * k - 1))) * cont_sum
)

return max(0, min(1, continuity_score))

Expand Down Expand Up @@ -340,8 +344,12 @@ def spearman_distance_correlation(

# If dataset is large, sample pairs
if sample_size is not None and len(original_distances) > sample_size:
sample_indices = np.random.choice(
len(original_distances), size=sample_size, replace=False
# Prefer deterministic sampling to reduce variance across runs.
rng = np.random.default_rng(42)
sample_indices = rng.choice(
len(original_distances),
size=sample_size,
replace=False,
)
original_distances = original_distances[sample_indices]
embedded_distances = embedded_distances[sample_indices]
Expand Down
2 changes: 1 addition & 1 deletion squeeze/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Any, Callable, Iterator

# Import all algorithms
from .umap_ import UMAP
from .umap import UMAP

try:
from ._hnsw_backend import (
Expand Down
7 changes: 6 additions & 1 deletion squeeze/tests/test_aligned_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, pairwise_distances

from squeeze import AlignedUMAP
from squeeze import AlignedUMAP, UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"AlignedUMAP requires removed Python UMAP backend", allow_module_level=True
)

# ===============================
# Test AlignedUMAP on sliced iris
Expand Down
6 changes: 6 additions & 0 deletions squeeze/tests/test_composite_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"Composite UMAP operators require removed Python backend",
allow_module_level=True,
)

try:
# works for sklearn>=0.22
from sklearn.manifold import trustworthiness
Expand Down
5 changes: 5 additions & 0 deletions squeeze/tests/test_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
from sklearn.decomposition import PCA

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"UMAP composition tests target removed Python backend", allow_module_level=True
)
from squeeze.composition import AdaptiveDR, DRPipeline, EnsembleDR, ProgressiveDR


Expand Down
6 changes: 6 additions & 0 deletions squeeze/tests/test_data_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"Input-validation tests target removed Python UMAP backend",
allow_module_level=True,
)


@pytest.fixture(scope="session")
def all_finite_data():
Expand Down
5 changes: 5 additions & 0 deletions squeeze/tests/test_densmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"densMAP tests target removed Python UMAP backend", allow_module_level=True
)

try:
# works for sklearn>=0.22
from sklearn.manifold import trustworthiness
Expand Down
11 changes: 10 additions & 1 deletion squeeze/tests/test_sparse_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
import pytest
import scipy.sparse as sp

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"SparseUMAP tests target removed Python UMAP backend", allow_module_level=True
)

from squeeze.sparse_ops import (
SparseFormatDetector,
SparseKNNGraph,
Expand Down Expand Up @@ -326,7 +333,9 @@ def test_workflow_sparse_data(self):
def test_sparse_vs_dense_equivalence(self):
"""Test that sparse and dense give similar results."""
# Create small test data
X_dense = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 1, 0], [1, 0, 1]]).astype(float)
X_dense = np.array(
[[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 1, 0], [1, 0, 1]]
).astype(float)
X_sparse = sp.csr_matrix(X_dense)

# Compute distances both ways
Expand Down
2 changes: 1 addition & 1 deletion squeeze/tests/test_umap_get_feature_names_out.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from sklearn.datasets import make_classification
from sklearn.pipeline import FeatureUnion, Pipeline

from squeeze.umap_ import UMAP
from squeeze import UMAP


def test_get_feature_names_out() -> None:
Expand Down
9 changes: 4 additions & 5 deletions squeeze/tests/test_umap_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
from sklearn.neighbors import KDTree
from sklearn.preprocessing import normalize

from squeeze import distances as dist
from squeeze.umap_ import (
nearest_neighbors,
smooth_knn_dist,
)
from squeeze import UMAP

# The legacy Python UMAP backend has been removed.
pytest.skip("Python UMAP backend removed", allow_module_level=True)

# ===================================================
# Nearest Neighbour Test cases
Expand Down
4 changes: 3 additions & 1 deletion squeeze/tests/test_umap_on_iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from sklearn.neighbors import KDTree

from squeeze import UMAP
from squeeze.umap_ import nearest_neighbors

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip("Python UMAP backend removed", allow_module_level=True)

try:
# works for sklearn>=0.22
Expand Down
3 changes: 3 additions & 0 deletions squeeze/tests/test_umap_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from sklearn.preprocessing import normalize

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip("UMAP ops tests target removed Python backend", allow_module_level=True)
from squeeze.distances import pairwise_special_metric
from squeeze.spectral import component_layout
from squeeze.utils import disconnected_vertices
Expand Down
7 changes: 7 additions & 0 deletions squeeze/tests/test_umap_repeated_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import numpy as np
import pytest

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"Repeated-data tests target removed Python UMAP backend",
allow_module_level=True,
)

# ===================================================
# Spatial Data Test cases
# ===================================================
Expand Down
34 changes: 34 additions & 0 deletions squeeze/tests/test_umap_rust.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
import pytest

try:
from squeeze._hnsw_backend import UMAPRust
except Exception: # pragma: no cover
UMAPRust = None


pytestmark = pytest.mark.skipif(UMAPRust is None, reason="Rust backend not available")


def test_umap_rust_fit_transform_runs(iris) -> None:
    """Smoke-test ``UMAPRust.fit_transform`` on the ``iris`` fixture data.

    Asserts that the returned embedding has one 2-component row per input
    sample, contains only finite values, and that the ``embedding_``
    attribute read afterwards has the same shape.
    """
    data = iris.data
    model = UMAPRust(n_components=2, n_neighbors=15, random_state=42)
    embedding = model.fit_transform(data)

    expected_shape = (data.shape[0], 2)
    assert embedding.shape == expected_shape
    assert np.isfinite(embedding).all()

    stored = model.embedding_
    assert stored.shape == embedding.shape


def test_umap_rust_basic_trustworthiness(iris) -> None:
    """Check that a ``UMAPRust`` embedding of the ``iris`` data is trustworthy.

    The test is skipped when ``trustworthiness`` cannot be imported from
    ``sklearn.manifold``.  Otherwise it embeds the data into 2 components
    and requires a trustworthiness score (10 neighbours) of at least 0.75.
    """
    try:
        from sklearn.manifold import trustworthiness
    except Exception:  # pragma: no cover
        pytest.skip("scikit-learn trustworthiness not available")

    features = iris.data
    model = UMAPRust(n_components=2, n_neighbors=15, random_state=42)
    embedding = model.fit_transform(features)

    score = trustworthiness(features, embedding, n_neighbors=10)
    assert score >= 0.75
7 changes: 7 additions & 0 deletions squeeze/tests/test_umap_trustworthiness.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
import numpy as np
import pytest
import scipy.sparse
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances

from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"UMAP trustworthiness tests target removed Python backend",
allow_module_level=True,
)

try:
# works for sklearn>=0.22
from sklearn.manifold import trustworthiness
Expand Down
6 changes: 6 additions & 0 deletions squeeze/tests/test_umap_validation_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# verify that we can import this; potentially for later use
from squeeze import UMAP

if getattr(UMAP, "_BACKEND", "") == "rust":
pytest.skip(
"UMAP parameter-validation tests target removed Python backend",
allow_module_level=True,
)

warnings.filterwarnings("ignore", category=UserWarning)


Expand Down
Loading
Loading