Open
Description
I compared the results of CPU UMAP and GPU UMAP on the Fashion-MNIST dataset, and the CPU implementation appears to be more accurate from a visualization standpoint. The comparison is between branch-0.20 of cuML and version 0.5.1 of the CPU UMAP; both are run with the same seed:
| CPU | GPU |
|---|---|
| ![]() | ![]() |
Sample code:
import os
import gzip
import numpy as np
import cuml
from bokeh.plotting import figure, output_file, show
from bokeh.models import CategoricalColorMapper, ColumnDataSource
from bokeh.palettes import Category10
import umap
def load_mnist(path, kind='train'):
    """Load an MNIST-format dataset split from gzip-compressed files in `path`.

    Two files are read from the directory `path`:
    ``<kind>-labels-idx1-ubyte.gz`` and ``<kind>-images-idx3-ubyte.gz``.

    Args:
        path: Directory containing the two gzip files.
        kind: File-name prefix selecting the split (defaults to ``'train'``).

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` NumPy arrays. ``labels`` is
        one-dimensional and ``images`` has shape ``(len(labels), 784)``.
        Both arrays are backed by the bytes read from disk, so they are
        read-only; copy them before modifying in place.

    Raises:
        OSError: If either file is missing or cannot be decompressed.
        ValueError: If the image payload cannot be reshaped to
            ``(len(labels), 784)``.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        # offset=8 skips the header bytes that precede the label values.
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        # offset=16 skips the header bytes that precede the pixel values;
        # each row then holds the 784 pixel values of one image.
        images = np.frombuffer(
            imgpath.read(), dtype=np.uint8, offset=16
        ).reshape(len(labels), 784)

    return images, labels
# Load the default ('train') split: X holds one 784-value row per image,
# y holds the matching integer labels.
X, y = load_mnist("fashion-mnist/data/fashion")
# NOTE(review): n_epochs is assigned here but never used in the visible script.
n_epochs = None
# GPU implementation (cuML) with a fixed random seed.
model = cuml.manifold.UMAP(random_state=1994)
# Use CPU or GPU: uncomment the next line to swap in the CPU implementation
# with the same seed.
# model = umap.UMAP(random_state=1994)
# Fit on the full data set; `embedding` is what plot_fit() draws.
embedding = model.fit_transform(X)
# Direct Bokeh output to fashion.html for the show() calls below.
output_file("fashion.html")
def _build_scatter(points, labels):
    """Build a Bokeh scatter figure of 2-D points coloured by class label.

    Args:
        points: Iterable of rows; element 0 of each row is used as x and
            element 1 as y.
        labels: Iterable of integer class ids in ``range(10)``, one per row
            of `points`.

    Returns:
        The configured Bokeh figure (not yet shown).
    """
    # Class ids are mapped to strings because the colour mapper is categorical.
    targets = [str(d) for d in range(10)]
    source = ColumnDataSource(
        dict(
            x=[e[0] for e in points],
            y=[e[1] for e in points],
            label=[targets[d] for d in labels],
        )
    )
    cmap = CategoricalColorMapper(factors=targets, palette=Category10[10])
    p = figure(title="test umap")
    p.circle(
        x="x",
        y="y",
        source=source,
        color={"field": "label", "transform": cmap},
        # `legend_field` replaces the deprecated `legend=` keyword and builds
        # one legend entry per distinct value of the "label" column.
        legend_field="label",
    )
    return p


def plot_fit():
    """Show the embedding produced by ``fit_transform``.

    Reads the module-level ``embedding`` and ``y``.
    """
    show(_build_scatter(embedding, y))


def plot_transform(model):
    """Show the result of ``model.transform`` on the first half of ``X``.

    Reads the module-level ``X`` and ``y``.

    Args:
        model: Fitted estimator exposing ``transform``.
    """
    n = X.shape[0] // 2
    transformed = model.transform(X[:n])
    p = _build_scatter(transformed, y[:n])
    print("Show transformed")
    show(p)
# Show the fitted embedding first, then the embedding obtained by calling
# transform() on the first half of the same data.
plot_fit()
plot_transform(model)
Activity