Skip to content

Implement "target time" for benchmarks #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Nov 22, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use tol=0. for kmeans everywhere
Using tol=0 means the same number of iterations everywhere, allowing
apples-to-apples timing comparisons.
  • Loading branch information
bibikar committed Nov 22, 2019
commit cf63fe227b2cb41718c5d7a6cbac4d4c67c3e07a
6 changes: 2 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ ARGS_NATIVE_pca_full = --num-threads "$(NUM_THREADS)" --header \
ARGS_NATIVE_kmeans = --num-threads "$(NUM_THREADS)" --header \
--data-multiplier "$(MULTIPLIER)" \
--filex data/kmeans_$(KMEANS_SIZE).npy \
--filei data/kmeans_$(KMEANS_SIZE).init.npy \
--filet data/kmeans_$(KMEANS_SIZE).tol.npy
--filei data/kmeans_$(KMEANS_SIZE).init.npy
ARGS_NATIVE_svm2 = --fileX data/two/X-$(SVM_SIZE).npy \
--fileY data/two/y-$(SVM_SIZE).npy \
--num-threads $(SVM_NUM_THREADS) --header
Expand Down Expand Up @@ -159,8 +158,7 @@ ARGS_DAAL4PY_pca_daal = --size "$(REGRESSION_SIZE)" --svd-solver daal
ARGS_DAAL4PY_pca_full = --size "$(REGRESSION_SIZE)" --svd-solver full
ARGS_DAAL4PY_kmeans = --data-multiplier "$(MULTIPLIER)" \
--filex data/kmeans_$(KMEANS_SIZE).npy \
--filei data/kmeans_$(KMEANS_SIZE).init.npy \
--filet data/kmeans_$(KMEANS_SIZE).tol.npy
--filei data/kmeans_$(KMEANS_SIZE).init.npy
ARGS_DAAL4PY_svm2 = --fileX data/two/X-$(SVM_SIZE).npy \
--fileY data/two/y-$(SVM_SIZE).npy
ARGS_DAAL4PY_svm5 = --fileX data/multi/X-$(SVM_SIZE).npy \
Expand Down
7 changes: 3 additions & 4 deletions daal4py/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
type=str, help='Points to cluster')
parser.add_argument('-i', '--filei', '--fileI', '--init', required=True,
type=str, help='Initial clusters')
parser.add_argument('-t', '--filet', '--fileT', '--tol', required=True,
type=str, help='Absolute threshold')
parser.add_argument('-t', '--tol', default=0., type=float,
help='Absolute threshold')
parser.add_argument('-m', '--data-multiplier', default=100,
type=int, help='Data multiplier')
parser.add_argument('--maxiter', type=int, default=100,
Expand All @@ -26,7 +26,6 @@
X = np.load(params.filex)
X_init = np.load(params.filei)
X_mult = np.vstack((X,) * params.data_multiplier)
tol = np.load(params.filet)

params.size = size_str(X.shape)
params.n_clusters = X_init.shape[0]
Expand All @@ -40,7 +39,7 @@ def test_fit(X, X_init):
nClusters=params.n_clusters,
maxIterations=params.maxiter,
assignFlag=True,
accuracyThreshold=tol
accuracyThreshold=params.tol
)
return algorithm.compute(X, X_init)

Expand Down
16 changes: 5 additions & 11 deletions native/kmeans_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,16 @@ int main(int argc, char *argv[]) {
struct timing_options predict_opts = {10, 100, 10., 10};
add_timing_args(app, "predict", predict_opts);

std::string filex, filei, filet;
std::string filex, filei;
app.add_option("-x,--filex,--fileX", filex,
"Feature file name")
->required()->check(CLI::ExistingFile);
app.add_option("-i,--filei,--fileI", filei,
"Initial cluster centers file name")
->required()->check(CLI::ExistingFile);
app.add_option("-t,--filet,--fileT", filet,
"Absolute threshold file name")

double tol = 0.;
app.add_option("-t,--tol", tol, "Absolute threshold")
->required()->check(CLI::ExistingFile);

int data_multiplier = 100;
Expand All @@ -115,8 +116,7 @@ int main(int argc, char *argv[]) {
// Load data
struct npyarr *arrX = load_npy(filex.c_str());
struct npyarr *arrX_init = load_npy(filei.c_str());
struct npyarr *arrX_tol = load_npy(filet.c_str());
if (!arrX || !arrX_init || !arrX_tol) {
if (!arrX || !arrX_init) {
std::cerr << "Failed to load input arrays" << std::endl;
return EXIT_FAILURE;
}
Expand All @@ -130,12 +130,6 @@ int main(int argc, char *argv[]) {
<< arrX_init->shape_len << std::endl;
return EXIT_FAILURE;
}
if (arrX_tol->shape_len != 0) {
std::cerr << "Expected 0 dimensions for X_tol, found "
<< arrX_tol->shape_len << std::endl;
return EXIT_FAILURE;
}
double tol = ((double *) arrX_tol->data)[0];

// Infer data size from loaded arrays
std::ostringstream stringSizeStream;
Expand Down
10 changes: 5 additions & 5 deletions sklearn/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
from sklearn.cluster import KMeans

parser = argparse.ArgumentParser(description='scikit-learn K-means benchmark')
parser.add_argument('-x', '--filex', '--fileX', '--input',
parser.add_argument('-x', '--filex', '--fileX', '--input', required=True,
type=str, help='Points to cluster')
parser.add_argument('-i', '--filei', '--fileI', '--init',
parser.add_argument('-i', '--filei', '--fileI', '--init', required=True,
type=str, help='Initial clusters')
# parser.add_argument('-t', '--filet', '--fileT', '--tol',
# type=str, help='Absolute threshold')
parser.add_argument('-t', '--tol', type=float, default=0.,
help='Absolute threshold')
parser.add_argument('-m', '--data-multiplier', default=100,
type=int, help='Data multiplier')
parser.add_argument('--maxiter', type=int, default=100,
Expand All @@ -28,7 +28,7 @@
n_clusters = X_init.shape[0]

# Create our clustering object
kmeans = KMeans(n_clusters=n_clusters, n_jobs=params.n_jobs, tol=1e-16,
kmeans = KMeans(n_clusters=n_clusters, n_jobs=params.n_jobs, tol=params.tol,
max_iter=params.maxiter, n_init=1, init=X_init)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
Expand Down