Skip to content

Creating branch for large scale measurements #158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions configs/spmd/large_scale/basic_stats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd basicstats parameters": {
"algorithm": {
"estimator": "BasicStatistics",
"estimator_methods": { "training": "fit" }
},
"data": {
"split_kwargs": { "test_size": 0.0001 }
}
},
"synthetic data": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
]
}
},
"TEMPLATES": {
"basicstats": {
"SETS": [
"sklearnex spmd implementation",
"large scale 2k parameters",
"synthetic data",
"spmd basicstats parameters"
]
}
}
}
29 changes: 29 additions & 0 deletions configs/spmd/large_scale/basic_stats_strong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd basicstats parameters": {
"algorithm": {
"estimator": "BasicStatistics",
"estimator_methods": { "training": "fit" }
},
"data": {
"split_kwargs": { "test_size": 0.0001 }
}
},
"synthetic data": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } }
]
}
},
"TEMPLATES": {
"basicstats": {
"SETS": [
"sklearnex spmd implementation",
"large scale strong 2k parameters",
"synthetic data",
"spmd basicstats parameters"
]
}
}
}
30 changes: 30 additions & 0 deletions configs/spmd/large_scale/covariance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd basicstats parameters": {
"algorithm": {
"estimator": "EmpiricalCovariance",
"estimator_methods": { "training": "fit" }
},
"data": {
"split_kwargs": { "test_size": 0.0001 }
}
},
"synthetic data": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
]
}
},
"TEMPLATES": {
"covariance": {
"SETS": [
"sklearnex spmd implementation",
"large scale 2k parameters",
"synthetic data",
"spmd basicstats parameters"
]
}
}
}
29 changes: 29 additions & 0 deletions configs/spmd/large_scale/covariance_strong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd basicstats parameters": {
"algorithm": {
"estimator": "EmpiricalCovariance",
"estimator_methods": { "training": "fit" }
},
"data": {
"split_kwargs": { "test_size": 0.0001 }
}
},
"synthetic data": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } }
]
}
},
"TEMPLATES": {
"covariance": {
"SETS": [
"sklearnex spmd implementation",
"large scale strong 2k parameters",
"synthetic data",
"spmd basicstats parameters"
]
}
}
}
32 changes: 32 additions & 0 deletions configs/spmd/large_scale/dbscan.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd dbscan parameters": {
"algorithm": {
"estimator": "DBSCAN",
"estimator_methods": {
"training": "fit"
}
},
"data": {
"dtype": "float64"
}
},
"synthetic dataset": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 10, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
]
}
},
"TEMPLATES": {
"dbscan": {
"SETS": [
"common dbscan parameters",
"synthetic dataset",
"sklearnex spmd implementation",
"large scale default parameters",
"spmd dbscan parameters"
]
}
}
}
26 changes: 26 additions & 0 deletions configs/spmd/large_scale/forest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd forest classification parameters": {
"algorithm": {
"estimator": "RandomForestClassifier"
}
},
"synthetic data": {
"data": [
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } },
{ "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
]
}
},
"TEMPLATES": {
"forest classifier": {
"SETS": [
"sklearnex spmd implementation",
"large scale 2k parameters",
"synthetic data",
"spmd forest classification parameters"
]
}
}
}
25 changes: 25 additions & 0 deletions configs/spmd/large_scale/forest_strong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd forest classification parameters": {
"algorithm": {
"estimator": "RandomForestClassifier"
}
},
"synthetic data": {
"data": [
{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
]
}
},
"TEMPLATES": {
"forest classifier": {
"SETS": [
"sklearnex spmd implementation",
"large scale strong 2k parameters",
"synthetic data",
"spmd forest classification parameters"
]
}
}
}
31 changes: 31 additions & 0 deletions configs/spmd/large_scale/kmeans.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd kmeans parameters": {
"algorithm": {
"estimator": "KMeans",
"estimator_params": {
"algorithm": "lloyd"
},
"estimator_methods": { "training": "fit", "inference": "predict" }
}
},
"synthetic data": {
"data": [
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000, "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 30000, "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } }
]
}
},
"TEMPLATES": {
"kmeans": {
"SETS": [
"synthetic data",
"sklearnex spmd implementation",
"large scale 2k parameters",
"spmd kmeans parameters"
]
}
}
}
40 changes: 40 additions & 0 deletions configs/spmd/large_scale/knn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd knn cls parameters": {
"algorithm": {
"estimator": "KNeighborsClassifier",
"estimator_params": {
"algorithm": "brute",
"metric": "minkowski",
"p": 2,
"weights": "uniform",
"n_neighbors": 5
},
"estimator_methods": {
"training": "fit",
"inference": "predict"
}
}
},
"synthetic classification data": {
"data": [
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
{ "source": "make_classification", "split_kwargs": { "train_size": 200000, "test_size": 200000 }, "generation_kwargs": { "n_samples": 400000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
]
}
},
"TEMPLATES": {
"knn classifier": {
"SETS": [
"common knn parameters",
"synthetic classification data",
"sklearnex spmd implementation",
"large scale 2k parameters",
"spmd knn cls parameters"
]
}
}
}
49 changes: 49 additions & 0 deletions configs/spmd/large_scale/large_scale.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"PARAMETERS_SETS": {
"large scale default parameters": {
"data": {
"dtype": "float64",
"distributed_split": "None"
},
"bench": {
"mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
}
},
"large scale strong parameters": {
"data": {
"dtype": "float64",
"distributed_split": "rank_based"
},
"bench": {
"mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
}
},
"large scale 2k parameters": {
"data": {
"dtype": "float64",
"distributed_split": "None"
},
"bench": {
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
}
},
"large scale strong 2k parameters": {
"data": {
"dtype": "float64",
"distributed_split": "rank_based"
},
"bench": {
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
}
},
"large scale impi parameters": {
"data": {
"dtype": "float64",
"distributed_split": "None"
},
"bench": {
"mpi_params": {"n": [1,2,4,6,9,12], "ppn": 12}
}
}
}
}
27 changes: 27 additions & 0 deletions configs/spmd/large_scale/linear_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd linear parameters": {
"algorithm": {
"estimator": "LinearRegression",
"estimator_methods": { "training": "fit" }
}
},
"synthetic data": {
"data": [
{ "source": "make_regression", "generation_kwargs": { "n_samples": 30005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 30000000, "test_size": 5000 } },
{ "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }
]
}
},
"TEMPLATES": {
"linreg": {
"SETS": [
"sklearnex spmd implementation",
"large scale 2k parameters",
"synthetic data",
"spmd linear parameters"
]
}
}
}
26 changes: 26 additions & 0 deletions configs/spmd/large_scale/linear_model_strong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"],
"PARAMETERS_SETS": {
"spmd linear parameters": {
"algorithm": {
"estimator": "LinearRegression",
"estimator_methods": { "training": "fit" }
}
},
"synthetic data": {
"data": [
{ "source": "make_regression", "generation_kwargs": { "n_samples": 25005000, "n_features": 100, "noise": 1.25 }, "split_kwargs": { "train_size": 25000000, "test_size": 5000 } }
]
}
},
"TEMPLATES": {
"linreg": {
"SETS": [
"sklearnex spmd implementation",
"large scale strong 2k parameters",
"synthetic data",
"spmd linear parameters"
]
}
}
}
Loading
Loading