Merge pull request bsc-wdc#51 from bsc-wdc/issue-20
Failure of 'csvm-driver' example.
javicid authored Nov 30, 2018
2 parents 118d93f + c9b8dd6 commit 3962b7c
Showing 1 changed file with 30 additions and 37 deletions.
examples/csvm-driver.py (67 changes: 30 additions & 37 deletions)
@@ -7,7 +7,7 @@
import numpy as np
from sklearn.datasets import load_svmlight_file

-from dislib.data import load_file, load_files
+from dislib.data import *
from dislib.classification import CascadeSVM

from pycompss.api.api import barrier
@@ -20,30 +20,30 @@ def main():
parser.add_argument("-dt", "--detailed_times",
help="get detailed execution times (read and fit)",
action="store_true")
parser.add_argument("-k", metavar="KERNEL", type=str,
parser.add_argument("-k", "--kernel", metavar="KERNEL", type=str,
help="linear or rbf (default is rbf)",
choices=["linear", "rbf"], default="rbf")
parser.add_argument("-a", metavar="CASCADE_ARITY", type=int,
parser.add_argument("-a", "--arity", metavar="CASCADE_ARITY", type=int,
help="default is 2", default=2)
parser.add_argument("-p", metavar="PART_SIZE", type=int,
parser.add_argument("-p", "--part_size", metavar="PART_SIZE", type=int,
help="size of the partitions in which to divide the "
"input dataset")
parser.add_argument("-i", metavar="MAX_ITERATIONS", type=int,
parser.add_argument("-i", "--iteration", metavar="MAX_ITERATIONS", type=int,
help="default is 5", default=5)
parser.add_argument("-g", metavar="GAMMA", type=float,
parser.add_argument("-g", "--gamma", metavar="GAMMA", type=float,
help="(only for rbf kernel) default is 1 / n_features",
default=None)
parser.add_argument("-c", metavar="C", type=float, help="default is 1",
default=1)
parser.add_argument("-f", metavar="N_FEATURES", type=int,
parser.add_argument("-c", metavar="C", type=float, default=1,
help="Penalty parameter C of the error term. Default:1")
parser.add_argument("-f", "--features", metavar="N_FEATURES", type=int,
help="mandatory if --libsvm option is used and "
"train_data is a directory (optional otherwise)",
default=None)
parser.add_argument("-t", metavar="TEST_FILE_PATH",
parser.add_argument("-t", "--test-file", metavar="TEST_FILE_PATH",
help="test CSV file path", type=str, required=False)
parser.add_argument("-o", metavar="OUTPUT_FILE_PATH",
parser.add_argument("-o", "--output_file", metavar="OUTPUT_FILE_PATH",
help="output file path", type=str, required=False)
parser.add_argument("-nd", metavar="N_DATASETS", type=int,
parser.add_argument("-nd", "--n_datasets", metavar="N_DATASETS", type=int,
help="number of times to load the dataset", default=1)
parser.add_argument("--convergence", help="check for convergence",
action="store_true")
@@ -58,61 +58,54 @@ def main():

train_data = args.train_data

-    if not args.g:
+    if not args.gamma:
gamma = "auto"
else:
-        gamma = args.g
-
-    if args.libsvm:
-        fmt = "libsvm"
-    else:
-        fmt = "labeled"
+        gamma = args.gamma

data = []

s_time = time.time()

if os.path.isdir(train_data):
-        for _ in range(args.nd):
-            data.append(load_files(path=train_data, fmt=fmt,
-                                   n_features=args.f, use_array=args.dense))
+        _loader_func = load_libsvm_files if args.libsvm else load_csv_files
+        for _ in range(args.n_datasets):
+            data.append(_loader_func(train_data, args.features))
else:
-        for _ in range(args.nd):
-            data.append(load_file(path=train_data, part_size=args.p,
-                                  fmt=fmt, n_features=args.f,
-                                  use_array=args.dense))
+        _loader_func = load_libsvm_file if args.libsvm else load_csv_file
+        for _ in range(args.n_datasets):
+            data.append(_loader_func(train_data, args.part_size, args.features))

-    if args.dt:
+    if args.detailed_times:
barrier()


-    csvm = CascadeSVM(cascade_arity=args.a, max_iter=args.i, c=args.c,
-                      gamma=gamma, check_convergence=args.convergence)
+    csvm = CascadeSVM(cascade_arity=args.arity, max_iter=args.iteration,
+                      c=args.c, gamma=gamma, check_convergence=args.convergence)

for d in data:
csvm.fit(d)

-    out = [args.k, args.a, args.p, csvm._clf_params["gamma"], args.c,
-           csvm.iterations, csvm.converged]
+    out = [args.kernel, args.arity, args.part_size, csvm._clf_params["gamma"],
+           args.c, csvm.iterations, csvm.converged]

if os.path.isdir(train_data):
n_files = os.listdir(train_data)
out.append(len(n_files))

-    if args.t:
+    if args.test_file:
if args.libsvm:
-            testx, testy = load_svmlight_file(args.t, args.f)
+            testx, testy = load_svmlight_file(args.test_file, args.features)

if args.dense:
testx = testx.toarray()

out.append(csvm.score(testx, testy))
else:
-            test = np.loadtxt(args.t, delimiter=",", dtype=float)
+            test = np.loadtxt(args.test_file, delimiter=",", dtype=float)
out.append(csvm.score(test[:, :-1], test[:, -1]))

-    if args.o:
-        with open(args.o, "ab") as f:
+    if args.output_file:
+        with open(args.output_file, "ab") as f:
wr = csv.writer(f)
wr.writerow(out)
else:
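
Note on the pattern behind the fix: once an argparse option is registered with both a short and a long name, the parsed value is stored under the long name, so attribute access through the short name (args.g, args.p, args.t, ...) stops working and the driver has to read args.gamma, args.part_size, args.test_file instead. The snippet below is a minimal, self-contained sketch of that behaviour and of the loader-selection pattern used in the new code; the two loader functions are hypothetical stand-ins, not dislib's real API.

import argparse

# Hypothetical stand-ins for the dislib loaders selected in the diff.
def load_csv_file(path, part_size, n_features):
    print("csv loader:", path, part_size, n_features)

def load_libsvm_file(path, part_size, n_features):
    print("libsvm loader:", path, part_size, n_features)

parser = argparse.ArgumentParser()
parser.add_argument("train_data")
parser.add_argument("-g", "--gamma", type=float, default=None)
parser.add_argument("-p", "--part_size", type=int, default=100)
parser.add_argument("-f", "--features", type=int, default=None)
parser.add_argument("--libsvm", action="store_true")
args = parser.parse_args(["train.csv", "--gamma", "0.1"])

# With a long name present, the value lives under it: args.gamma works,
# while args.g would raise AttributeError.
gamma = "auto" if not args.gamma else args.gamma

# Select the loader once from the format flag, then call it uniformly.
loader = load_libsvm_file if args.libsvm else load_csv_file
loader(args.train_data, args.part_size, args.features)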
