forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_GBMScore.py
60 lines (49 loc) · 2.64 KB
/
test_GBMScore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import unittest, random, sys, time
sys.path.extend(['.','..','py'])
import h2o, h2o_cmd, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_gbm, h2o_jobs as h2j, h2o_import
class Basic(unittest.TestCase):
    """Time how long an H2O node takes to generate predictions with a GBM model."""

    def tearDown(self):
        # Run the harness's sandbox error check after every test method.
        h2o.check_sandbox_for_errors()

    @classmethod
    def setUpClass(cls):
        # NOTE(review): the positional 1 is presumably the node count --
        # confirm against h2o.build_cloud.
        h2o.build_cloud(1, java_heap_GB=10)
        # Alternative (disabled): h2o_hosts.build_cloud_with_hosts()

    @classmethod
    def tearDownClass(cls):
        h2o.tear_down_cloud()

    def test_GBMScore(self):
        """Train a GBM on allyears2k.csv, then time prediction generation.

        The same CSV is parsed twice, into ``train.hex`` and ``test.hex``.
        The model is trained on ``train.hex``; predictions are generated on
        ``test.hex`` so that the reported row count (taken from the inspect
        of ``test.hex``) always describes the frame that was actually scored,
        even if the train and test paths are later pointed at different data.
        """
        bucket = 'home-0xdiag-datasets'
        importFolderPath = 'standard'
        csvTrainPath = importFolderPath + '/allyears2k.csv'
        csvTestPath = csvTrainPath
        # NOTE: a split dataset was previously tried here:
        #   importFolderPath = 'newairlines'
        #   csvTrainPath = importFolderPath + '/train/*train*'
        #   csvTestPath = importFolderPath + '/train/*test*'
        trainhex = 'train.hex'
        testhex = 'test.hex'
        model_key = 'GBMScore'
        ntrees = 100
        max_depth = 10

        # Parsing and the inspect run with beta_features off; it is switched
        # on only afterwards, for the GBM build and the scoring call.
        h2o.beta_features = False
        for csv_path, hex_key in ((csvTrainPath, trainhex), (csvTestPath, testhex)):
            h2i.import_parse(bucket=bucket, path=csv_path, schema='local',
                             hex_key=hex_key, timeoutSecs=2400, doSummary=False)
        inspect_test = h2o.nodes[0].inspect(testhex, timeoutSecs=8000)
        h2o.beta_features = True

        response = 'IsDepDelayed'
        ignored_cols = 'DepTime,ArrTime,FlightNum,TailNum,ActualElapsedTime,AirTime,ArrDelay,DepDelay,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed'
        params = {
            'destination_key': model_key,
            'response': response,
            'ignored_cols_by_name': ignored_cols,
            'classification': 1,
            'validation': None,
            'ntrees': ntrees,
            'max_depth': max_depth,
            'learn_rate': 0.00005,
        }
        parseResult = {'destination_key': trainhex}
        h2o_cmd.runGBM(parseResult=parseResult, timeoutSecs=4800, **params)

        # Only the prediction call itself is timed.
        scoreStart = time.time()
        h2o.nodes[0].generate_predictions(model_key=model_key, data_key=testhex)
        scoreElapsed = time.time() - scoreStart

        # Single-argument print(...) behaves the same on Python 2 and 3. The
        # doubled spaces reproduce what the original comma-separated print
        # statement emitted.
        print("It took  %s  seconds to score  %s  rows. Using a GBM with %d %d-deep trees."
              % (scoreElapsed, inspect_test['num_rows'], ntrees, max_depth))
        print("That's  %s  seconds per %d-deep tree."
              % (scoreElapsed / float(ntrees), max_depth))
# Script entry point: when run directly, delegate to h2o.unit_main().
if __name__ == "__main__":
    h2o.unit_main()