Skip to content

Commit de20114

Browse files
committed
Merge remote-tracking branch 'refs/remotes/origin/Book'
2 parents e0d3052 + 5fb3ec3 commit de20114

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+17926
-1384
lines changed

Util/Bases.py

-1
Original file line number | Diff line number | Diff line change
@@ -682,7 +682,6 @@ def __init__(self, **kwargs):
682682
self._y_pred_raw = self._y_pred = None
683683
self._sess = tf.Session()
684684

685-
686685
@clf_timing.timeit(level=2, prefix="[Core] ")
687686
def _batch_training(self, x, y, batch_size, train_repeat, *args):
688687
loss, train_step, *args = args

Util/Util.py

+45
Original file line number | Diff line number | Diff line change
@@ -233,6 +233,51 @@ def gen_noisy_linear(size=10000, n_dim=100, n_valid=5, noise_scale=0.5, test_rat
233233
return (x_train_noise, y_train), (x_test, y_test)
234234
return (x_train_noise, DataUtil.get_one_hot(y_train, 2)), (x_test, DataUtil.get_one_hot(y_test, 2))
235235

236+
@staticmethod
def gen_noisy_poly(size=10000, p=3, n_dim=100, n_valid=5, noise_scale=0.5, test_ratio=0.15, one_hot=True):
    """Generate a binary classification dataset with a polynomial decision rule.

    Standard-normal features are drawn for a training set of ``size`` rows and
    a test set of ``int(size * test_ratio)`` rows.  For every degree
    ``1 .. p`` a random subset of ``n_valid`` columns and a random
    ``(n_valid, 1)`` weight vector are drawn; the label is 1 where the sum of
    the per-degree weighted element-wise powers is positive, else 0.

    Labels are computed from the clean training features, while the training
    features that are returned have Gaussian noise (scaled by
    ``noise_scale``) added.  Test features are returned without noise.

    :param size: number of training rows.
    :param p: highest polynomial degree; converted with ``int`` and must be > 1.
    :param n_dim: number of feature columns.
    :param n_valid: number of columns that contribute to each degree's term.
    :param noise_scale: multiplier applied to the training noise.
    :param test_ratio: test-set size as a fraction of ``size``.
    :param one_hot: if true, labels are passed through
        ``DataUtil.get_one_hot(labels, 2)``; otherwise flat ``int8`` arrays
        are returned.
    :return: ``((x_train_noisy, y_train), (x_test, y_test))``.
    """
    p = int(p)
    assert p > 1, "p should be greater than 1"
    higher_degrees = range(2, p + 1)

    # NOTE: the order of the np.random calls below is significant for
    # reproducibility under a fixed seed; keep it as is.
    clean_train = np.random.randn(size, n_dim)
    train_powers = [clean_train]
    train_powers.extend(clean_train ** degree for degree in higher_degrees)
    noisy_train = clean_train + np.random.randn(size, n_dim) * noise_scale

    test_features = np.random.randn(int(size * test_ratio), n_dim)
    test_powers = [test_features]
    test_powers.extend(test_features ** degree for degree in higher_degrees)

    # One column subset and one weight vector per polynomial degree.
    columns = [np.random.permutation(n_dim)[:n_valid] for _ in range(p)]
    weights = [np.random.randn(n_valid, 1) for _ in range(p)]

    def _labels(powers):
        # Sum the per-degree linear responses and threshold at zero.
        terms = [
            feats[..., cols].dot(weight)
            for feats, cols, weight in zip(powers, columns, weights)
        ]
        return (np.sum(terms, axis=0) > 0).astype(np.int8).ravel()

    train_labels = _labels(train_powers)
    test_labels = _labels(test_powers)

    if one_hot:
        return (
            (noisy_train, DataUtil.get_one_hot(train_labels, 2)),
            (test_features, DataUtil.get_one_hot(test_labels, 2)),
        )
    return (noisy_train, train_labels), (test_features, test_labels)
254+
255+
@staticmethod
def gen_special_linear(size=10000, n_dim=10, n_redundant=3, n_categorical=3,
                       cv_ratio=0.15, test_ratio=0.15, one_hot=True):
    """Generate a linear binary dataset padded with redundant and categorical columns.

    The first ``n_dim`` columns are standard-normal features and are the only
    ones that determine the label (``x.dot(w) > 0`` for a random ``w``).  They
    are followed by ``n_redundant`` columns that are constant within a split
    and ``n_categorical`` columns of random integers.

    The extra columns are drawn from different ranges for the two splits:
    redundant constants come from ``[0, 3)`` for train and ``[3, 6)`` for
    test; categorical values come from ``[3, 8)`` for train and ``[0, 5)`` for
    test.  NOTE(review): this mismatch looks deliberate (a "special" dataset
    whose nuisance columns shift between train and test) -- confirm with the
    author before changing it.

    The last ``int(size * cv_ratio)`` training rows are split off as the
    cross-validation set.

    :param size: number of rows generated before the train/CV split.
    :param n_dim: number of informative feature columns.
    :param n_redundant: number of constant columns appended.
    :param n_categorical: number of integer columns appended.
    :param cv_ratio: fraction of ``size`` moved to the CV split.
    :param test_ratio: test-set size as a fraction of ``size``.
    :param one_hot: if true, labels are passed through
        ``DataUtil.get_one_hot(labels, 2)``; otherwise flat ``int8`` arrays
        are returned.
    :return: ``((x_train, y_train), (x_cv, y_cv), (x_test, y_test))``.
    """
    # NOTE: the order of the np.random calls is kept unchanged so results
    # under a fixed seed stay reproducible.
    x_train = np.random.randn(size, n_dim)
    x_train_redundant = np.ones([size, n_redundant]) * np.random.randint(0, 3, n_redundant)
    x_train_categorical = np.random.randint(3, 8, [size, n_categorical])
    x_train_stacked = np.hstack([x_train, x_train_redundant, x_train_categorical])

    n_test = int(size * test_ratio)
    x_test = np.random.randn(n_test, n_dim)
    x_test_redundant = np.ones([n_test, n_redundant]) * np.random.randint(3, 6, n_redundant)
    x_test_categorical = np.random.randint(0, 5, [n_test, n_categorical])
    x_test_stacked = np.hstack([x_test, x_test_redundant, x_test_categorical])

    # Labels depend only on the informative columns.
    w = np.random.randn(n_dim, 1)
    y_train = (x_train.dot(w) > 0).astype(np.int8).ravel()
    y_test = (x_test.dot(w) > 0).astype(np.int8).ravel()

    # Split at an explicit non-negative index.  Slicing with ``[:-n_cv]`` /
    # ``[-n_cv:]`` breaks when n_cv == 0, because ``-0 == 0`` makes the train
    # part empty and hands every row to the CV part.
    n_cv = min(max(int(size * cv_ratio), 0), size)
    split = size - n_cv
    x_train_stacked, x_cv_stacked = x_train_stacked[:split], x_train_stacked[split:]
    y_train, y_cv = y_train[:split], y_train[split:]

    if not one_hot:
        return (x_train_stacked, y_train), (x_cv_stacked, y_cv), (x_test_stacked, y_test)
    return (
        (x_train_stacked, DataUtil.get_one_hot(y_train, 2)),
        (x_cv_stacked, DataUtil.get_one_hot(y_cv, 2)),
        (x_test_stacked, DataUtil.get_one_hot(y_test, 2))
    )
280+
236281
@staticmethod
237282
def quantize_data(x, y, wc=None, continuous_rate=0.1, separate=False):
238283
if isinstance(x, list):

0 commit comments

Comments
 (0)