Zob-scene
diff --git a/‎Util/Bases.py
-1 b/‎Util/Bases.py
-1
diff --git a/‎Util/Util.py
+45 b/‎Util/Util.py
+45
@@ -682,7 +682,6 @@ def __init__(self, **kwargs):
         self._y_pred_raw = self._y_pred = None
         self._sess = tf.Session()
 
-
     @clf_timing.timeit(level=2, prefix="[Core] ")
     def _batch_training(self, x, y, batch_size, train_repeat, *args):
         loss, train_step, *args = args
 
@@ -233,6 +233,51 @@ def gen_noisy_linear(size=10000, n_dim=100, n_valid=5, noise_scale=0.5, test_rat
             return (x_train_noise, y_train), (x_test, y_test)
         return (x_train_noise, DataUtil.get_one_hot(y_train, 2)), (x_test, DataUtil.get_one_hot(y_test, 2))
 
+    @staticmethod
+    def gen_noisy_poly(size=10000, p=3, n_dim=100, n_valid=5, noise_scale=0.5, test_ratio=0.15, one_hot=True):
+        p = int(p)
+        assert p > 1, "p should be greater than 1"
+        x_train = np.random.randn(size, n_dim)
+        x_train_list = [x_train] + [x_train ** i for i in range(2, p+1)]
+        x_train_noise = x_train + np.random.randn(size, n_dim) * noise_scale
+        x_test = np.random.randn(int(size * test_ratio), n_dim)
+        x_test_list = [x_test] + [x_test ** i for i in range(2, p+1)]
+        idx_list = [np.random.permutation(n_dim)[:n_valid] for _ in range(p)]
+        w_list = [np.random.randn(n_valid, 1) for _ in range(p)]
+        o_train = [x[..., idx].dot(w) for x, idx, w in zip(x_train_list, idx_list, w_list)]
+        o_test = [x[..., idx].dot(w) for x, idx, w in zip(x_test_list, idx_list, w_list)]
+        y_train = (np.sum(o_train, axis=0) > 0).astype(np.int8).ravel()
+        y_test = (np.sum(o_test, axis=0) > 0).astype(np.int8).ravel()
+        if not one_hot:
+            return (x_train_noise, y_train), (x_test, y_test)
+        return (x_train_noise, DataUtil.get_one_hot(y_train, 2)), (x_test, DataUtil.get_one_hot(y_test, 2))
+
+    @staticmethod
+    def gen_special_linear(size=10000, n_dim=10, n_redundant=3, n_categorical=3,
+                           cv_ratio=0.15, test_ratio=0.15, one_hot=True):
+        x_train = np.random.randn(size, n_dim)
+        x_train_redundant = np.ones([size, n_redundant]) * np.random.randint(0, 3, n_redundant)
+        x_train_categorical = np.random.randint(3, 8, [size, n_categorical])
+        x_train_stacked = np.hstack([x_train, x_train_redundant, x_train_categorical])
+        n_test = int(size * test_ratio)
+        x_test = np.random.randn(n_test, n_dim)
+        x_test_redundant = np.ones([n_test, n_redundant]) * np.random.randint(3, 6, n_redundant)
+        x_test_categorical = np.random.randint(0, 5, [n_test, n_categorical])
+        x_test_stacked = np.hstack([x_test, x_test_redundant, x_test_categorical])
+        w = np.random.randn(n_dim, 1)
+        y_train = (x_train.dot(w) > 0).astype(np.int8).ravel()
+        y_test = (x_test.dot(w) > 0).astype(np.int8).ravel()
+        n_cv = int(size * cv_ratio)
+        x_train_stacked, x_cv_stacked = x_train_stacked[:-n_cv], x_train_stacked[-n_cv:]
+        y_train, y_cv = y_train[:-n_cv], y_train[-n_cv:]
+        if not one_hot:
+            return (x_train_stacked, y_train), (x_cv_stacked, y_cv), (x_test_stacked, y_test)
+        return (
+            (x_train_stacked, DataUtil.get_one_hot(y_train, 2)),
+            (x_cv_stacked, DataUtil.get_one_hot(y_cv, 2)),
+            (x_test_stacked, DataUtil.get_one_hot(y_test, 2))
+        )
+
     @staticmethod
     def quantize_data(x, y, wc=None, continuous_rate=0.1, separate=False):
         if isinstance(x, list):