diff --git a/app/__init__.py b/app/__init__.py deleted file mode 100644 index d9a49f3..0000000 --- a/app/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["config", "controller", "dao", "model", "service", "utils"] \ No newline at end of file diff --git a/app/common/__init__.py b/app/common/__init__.py new file mode 100644 index 0000000..944f814 --- /dev/null +++ b/app/common/__init__.py @@ -0,0 +1 @@ +__all__ = ["common", "errorcode"] diff --git a/app/config/common.py b/app/common/common.py similarity index 62% rename from app/config/common.py rename to app/common/common.py index 4562ea6..6a14f8e 100644 --- a/app/config/common.py +++ b/app/common/common.py @@ -8,6 +8,10 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ +import traceback +from functools import wraps +from errorcode import * + DEFAULT_WINDOW = 180 INPUT_LEN_ENG_MAX = 32 INPUT_LEN_CH_MAX = 64 @@ -17,3 +21,20 @@ UPLOAD_FILE = '/tmp/tmpfile_%s.csv' MARK_POSITIVE = 1 MARK_NEGATIVE = 2 + + +def build_ret_data(ret_code, data=""): + return {"code": ret_code, "msg": ERR_CODE[ret_code], "data": data} + + +def exce_service(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + ret_code, ret_data = func(*args, **kwargs) + return_dict = build_ret_data(ret_code, ret_data) + except Exception as ex: + traceback.print_exc() + return_dict = build_ret_data(THROW_EXP, str(ex)) + return return_dict + return wrapper diff --git a/app/config/errorcode.py b/app/common/errorcode.py similarity index 99% rename from app/config/errorcode.py rename to app/common/errorcode.py index b1c312b..95939aa 100644 --- a/app/config/errorcode.py +++ b/app/common/errorcode.py @@ -32,4 +32,4 @@ READ_FEATURE_FAILED: "读取特征数据失败", TRAIN_ERR: "训练出错", LACK_SAMPLE: "缺少正样本或负样本" -} \ No newline at end of file +} diff --git a/app/config/__init__.py b/app/config/__init__.py deleted file mode 100644 index 65aa12f..0000000 --- a/app/config/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["database", "common", "errorcode"] \ No newline at end of file diff --git a/app/controller/api/views.py b/app/controller/api/views.py index 0e4aee4..e78d21e 100644 --- a/app/controller/api/views.py +++ b/app/controller/api/views.py @@ -1,16 +1,16 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import json +from functools import wraps from django.shortcuts import render from django.http import FileResponse -from common.render import render_json -from functools import wraps +from render import render_json from app.service.time_series_detector.anomaly_service import * from app.service.time_series_detector.sample_service import * from app.service.time_series_detector.task_service import * from app.service.time_series_detector.detect_service import * -from app.config.errorcode import * -from app.utils.utils import * +from app.common.errorcode import * +from app.common.common import * def check_post(func): diff --git a/app/controller/common/__init__.py b/app/controller/common/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/controller/common/render.py b/app/controller/render.py similarity index 100% rename from app/controller/common/render.py rename to app/controller/render.py diff --git a/app/dao/__init__.py b/app/dao/__init__.py index 0850b63..cd4665e 100644 --- 
a/app/dao/__init__.py +++ b/app/dao/__init__.py @@ -1 +1 @@ -__all__ = ["time_series_detector"] \ No newline at end of file +__all__ = ["db_common", "time_series_detector"] diff --git a/app/dao/db_common/__init__.py b/app/dao/db_common/__init__.py new file mode 100644 index 0000000..4511830 --- /dev/null +++ b/app/dao/db_common/__init__.py @@ -0,0 +1 @@ +__all__ = ["database"] diff --git a/app/config/database.py b/app/dao/db_common/database.py similarity index 100% rename from app/config/database.py rename to app/dao/db_common/database.py diff --git a/app/dao/time_series_detector/__init__.py b/app/dao/time_series_detector/__init__.py index a86c554..4b655b3 100644 --- a/app/dao/time_series_detector/__init__.py +++ b/app/dao/time_series_detector/__init__.py @@ -1 +1 @@ -__all__ = ["anomaly_op", "sample_op", "train_op"] \ No newline at end of file +__all__ = ["anomaly_op", "sample_op", "train_op"] diff --git a/app/dao/time_series_detector/anomaly_op.py b/app/dao/time_series_detector/anomaly_op.py index 1facf0c..d048005 100644 --- a/app/dao/time_series_detector/anomaly_op.py +++ b/app/dao/time_series_detector/anomaly_op.py @@ -8,13 +8,11 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import time -import datetime import MySQLdb -from app.config import database +from app.dao.db_common import database from app.dao.time_series_detector.sample_op import * -from app.config.common import * -from app.config.errorcode import * +from app.common.common import * +from app.common.errorcode import * class AbnormalOperation(object): diff --git a/app/dao/time_series_detector/sample_op.py b/app/dao/time_series_detector/sample_op.py index ad9afd1..958d05e 100644 --- a/app/dao/time_series_detector/sample_op.py +++ b/app/dao/time_series_detector/sample_op.py @@ -13,9 +13,9 @@ import csv import codecs import MySQLdb -from app.config import database -from app.config.common import * -from app.config.errorcode import * +from app.dao.db_common import database +from app.common.common import * +from app.common.errorcode import * class SampleOperation(object): diff --git a/app/dao/time_series_detector/train_op.py b/app/dao/time_series_detector/train_op.py index 527adc1..75a0856 100644 --- a/app/dao/time_series_detector/train_op.py +++ b/app/dao/time_series_detector/train_op.py @@ -9,9 +9,9 @@ """ import MySQLdb -from app.config import database -from app.config.common import * -from app.config.errorcode import * +from app.dao.db_common import database +from app.common.common import * +from app.common.errorcode import * class TrainOperation(object): diff --git a/app/model/__init__.py b/app/model/__init__.py deleted file mode 100644 index 0850b63..0000000 --- a/app/model/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["time_series_detector"] \ No newline at end of file diff --git a/app/model/time_series_detector/__init__.py b/app/model/time_series_detector/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/service/__init__.py b/app/service/__init__.py index 6022528..6d39471 100644 --- a/app/service/__init__.py +++ b/app/service/__init__.py @@ -1 +1 @@ -__all__ = ["time_series_detector"] +__all__ = ["time_series_detector"] diff --git a/app/service/time_series_detector/__init__.py b/app/service/time_series_detector/__init__.py index 
62d4366..f5cbe93 100644 --- a/app/service/time_series_detector/__init__.py +++ b/app/service/time_series_detector/__init__.py @@ -1 +1 @@ -__all__ = ["algorithm", "feature", "anomaly_service", "sample_service", "task_service", "detect_service"] +__all__ = ["anomaly_service", "sample_service", "task_service", "detect_service"] diff --git a/app/service/time_series_detector/anomaly_service.py b/app/service/time_series_detector/anomaly_service.py index 8e170fd..6af20d7 100644 --- a/app/service/time_series_detector/anomaly_service.py +++ b/app/service/time_series_detector/anomaly_service.py @@ -10,7 +10,6 @@ import json from app.dao.time_series_detector.anomaly_op import * -from app.utils.utils import * class AnomalyService(object): diff --git a/app/service/time_series_detector/detect_service.py b/app/service/time_series_detector/detect_service.py index 8bd840e..bc32f59 100644 --- a/app/service/time_series_detector/detect_service.py +++ b/app/service/time_series_detector/detect_service.py @@ -14,11 +14,12 @@ from app.dao.time_series_detector import anomaly_op from app.dao.time_series_detector import sample_op from app.dao.time_series_detector import train_op -from app.utils.utils import * -from app.service.time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting -from app.config.errorcode import * -from app.config.common import * -MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../model/time_series_detector/') +from time_series_detector.algorithm import xgboosting +from time_series_detector import detect +from app.common.errorcode import * +from app.common.common import * +from time_series_detector.common.tsd_errorcode import * +MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/') class DetectService(object): @@ -26,11 +27,7 @@ class DetectService(object): def __init__(self): self.sample_op_obj = sample_op.SampleOperation() self.anomaly_op_obj = anomaly_op.AbnormalOperation() - self.iforest_obj = isolation_forest.IForest() - self.ewma_obj = ewma.Ewma() - self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation() - self.statistic_obj = statistic.Statistic() - self.supervised_obj = xgboosting.XGBoosting() + self.detect_obj = detect.Detect() def __generate_model(self, data, task_id): """ @@ -125,62 +122,14 @@ def __list_is_digit(self, data): def __check_param(self, data): if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()): return CHECK_PARAM_FAILED, "missing parameter" - if not data['dataA']: - return CHECK_PARAM_FAILED, "dataA can not be empty" - if not data['dataB']: - return CHECK_PARAM_FAILED, "dataB can not be empty" - if not data['dataC']: - return CHECK_PARAM_FAILED, "dataC can not be empty" - if not self.__list_is_digit(data['dataA'].split(',')): - return CHECK_PARAM_FAILED, "dataA contains illegal numbers" - if not self.__list_is_digit(data['dataB'].split(',')): - return CHECK_PARAM_FAILED, "dataB contains illegal numbers" - if not self.__list_is_digit(data['dataC'].split(',')): - return CHECK_PARAM_FAILED, "dataC contains illegal numbers" - if "window" in data: - window = data["window"] - else: - window = DEFAULT_WINDOW - if len(data['dataC'].split(',')) != (2 * window + 1): - return CHECK_PARAM_FAILED, "dataC length does not match" - if len(data['dataB'].split(',')) != (2 * window + 1): - return CHECK_PARAM_FAILED, "dataB 
length does not match" - if len(data['dataA'].split(',')) != (window + 1): - return CHECK_PARAM_FAILED, "dataA length does not match" return OP_SUCCESS, "" def value_predict(self, data): - """ - Predict the data - - :param data: the time series to detect of - """ ret_code, ret_data = self.__check_param(data) if ret_code != OP_SUCCESS: return build_ret_data(ret_code, ret_data) - if "taskId" in data and data["taskId"]: - model_name = MODEL_PATH + data["taskId"] + "_model" - else: - model_name = MODEL_PATH + "xgb_default_model" - combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"] - time_series = map(int, combined_data.split(',')) - if "window" in data: - window = data["window"] - else: - window = DEFAULT_WINDOW - statistic_result = self.statistic_obj.predict(time_series) - ewma_result = self.ewma_obj.predict(time_series) - polynomial_result = self.polynomial_obj.predict(time_series, window) - iforest_result = self.iforest_obj.predict(time_series, window) - if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0: - xgb_result = self.supervised_obj.predict(time_series, window, model_name) - res_value = xgb_result[0] - prob = xgb_result[1] - else: - res_value = 1 - prob = 1 - ret_data = {"ret": res_value, "p": str(prob)} - if ret_data["ret"] == 0: + ret_code, ret_data = self.detect_obj.value_predict(data) + if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0: anomaly_params = { "view_id": data["viewId"], "view_name": data["viewName"], @@ -192,18 +141,14 @@ def value_predict(self, data): "data_a": data["dataA"] } self.anomaly_op_obj.insert_anomaly(anomaly_params) - return build_ret_data(OP_SUCCESS, ret_data) + return build_ret_data(ret_code, ret_data) def rate_predict(self, data): - combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"] - time_series = map(float, combined_data.split(',')) - statistic_result = self.statistic_obj.predict(time_series) - if statistic_result == 0: - prob = 0 - else: - prob = 1 - ret_data = {"ret": statistic_result, "p": str(prob)} - if ret_data["ret"] == 0: + ret_code, ret_data = self.__check_param(data) + if ret_code != OP_SUCCESS: + return build_ret_data(ret_code, ret_data) + ret_data, ret_data = self.detect_obj.rate_predict(data) + if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0: anomaly_params = { "view_id": data["viewId"], "view_name": data["viewName"], diff --git a/app/service/time_series_detector/sample_service.py b/app/service/time_series_detector/sample_service.py index e2f3a14..cf4a188 100644 --- a/app/service/time_series_detector/sample_service.py +++ b/app/service/time_series_detector/sample_service.py @@ -12,9 +12,8 @@ import traceback import csv from app.dao.time_series_detector.sample_op import * -from app.config.errorcode import * -from app.utils.utils import * -from app.config.common import * +from app.common.errorcode import * +from app.common.common import * class SampleService(object): diff --git a/app/service/time_series_detector/task_service.py b/app/service/time_series_detector/task_service.py index 73e1cc5..56ef07c 100644 --- a/app/service/time_series_detector/task_service.py +++ b/app/service/time_series_detector/task_service.py @@ -10,8 +10,8 @@ import json from app.dao.time_series_detector.train_op import * -from app.config.errorcode import * -from app.utils.utils import * +from app.common.errorcode import * +from app.common.common import * class TrainService(object): diff --git a/app/utils/__init__.py b/app/utils/__init__.py deleted file mode 100644 
index 406d035..0000000 --- a/app/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["utils"] \ No newline at end of file diff --git a/docs/install.md b/docs/install.md index 7a2e7ac..7b0dcfc 100644 --- a/docs/install.md +++ b/docs/install.md @@ -137,7 +137,7 @@ python /data/Metis/app/controller/manage.py runserver {ip}:{port} 运行npm run build -将uweb目录下的custom文件夹下复制到uweb目录下生成的dist文件夹中 +将uweb目录下的custom文件夹复制到uweb目录下生成的dist文件夹中 将nginx配置文件中的root定位到uweb目录下的dist文件夹 diff --git a/app/sql/time_series_detector/anomaly.sql b/sql/time_series_detector/anomaly.sql similarity index 100% rename from app/sql/time_series_detector/anomaly.sql rename to sql/time_series_detector/anomaly.sql diff --git a/app/sql/time_series_detector/sample_dataset.sql b/sql/time_series_detector/sample_dataset.sql similarity index 100% rename from app/sql/time_series_detector/sample_dataset.sql rename to sql/time_series_detector/sample_dataset.sql diff --git a/app/sql/time_series_detector/train_task.sql b/sql/time_series_detector/train_task.sql similarity index 97% rename from app/sql/time_series_detector/train_task.sql rename to sql/time_series_detector/train_task.sql index 9d13feb..fc157de 100644 --- a/app/sql/time_series_detector/train_task.sql +++ b/sql/time_series_detector/train_task.sql @@ -1,25 +1,25 @@ -SET FOREIGN_KEY_CHECKS=0; --- ---------------------------- --- Table structure for `train_task` --- ---------------------------- -DROP TABLE IF EXISTS `train_task`; -CREATE TABLE `train_task` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `task_id` char(255) DEFAULT NULL, - `sample_num` int(11) DEFAULT NULL, - `postive_sample_num` int(11) DEFAULT NULL, - `negative_sample_num` int(11) DEFAULT NULL, - `window` int(2) DEFAULT NULL, - `model_name` varchar(20) DEFAULT NULL, - `source` varchar(255) DEFAULT NULL, - `start_time` timestamp NULL DEFAULT NULL, - `end_time` timestamp NULL DEFAULT NULL, - `status` varchar(11) DEFAULT NULL, - PRIMARY KEY (`id`), - KEY `id` (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; - --- ---------------------------- --- Records of train_task --- ---------------------------- -INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete'); +SET FOREIGN_KEY_CHECKS=0; +-- ---------------------------- +-- Table structure for `train_task` +-- ---------------------------- +DROP TABLE IF EXISTS `train_task`; +CREATE TABLE `train_task` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `task_id` char(255) DEFAULT NULL, + `sample_num` int(11) DEFAULT NULL, + `postive_sample_num` int(11) DEFAULT NULL, + `negative_sample_num` int(11) DEFAULT NULL, + `window` int(2) DEFAULT NULL, + `model_name` varchar(20) DEFAULT NULL, + `source` varchar(255) DEFAULT NULL, + `start_time` timestamp NULL DEFAULT NULL, + `end_time` timestamp NULL DEFAULT NULL, + `status` varchar(11) DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `id` (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; + +-- ---------------------------- +-- Records of train_task +-- ---------------------------- +INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete'); diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..6c0a388 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1 @@ +__all__ = ["fixtures", "test_feature"] diff --git a/tests/test_feature.py b/tests/test_feature.py index 
c3fd583..9f6dd54 100644 --- a/tests/test_feature.py +++ b/tests/test_feature.py @@ -9,8 +9,7 @@ """ from tests.fixtures import DataTestCase -from app.service.time_series_detector.feature.statistical_features import * - +from time_series_detector.feature.statistical_features import * class FeatureTestCase(DataTestCase): diff --git a/time_series_detector/__init__.py b/time_series_detector/__init__.py new file mode 100644 index 0000000..1c2a0d7 --- /dev/null +++ b/time_series_detector/__init__.py @@ -0,0 +1 @@ +__all__ = ["algorithm", "feature", "common", "detect"] diff --git a/app/service/time_series_detector/algorithm/__init__.py b/time_series_detector/algorithm/__init__.py similarity index 100% rename from app/service/time_series_detector/algorithm/__init__.py rename to time_series_detector/algorithm/__init__.py diff --git a/app/service/time_series_detector/algorithm/ewma.py b/time_series_detector/algorithm/ewma.py similarity index 100% rename from app/service/time_series_detector/algorithm/ewma.py rename to time_series_detector/algorithm/ewma.py diff --git a/app/service/time_series_detector/algorithm/ewma_and_polynomial.py b/time_series_detector/algorithm/ewma_and_polynomial.py similarity index 92% rename from app/service/time_series_detector/algorithm/ewma_and_polynomial.py rename to time_series_detector/algorithm/ewma_and_polynomial.py index 1d35dbd..4e1f77a 100644 --- a/app/service/time_series_detector/algorithm/ewma_and_polynomial.py +++ b/time_series_detector/algorithm/ewma_and_polynomial.py @@ -8,9 +8,9 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -from app.service.time_series_detector.algorithm import ewma -from app.service.time_series_detector.algorithm import polynomial_interpolation -from app.config.common import * +from time_series_detector.algorithm import ewma +from time_series_detector.algorithm import polynomial_interpolation +from time_series_detector.common.tsd_common import * class EwmaAndPolynomialInterpolation(object): diff --git a/app/service/time_series_detector/algorithm/gbdt.py b/time_series_detector/algorithm/gbdt.py similarity index 92% rename from app/service/time_series_detector/algorithm/gbdt.py rename to time_series_detector/algorithm/gbdt.py index 7675084..ac5daf3 100644 --- a/app/service/time_series_detector/algorithm/gbdt.py +++ b/time_series_detector/algorithm/gbdt.py @@ -13,13 +13,12 @@ import numpy as np from sklearn.ensemble import GradientBoostingClassifier from sklearn.externals import joblib -from app.service.time_series_detector.feature import feature_service -from app.utils.utils import * -from app.config.errorcode import * -from app.config.common import * +from time_series_detector.feature import feature_service +from time_series_detector.common.tsd_common import * +from time_series_detector.common.tsd_errorcode import * -MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/') +MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/') DEFAULT_MODEL = MODEL_PATH + "gbdt_default_model" @@ -73,7 +72,7 @@ def gbdt_train(self, data, task_id, window=DEFAULT_WINDOW): y_train = [] features = self.__calculate_features(data, window) if features: - return LACK_SAMPLE + return TSD_LACK_SAMPLE for index in features: X_train.append(index[0]) y_train.append(index[1]) @@ -85,8 +84,8 @@ def gbdt_train(self, data, task_id, window=DEFAULT_WINDOW): model_name = MODEL_PATH + task_id + "_model" joblib.dump(grd, model_name) except Exception as ex: - return TRAIN_ERR, str(ex) - return OP_SUCCESS, "" + return TSD_TRAIN_ERR, str(ex) + return TSD_OP_SUCCESS, "" def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL): """ diff --git a/app/service/time_series_detector/algorithm/isolation_forest.py b/time_series_detector/algorithm/isolation_forest.py similarity index 98% rename from app/service/time_series_detector/algorithm/isolation_forest.py rename to time_series_detector/algorithm/isolation_forest.py index 9aff256..4ed4d04 100644 --- a/app/service/time_series_detector/algorithm/isolation_forest.py +++ b/time_series_detector/algorithm/isolation_forest.py @@ -9,7 +9,7 @@ """ from sklearn.ensemble import IsolationForest -from app.config.common import * +from time_series_detector.common.tsd_common import * class IForest(object): diff --git a/app/service/time_series_detector/algorithm/polynomial_interpolation.py b/time_series_detector/algorithm/polynomial_interpolation.py similarity index 97% rename from app/service/time_series_detector/algorithm/polynomial_interpolation.py rename to time_series_detector/algorithm/polynomial_interpolation.py index 5efdd3d..bf3ea69 100644 --- a/app/service/time_series_detector/algorithm/polynomial_interpolation.py +++ b/time_series_detector/algorithm/polynomial_interpolation.py @@ -12,7 +12,7 @@ from sklearn.linear_model import Ridge from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline -from app.config.common import * +from time_series_detector.common.tsd_common import * class PolynomialInterpolation(object): diff --git a/app/service/time_series_detector/algorithm/statistic.py 
b/time_series_detector/algorithm/statistic.py similarity index 100% rename from app/service/time_series_detector/algorithm/statistic.py rename to time_series_detector/algorithm/statistic.py diff --git a/app/service/time_series_detector/algorithm/xgboosting.py b/time_series_detector/algorithm/xgboosting.py similarity index 92% rename from app/service/time_series_detector/algorithm/xgboosting.py rename to time_series_detector/algorithm/xgboosting.py index 72f5a86..0a3e851 100644 --- a/app/service/time_series_detector/algorithm/xgboosting.py +++ b/time_series_detector/algorithm/xgboosting.py @@ -10,11 +10,10 @@ import os import xgboost as xgb -from app.service.time_series_detector.feature import feature_service -from app.utils.utils import * -from app.config.errorcode import * -from app.config.common import * -MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/') +from time_series_detector.feature import feature_service +from time_series_detector.common.tsd_errorcode import * +from time_series_detector.common.tsd_common import * +MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/') DEFAULT_MODEL = MODEL_PATH + "xgb_default_model" @@ -76,7 +75,7 @@ def __save_libsvm_format(self, data, feature_file_name): try: f = open(feature_file_name, "w") except Exception as ex: - return CAL_FEATURE_ERR, str(ex) + return TSD_CAL_FEATURE_ERR, str(ex) times = 0 for temp in data: if times > 0: @@ -86,7 +85,7 @@ def __save_libsvm_format(self, data, feature_file_name): for x in result: f.write(' ' + x) times = times + 1 - return OP_SUCCESS, "" + return TSD_OP_SUCCESS, "" def __calculate_features(self, data, feature_file_name, window=DEFAULT_WINDOW): """ @@ -106,7 +105,7 @@ def __calculate_features(self, data, feature_file_name, window=DEFAULT_WINDOW): try: ret_code, ret_data = self.__save_libsvm_format(features, feature_file_name) except Exception as ex: - ret_code = CAL_FEATURE_ERR + ret_code = TSD_CAL_FEATURE_ERR ret_data = str(ex) return ret_code, ret_data @@ -121,12 +120,12 @@ def xgb_train(self, data, task_id, num_round=300): model_name = MODEL_PATH + task_id + "_model" feature_file_name = MODEL_PATH + task_id + "_features" ret_code, ret_data = self.__calculate_features(data, feature_file_name) - if ret_code != OP_SUCCESS: + if ret_code != TSD_OP_SUCCESS: return ret_code, ret_data try: dtrain = xgb.DMatrix(feature_file_name) except Exception as ex: - return READ_FEATURE_FAILED, str(ex) + return TSD_READ_FEATURE_FAILED, str(ex) params = { 'max_depth': self.max_depth, 'eta': self.eta, @@ -143,8 +142,8 @@ def xgb_train(self, data, task_id, num_round=300): bst = xgb.train(params, dtrain, num_round) bst.save_model(model_name) except Exception as ex: - return TRAIN_ERR, str(ex) - return OP_SUCCESS, "" + return TSD_TRAIN_ERR, str(ex) + return TSD_OP_SUCCESS, "" def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL): """ diff --git a/time_series_detector/common/__init__.py b/time_series_detector/common/__init__.py new file mode 100644 index 0000000..ef0a5bd --- /dev/null +++ b/time_series_detector/common/__init__.py @@ -0,0 +1 @@ +__all__ = ["tsd_common", "tsd_errorcode"] diff --git a/app/utils/utils.py b/time_series_detector/common/tsd_common.py similarity index 82% rename from app/utils/utils.py rename to time_series_detector/common/tsd_common.py index e9903cc..c8a5064 100644 --- a/app/utils/utils.py +++ b/time_series_detector/common/tsd_common.py @@ -1,99 +1,80 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Tencent is pleased to support 
the open source community by making Metis available. -Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. -Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -https://opensource.org/licenses/BSD-3-Clause -Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -""" - -import numpy as np -import traceback -from functools import wraps -from app.config.errorcode import * -from app.config.common import * - - -def is_standard_time_series(time_series, window=DEFAULT_WINDOW): - """ - Check the length of time_series. If window = 180, then the length of time_series should be 903. - The mean value of last window should be larger than 0. - - :param time_series: the time series to check, like [data_c, data_b, data_a] - :type time_series: pandas.Series - :param window: the length of window - :return: True or False - :return type: boolean - """ - return bool(len(time_series) == 5 * window + 3 and np.mean(time_series[(4 * window + 2):]) > 0) - - -def split_time_series(time_series, window=DEFAULT_WINDOW): - """ - Spilt the time_series into five parts. Each has a length of window + 1 - - :param time_series: [data_c, data_b, data_a] - :param window: the length of window - :return: spilt list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]] - """ - data_c_left = time_series[0:(window + 1)] - data_c_right = time_series[window:(2 * window + 1)] - data_b_left = time_series[(2 * window + 1):(3 * window + 2)] - data_b_right = time_series[(3 * window + 1):(4 * window + 2)] - data_a = time_series[(4 * window + 2):] - split_time_series = [ - data_c_left, - data_c_right, - data_b_left, - data_b_right, - data_a - ] - return split_time_series - - -def normalize_time_series(split_time_series): - """ - Normalize the split_time_series. 
- - :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]] - :return: all list / mean(split_time_series) - """ - value = np.mean(split_time_series[4]) - if value > 1: - normalized_data_c_left = list(split_time_series[0] / value) - normalized_data_c_right = list(split_time_series[1] / value) - normalized_data_b_left = list(split_time_series[2] / value) - normalized_data_b_right = list(split_time_series[3] / value) - normalized_data_a = list(split_time_series[4] / value) - else: - normalized_data_c_left = split_time_series[0] - normalized_data_c_right = split_time_series[1] - normalized_data_b_left = split_time_series[2] - normalized_data_b_right = split_time_series[3] - normalized_data_a = split_time_series[4] - normalized_split_time_series = [ - normalized_data_c_left, - normalized_data_c_right, - normalized_data_b_left, - normalized_data_b_right, - normalized_data_a - ] - return normalized_split_time_series - - -def build_ret_data(ret_code, data=""): - return {"code": ret_code, "msg": ERR_CODE[ret_code], "data": data} - - -def exce_service(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - ret_code, ret_data = func(*args, **kwargs) - return_dict = build_ret_data(ret_code, ret_data) - except Exception as ex: - traceback.print_exc() - return_dict = build_ret_data(THROW_EXP, str(ex)) - return return_dict - return wrapper +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Tencent is pleased to support the open source community by making Metis available. +Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at +https://opensource.org/licenses/BSD-3-Clause +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. +""" + +import numpy as np + +DEFAULT_WINDOW = 180 + + +def is_standard_time_series(time_series, window=DEFAULT_WINDOW): + """ + Check the length of time_series. If window = 180, then the length of time_series should be 903. + The mean value of last window should be larger than 0. + + :param time_series: the time series to check, like [data_c, data_b, data_a] + :type time_series: pandas.Series + :param window: the length of window + :return: True or False + :return type: boolean + """ + return bool(len(time_series) == 5 * window + 3 and np.mean(time_series[(4 * window + 2):]) > 0) + + +def split_time_series(time_series, window=DEFAULT_WINDOW): + """ + Spilt the time_series into five parts. Each has a length of window + 1 + + :param time_series: [data_c, data_b, data_a] + :param window: the length of window + :return: spilt list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]] + """ + data_c_left = time_series[0:(window + 1)] + data_c_right = time_series[window:(2 * window + 1)] + data_b_left = time_series[(2 * window + 1):(3 * window + 2)] + data_b_right = time_series[(3 * window + 1):(4 * window + 2)] + data_a = time_series[(4 * window + 2):] + split_time_series = [ + data_c_left, + data_c_right, + data_b_left, + data_b_right, + data_a + ] + return split_time_series + + +def normalize_time_series(split_time_series): + """ + Normalize the split_time_series. 
+ + :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]] + :return: all list / mean(split_time_series) + """ + value = np.mean(split_time_series[4]) + if value > 1: + normalized_data_c_left = list(split_time_series[0] / value) + normalized_data_c_right = list(split_time_series[1] / value) + normalized_data_b_left = list(split_time_series[2] / value) + normalized_data_b_right = list(split_time_series[3] / value) + normalized_data_a = list(split_time_series[4] / value) + else: + normalized_data_c_left = split_time_series[0] + normalized_data_c_right = split_time_series[1] + normalized_data_b_left = split_time_series[2] + normalized_data_b_right = split_time_series[3] + normalized_data_a = split_time_series[4] + normalized_split_time_series = [ + normalized_data_c_left, + normalized_data_c_right, + normalized_data_b_left, + normalized_data_b_right, + normalized_data_a + ] + return normalized_split_time_series diff --git a/time_series_detector/common/tsd_errorcode.py b/time_series_detector/common/tsd_errorcode.py new file mode 100644 index 0000000..1f8103e --- /dev/null +++ b/time_series_detector/common/tsd_errorcode.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Tencent is pleased to support the open source community by making Metis available. +Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at +https://opensource.org/licenses/BSD-3-Clause +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. +""" + +TSD_OP_SUCCESS = 0 +TSD_THROW_EXP = 1000 +TSD_CHECK_PARAM_FAILED = 1002 +TSD_FILE_FORMAT_ERR = 1003 +TSD_CAL_FEATURE_ERR = 2001 +TSD_READ_FEATURE_FAILED = 2002 +TSD_TRAIN_ERR = 2003 +TSD_LACK_SAMPLE = 2004 + +ERR_CODE = { + TSD_OP_SUCCESS: "操作成功", + TSD_THROW_EXP: "抛出异常", + TSD_CHECK_PARAM_FAILED: "参数检查失败", + TSD_FILE_FORMAT_ERR: "文件格式有误", + TSD_CAL_FEATURE_ERR: "特征计算出错", + TSD_READ_FEATURE_FAILED: "读取特征数据失败", + TSD_TRAIN_ERR: "训练出错", + TSD_LACK_SAMPLE: "缺少正样本或负样本" +} \ No newline at end of file diff --git a/time_series_detector/detect.py b/time_series_detector/detect.py new file mode 100644 index 0000000..db5a88f --- /dev/null +++ b/time_series_detector/detect.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Tencent is pleased to support the open source community by making Metis available. +Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at +https://opensource.org/licenses/BSD-3-Clause +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
+""" + +import os +from time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting +from time_series_detector.common.tsd_errorcode import * +from time_series_detector.common.tsd_common import * +MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/') + + +class Detect(object): + + def __init__(self): + self.iforest_obj = isolation_forest.IForest() + self.ewma_obj = ewma.Ewma() + self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation() + self.statistic_obj = statistic.Statistic() + self.supervised_obj = xgboosting.XGBoosting() + + def __list_is_digit(self, data): + for index in data: + try: + float(index) + except ValueError: + return False + return True + + def __check_param(self, data): + if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()): + return TSD_CHECK_PARAM_FAILED, "missing parameter" + if not data['dataA']: + return TSD_CHECK_PARAM_FAILED, "dataA can not be empty" + if not data['dataB']: + return TSD_CHECK_PARAM_FAILED, "dataB can not be empty" + if not data['dataC']: + return TSD_CHECK_PARAM_FAILED, "dataC can not be empty" + if not self.__list_is_digit(data['dataA'].split(',')): + return TSD_CHECK_PARAM_FAILED, "dataA contains illegal numbers" + if not self.__list_is_digit(data['dataB'].split(',')): + return TSD_CHECK_PARAM_FAILED, "dataB contains illegal numbers" + if not self.__list_is_digit(data['dataC'].split(',')): + return TSD_CHECK_PARAM_FAILED, "dataC contains illegal numbers" + if "window" in data: + window = data["window"] + else: + window = DEFAULT_WINDOW + if len(data['dataC'].split(',')) != (2 * window + 1): + return TSD_CHECK_PARAM_FAILED, "dataC length does not match" + if len(data['dataB'].split(',')) != (2 * window + 1): + return TSD_CHECK_PARAM_FAILED, "dataB length does not match" + if len(data['dataA'].split(',')) != (window + 1): + return TSD_CHECK_PARAM_FAILED, "dataA length does not match" + return TSD_OP_SUCCESS, "" + + def value_predict(self, data): + """ + Predict the data + + :param data: the time series to detect of + """ + ret_code, ret_data = self.__check_param(data) + if ret_code != TSD_OP_SUCCESS: + return ret_code, ret_data + if "taskId" in data and data["taskId"]: + model_name = MODEL_PATH + data["taskId"] + "_model" + else: + model_name = MODEL_PATH + "xgb_default_model" + combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"] + time_series = map(int, combined_data.split(',')) + if "window" in data: + window = data["window"] + else: + window = DEFAULT_WINDOW + statistic_result = self.statistic_obj.predict(time_series) + ewma_result = self.ewma_obj.predict(time_series) + polynomial_result = self.polynomial_obj.predict(time_series, window) + iforest_result = self.iforest_obj.predict(time_series, window) + if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0: + xgb_result = self.supervised_obj.predict(time_series, window, model_name) + res_value = xgb_result[0] + prob = xgb_result[1] + else: + res_value = 1 + prob = 1 + ret_data = {"ret": res_value, "p": str(prob)} + return TSD_OP_SUCCESS, ret_data + + def rate_predict(self, data): + """ + Predict the data + + :param data: the time series to detect of + """ + combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"] + time_series = map(float, combined_data.split(',')) + 
statistic_result = self.statistic_obj.predict(time_series) + if statistic_result == 0: + prob = 0 + else: + prob = 1 + ret_data = {"ret": statistic_result, "p": str(prob)} + return TSD_OP_SUCCESS, ret_data diff --git a/app/service/time_series_detector/feature/__init__.py b/time_series_detector/feature/__init__.py similarity index 100% rename from app/service/time_series_detector/feature/__init__.py rename to time_series_detector/feature/__init__.py diff --git a/app/service/time_series_detector/feature/classification_features.py b/time_series_detector/feature/classification_features.py similarity index 100% rename from app/service/time_series_detector/feature/classification_features.py rename to time_series_detector/feature/classification_features.py diff --git a/app/service/time_series_detector/feature/feature_service.py b/time_series_detector/feature/feature_service.py similarity index 86% rename from app/service/time_series_detector/feature/feature_service.py rename to time_series_detector/feature/feature_service.py index ed0b04f..19379cc 100644 --- a/app/service/time_series_detector/feature/feature_service.py +++ b/time_series_detector/feature/feature_service.py @@ -11,7 +11,7 @@ import statistical_features import classification_features import fitting_features -from app.utils import utils +from time_series_detector.common import tsd_common def extract_features(time_series, window): @@ -25,15 +25,15 @@ def extract_features(time_series, window): :return: the value of features :return type: list with float """ - if not utils.is_standard_time_series(time_series, window): + if not tsd_common.is_standard_time_series(time_series, window): # add your report of this error here... return [] # spilt time_series - split_time_series = utils.split_time_series(time_series, window) + split_time_series = tsd_common.split_time_series(time_series, window) # nomalize time_series - normalized_split_time_series = utils.normalize_time_series(split_time_series) + normalized_split_time_series = tsd_common.normalize_time_series(split_time_series) s_features = statistical_features.get_statistical_features(normalized_split_time_series[4]) f_features = fitting_features.get_fitting_features(normalized_split_time_series) c_features = classification_features.get_classification_features(normalized_split_time_series[0] + normalized_split_time_series[1][1:] + normalized_split_time_series[2] + normalized_split_time_series[3][1:] + normalized_split_time_series[4]) diff --git a/app/service/time_series_detector/feature/fitting_features.py b/time_series_detector/feature/fitting_features.py similarity index 99% rename from app/service/time_series_detector/feature/fitting_features.py rename to time_series_detector/feature/fitting_features.py index 5430213..9a146b6 100644 --- a/app/service/time_series_detector/feature/fitting_features.py +++ b/time_series_detector/feature/fitting_features.py @@ -9,7 +9,7 @@ """ import numpy as np -from app.config.common import * +from time_series_detector.common.tsd_common import * def time_series_moving_average(x): diff --git a/app/service/time_series_detector/feature/statistical_features.py b/time_series_detector/feature/statistical_features.py similarity index 100% rename from app/service/time_series_detector/feature/statistical_features.py rename to time_series_detector/feature/statistical_features.py diff --git a/app/model/time_series_detector/gbdt_default_model b/time_series_detector/model/gbdt_default_model similarity index 100% rename from 
app/model/time_series_detector/gbdt_default_model rename to time_series_detector/model/gbdt_default_model diff --git a/app/model/time_series_detector/xgb_default_model b/time_series_detector/model/xgb_default_model similarity index 100% rename from app/model/time_series_detector/xgb_default_model rename to time_series_detector/model/xgb_default_model
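Note on the relocated response helpers: `build_ret_data` and the `exce_service` decorator keep their behaviour but now live in `app/common/common.py`. A wrapped service method only returns a `(ret_code, ret_data)` tuple; the decorator builds the usual response dict and turns uncaught exceptions into a `THROW_EXP` response. A minimal sketch of a caller after this change; `SampleQueryService` and `query_sample_count` are made-up names, and `OP_SUCCESS` is assumed to still be defined in `app/common/errorcode.py`.

```python
# Illustrative only: SampleQueryService / query_sample_count are hypothetical.
from app.common.common import exce_service
from app.common.errorcode import OP_SUCCESS


class SampleQueryService(object):

    @exce_service
    def query_sample_count(self, params):
        # The wrapped method returns (ret_code, ret_data); exce_service builds
        # {"code": ..., "msg": ERR_CODE[code], "data": ...} via build_ret_data
        # and maps any raised exception to a THROW_EXP response.
        count = len(params.get("samples", []))
        return OP_SUCCESS, {"count": count}
```

Calling `SampleQueryService().query_sample_count({...})` therefore yields the familiar `{"code": ..., "msg": ..., "data": ...}` dict, which the API views can hand to `render_json` as before.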
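`app/utils/utils.py` is reduced to the pure time-series helpers and moves to `time_series_detector/common/tsd_common.py`, which the feature extractors now import as `tsd_common`. A combined series `[data_c, data_b, data_a]` of length `5 * window + 3` splits into five segments of `window + 1` points each, and every segment is scaled by the mean of the latest segment when that mean exceeds 1. A toy walk-through with `window = 2` (numbers chosen only to show the shapes):

```python
from time_series_detector.common import tsd_common

window = 2
# data_c and data_b each contribute 2 * window + 1 points, data_a window + 1,
# so the combined series must hold 5 * window + 3 = 13 values.
series = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

assert tsd_common.is_standard_time_series(series, window)

parts = tsd_common.split_time_series(series, window)
# -> [[1, 2, 3], [3, 4, 5], [6, 7, 8], [8, 9, 10], [11, 12, 13]]
#    (data_c left/right, data_b left/right, data_a; halves overlap by one point)

normalized = tsd_common.normalize_time_series(parts)
# every segment is divided by mean(data_a) = 12, since that mean is > 1
```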
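The value-detection path is now callable without Django through `time_series_detector/detect.py`: `Detect.value_predict` validates the request dict (`dataC` and `dataB` must each carry `2 * window + 1` comma-separated points, `dataA` must carry `window + 1`), runs the statistic/EWMA/polynomial/iForest detectors, falls back to the XGBoost model only when one of them flags the point, and returns `(ret_code, {"ret": ..., "p": ...})`, where `ret == 0` marks an anomaly. A rough usage sketch, assuming the packaged `xgb_default_model` under `time_series_detector/model/` is in place; all field values are synthetic.

```python
from time_series_detector import detect
from time_series_detector.common.tsd_errorcode import TSD_OP_SUCCESS

window = 10
reference = ",".join(["100"] * (2 * window + 1))  # dataC / dataB: 2 * window + 1 points
latest = ",".join(["100"] * (window + 1))         # dataA: window + 1 points

request = {
    "viewName": "demo_view",   # synthetic metadata, only checked for presence
    "attrId": "1",
    "attrName": "demo_attr",
    "time": "2018-09-01 16:45:00",
    "dataC": reference,
    "dataB": reference,
    "dataA": latest,
    "window": window,
}

ret_code, ret_data = detect.Detect().value_predict(request)
if ret_code == TSD_OP_SUCCESS:
    # ret == 0 flags an anomaly (the service layer then records it in the
    # anomaly table); ret == 1 means the point looks normal.
    print("ret=%s p=%s" % (ret_data["ret"], ret_data["p"]))
```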