Skip to content

Commit

Permalink
Merge branch 'caiyd_refactor_config_1' into 'branch-0.5.0'
Browse files Browse the repository at this point in the history
MS-574 Milvus configuration refactor

See merge request megasearch/milvus!610

Former-commit-id: 9d0f05589f4344bb029c2f65470d9b567999ae3f
  • Loading branch information
XuPeng-SH committed Sep 26, 2019
2 parents 3b67f09 + 44b5397 commit 64e3cc0
Show file tree
Hide file tree
Showing 33 changed files with 1,484 additions and 917 deletions.
3 changes: 2 additions & 1 deletion cpp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-562 - Add JobMgr and TaskCreator in Scheduler
- MS-566 - Refactor cmake
- MS-555 - Remove old scheduler
- MS-578 - Makesure milvus5.0 don't crack 0.3.1 data
- MS-574 - Milvus configuration refactor
- MS-578 - Make sure Milvus 0.5.0 doesn't break 0.3.1 data
- MS-585 - Update namespace in scheduler

## New Feature
Expand Down
45 changes: 21 additions & 24 deletions cpp/conf/server_config.template
Original file line number Diff line number Diff line change
@@ -1,42 +1,39 @@
server_config:
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534
mode: single # milvus deployment type: single, cluster, read_only
time_zone: UTC+8 # Use the UTC-x or UTC+x to specify a time zone. eg. UTC+8 for China Standard Time
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # port range: 1025 ~ 65534
mode: single # deployment type: single, cluster, read_only
time_zone: UTC+8

db_config:
db_path: @MILVUS_DB_PATH@ # milvus data storage path
db_slave_path: # secondry data storage path, split by semicolon
path: @MILVUS_DB_PATH@ # milvus database path
slave_path: # secondary database paths, separated by semicolons

# URI format: dialect://username:password@host:port/database
# All parts except dialect are optional, but you MUST include the delimiters
# Currently dialect supports mysql or sqlite
db_backend_url: sqlite://:@:/
backend_url: sqlite://:@:/

archive_disk_threshold: 0 # triger archive action if storage size exceed this value, 0 means no limit, unit: GB
archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day
insert_buffer_size: 4 # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB.
# the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory, unit: GB
build_index_gpu: 0 # which gpu is used to build index, default: 0, range: 0 ~ gpu number - 1
archive_disk_threshold: 0 # GB, files will be archived when disk usage exceeds this value, 0 for no limit
archive_days_threshold: 0 # DAYS, older files will be archived, 0 for no limit
buffer_size: 4 # GB, maximum insert buffer size allowed
build_index_gpu: 0 # gpu id used for building index

metric_config:
is_startup: off # if monitoring start: on, off
collector: prometheus # metrics collector: prometheus
prometheus_config: # following are prometheus configure
port: 8080 # the port prometheus use to fetch metrics
push_gateway_ip_address: 127.0.0.1 # push method configure: push gateway ip address
push_gateway_port: 9091 # push method configure: push gateway port
auto_bootup: off # whether to enable monitoring at bootup
collector: prometheus # metrics collector: prometheus
prometheus_config:
port: 8080 # port prometheus uses to fetch metrics

cache_config:
cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory
cpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0
insert_cache_immediately: false # insert data will be load into cache immediately for hot query
cpu_mem_capacity: 16 # GB, CPU memory size used for cache
cpu_mem_threshold: 0.85 # fraction of data kept when cache cleanup is triggered, range: (0.0, 1.0]
cache_insert_data: false # whether to load data into cache when it is inserted

engine_config:
use_blas_threshold: 20
blas_threshold: 20

resource_config:
mode: simple
resources:
# - cpu
pool:
- cpu
- gpu0
28 changes: 19 additions & 9 deletions cpp/src/cache/CpuCacheMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


#include "CpuCacheMgr.h"
#include "server/ServerConfig.h"
#include "server/Config.h"
#include "utils/Log.h"

namespace zilliz {
Expand All @@ -29,17 +29,27 @@ namespace {
}

CpuCacheMgr::CpuCacheMgr() {
server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
int64_t cap = config.GetInt64Value(server::CONFIG_CPU_CACHE_CAPACITY, 16);
cap *= unit;
server::Config& config = server::Config::GetInstance();
Status s;

int32_t cpu_mem_cap;
s = config.GetCacheConfigCpuMemCapacity(cpu_mem_cap);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
int64_t cap = cpu_mem_cap * unit;
cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL<<32);

double free_percent = config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85);
if(free_percent > 0.0 && free_percent <= 1.0) {
cache_->set_freemem_percent(free_percent);
float cpu_mem_threshold;
s = config.GetCacheConfigCpuMemThreshold(cpu_mem_threshold);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
if (cpu_mem_threshold > 0.0 && cpu_mem_threshold <= 1.0) {
cache_->set_freemem_percent(cpu_mem_threshold);
} else {
SERVER_LOG_ERROR << "Invalid cache_free_percent: " << free_percent <<
", defaultly set to " << cache_->freemem_percent();
SERVER_LOG_ERROR << "Invalid cpu_mem_threshold: " << cpu_mem_threshold
<< ", by default set to " << cache_->freemem_percent();
}
}

Expand Down
27 changes: 18 additions & 9 deletions cpp/src/cache/GpuCacheMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <sstream>
#include "utils/Log.h"
#include "GpuCacheMgr.h"
#include "server/ServerConfig.h"
#include "server/Config.h"

namespace zilliz {
namespace milvus {
Expand All @@ -33,18 +33,27 @@ namespace {
}

GpuCacheMgr::GpuCacheMgr() {
server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
server::Config& config = server::Config::GetInstance();
Status s;

int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 0);
cap *= G_BYTE;
int32_t gpu_mem_cap;
s = config.GetCacheConfigGpuMemCapacity(gpu_mem_cap);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
int32_t cap = gpu_mem_cap * G_BYTE;
cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL<<32);

double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85);
if (free_percent > 0.0 && free_percent <= 1.0) {
cache_->set_freemem_percent(free_percent);
float gpu_mem_threshold;
s = config.GetCacheConfigGpuMemThreshold(gpu_mem_threshold);
if (!s.ok()) {
SERVER_LOG_ERROR << s.message();
}
if (gpu_mem_threshold > 0.0 && gpu_mem_threshold <= 1.0) {
cache_->set_freemem_percent(gpu_mem_threshold);
} else {
SERVER_LOG_ERROR << "Invalid gpu_cache_free_percent: " << free_percent <<
", defaultly set to " << cache_->freemem_percent();
SERVER_LOG_ERROR << "Invalid gpu_mem_threshold: " << gpu_mem_threshold
<< ", by default set to " << cache_->freemem_percent();
}
}

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/db/engine/ExecutionEngineImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "src/wrapper/vec_index.h"
#include "src/wrapper/vec_impl.h"
#include "knowhere/common/Exception.h"
#include "server/Config.h"

#include <stdexcept>

Expand Down Expand Up @@ -326,9 +327,9 @@ Status ExecutionEngineImpl::GpuCache(uint64_t gpu_id) {
// TODO(linxj): remove.
Status ExecutionEngineImpl::Init() {
using namespace zilliz::milvus::server;
ServerConfig &config = ServerConfig::GetInstance();
ConfigNode server_config = config.GetConfig(CONFIG_DB);
gpu_num_ = server_config.GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0);
server::Config &config = server::Config::GetInstance();
Status s = config.GetDBConfigBuildIndexGPU(gpu_num_);
if (!s.ok()) return s;

return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/db/engine/ExecutionEngineImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
std::string location_;

int32_t nlist_ = 0;
int64_t gpu_num_ = 0;
int32_t gpu_num_ = 0;
};


Expand Down
2 changes: 1 addition & 1 deletion cpp/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ main(int argc, char *argv[]) {
signal(SIGUSR2, server::SignalUtil::HandleSignal);
signal(SIGTERM, server::SignalUtil::HandleSignal);

server::Server &server = server::Server::Instance();
server::Server &server = server::Server::GetInstance();
server.Init(start_daemonized, pid_filename, config_filename, log_config_file);
server.Start();

Expand Down
6 changes: 0 additions & 6 deletions cpp/src/metrics/MetricBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#pragma once

#include "utils/Error.h"
#include "server/ServerConfig.h"
#include "SystemInfo.h"

namespace zilliz {
Expand Down Expand Up @@ -83,11 +82,6 @@ class MetricsBase{
virtual void CPUTemperature() {};
};






}
}
}
7 changes: 5 additions & 2 deletions cpp/src/metrics/Metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

#include "Metrics.h"
#include "server/Config.h"
#include "PrometheusMetrics.h"


Expand All @@ -31,8 +32,10 @@ Metrics::GetInstance() {

MetricsBase &
Metrics::CreateMetricsCollector() {
ConfigNode &config = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC);
std::string collector_type_str = config.GetValue(CONFIG_METRIC_COLLECTOR);
Config &config = Config::GetInstance();
std::string collector_type_str;

config.GetMetricConfigCollector(collector_type_str);

if (collector_type_str == "prometheus") {
return PrometheusMetrics::GetInstance();
Expand Down
19 changes: 12 additions & 7 deletions cpp/src/metrics/PrometheusMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
// under the License.


#include <cache/GpuCacheMgr.h>
#include "cache/GpuCacheMgr.h"
#include "PrometheusMetrics.h"
#include "server/Config.h"
#include "utils/Log.h"
#include "SystemInfo.h"

Expand All @@ -26,15 +27,19 @@ namespace zilliz {
namespace milvus {
namespace server {

ErrorCode
ErrorCode
PrometheusMetrics::Init() {
try {
ConfigNode &configNode = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC);
startup_ = configNode.GetValue(CONFIG_METRIC_IS_STARTUP) == "on";
if(!startup_) return SERVER_SUCCESS;
Config &config = Config::GetInstance();
Status s = config.GetMetricConfigAutoBootup(startup_);
if (!s.ok()) return s.code();
if (!startup_) return SERVER_SUCCESS;

// Following should be read from config file.
const std::string bind_address = configNode.GetChild(CONFIG_PROMETHEUS).GetValue(CONFIG_METRIC_PROMETHEUS_PORT);
const std::string uri = std::string("/metrics");
std::string bind_address;
s = config.GetMetricConfigPrometheusPort(bind_address);
if (!s.ok()) return s.code();
const std::string uri = std::string("/tmp/metrics");
const std::size_t num_threads = 2;

// Init Exposer
Expand Down
19 changes: 2 additions & 17 deletions cpp/src/metrics/PrometheusMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@

#pragma once

#include "utils/Error.h"
#include <memory>
#include <vector>


#include <prometheus/registry.h>
#include <prometheus/exposer.h>
#include <iostream>
#include "server/ServerConfig.h"

#include "utils/Error.h"
#include "MetricBase.h"


Expand All @@ -38,10 +36,6 @@ namespace zilliz {
namespace milvus {
namespace server {





class PrometheusMetrics: public MetricsBase {

public:
Expand Down Expand Up @@ -107,11 +101,6 @@ class PrometheusMetrics: public MetricsBase {
void GPUTemperature() override;
void CPUTemperature() override;






std::shared_ptr<prometheus::Exposer> &exposer_ptr() {return exposer_ptr_; }
// prometheus::Exposer& exposer() { return exposer_;}
std::shared_ptr<prometheus::Registry> &registry_ptr() {return registry_; }
Expand All @@ -125,8 +114,6 @@ class PrometheusMetrics: public MetricsBase {
// .Register(*registry_);
// prometheus::Counter &connection_total_ = connect_request_.Add({});



////all from DBImpl.cpp
using BucketBoundaries = std::vector<double>;
//record add_group request
Expand Down Expand Up @@ -472,10 +459,8 @@ class PrometheusMetrics: public MetricsBase {
.Name("CPU_temperature")
.Help("CPU temperature")
.Register(*registry_);

};


}
}
}
Expand Down
14 changes: 8 additions & 6 deletions cpp/src/scheduler/SchedInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


#include "SchedInst.h"
#include "server/ServerConfig.h"
#include "server/Config.h"
#include "ResourceFactory.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "Utils.h"
Expand All @@ -38,13 +38,15 @@ std::mutex JobMgrInst::mutex_;

void
load_simple_config() {
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
auto mode = config.GetValue("mode", "simple");
server::Config &config = server::Config::GetInstance();
std::string mode;
config.GetResourceConfigMode(mode);
std::vector<std::string> pool;
config.GetResourceConfigPool(pool);

auto resources = config.GetSequence("resources");
bool cpu = false;
std::set<uint64_t> gpu_ids;
for (auto &resource : resources) {
for (auto &resource : pool) {
if (resource == "cpu") {
cpu = true;
break;
Expand Down Expand Up @@ -82,7 +84,7 @@ load_simple_config() {
void
load_advance_config() {
// try {
// server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
// server::ConfigNode &config = server::Config::GetInstance().GetConfig(server::CONFIG_RESOURCE);
//
// if (config.GetChildren().empty()) throw "resource_config null exception";
//
Expand Down
Loading

0 comments on commit 64e3cc0

Please sign in to comment.