diff --git a/cmd/tools/config/generate.go b/cmd/tools/config/generate.go index 1b50ea64e2ab0..218d816d9e6d8 100644 --- a/cmd/tools/config/generate.go +++ b/cmd/tools/config/generate.go @@ -85,7 +85,7 @@ func collectRecursive(params *paramtable.ComponentParam, data *[]DocContent, val for _, key := range keys { value := m[key] log.Debug("got group entry", zap.String("key", key), zap.String("value", value)) - *data = append(*data, DocContent{fmt.Sprintf("%s%s", item.KeyPrefix, key), quoteIfNeeded(value), item.Version, refreshable, item.Export, ""}) + *data = append(*data, DocContent{fmt.Sprintf("%s%s", item.KeyPrefix, key), quoteIfNeeded(value), item.Version, refreshable, item.Export, item.GetDoc(key)}) } } else { collectRecursive(params, data, &subVal) @@ -338,6 +338,11 @@ func WriteYaml(w io.Writer) { header: ` # Any configuration related to the streaming service.`, }, + { + name: "knowhere", + header: ` +# Any configuration related to the knowhere vector search engine`, + }, } marshller := YamlMarshaller{w, groups, result} marshller.writeYamlRecursive(lo.Filter(result, func(d DocContent, _ int) bool { diff --git a/cmd/tools/config/generate_test.go b/cmd/tools/config/generate_test.go index 43665ac766d4d..48aae3bdd082b 100644 --- a/cmd/tools/config/generate_test.go +++ b/cmd/tools/config/generate_test.go @@ -43,14 +43,23 @@ func TestYamlFile(t *testing.T) { fileScanner := bufio.NewScanner(f) codeScanner := bufio.NewScanner(&w) - for fileScanner.Scan() && codeScanner.Scan() { + msg := func(file, code string) string { + return fmt.Sprintf(`configs/milvus.yaml is not consistent with paramtable, file: [%s], code: [%s]. 
+Do not edit milvus.yaml directly, instead, run "make milvus-tools && ./bin/tools/config gen-yaml && mv milvus.yaml configs/milvus.yaml"`, file, code) + } + for fileScanner.Scan() { + if !codeScanner.Scan() { + assert.FailNow(t, msg(fileScanner.Text(), "EMPTY")) + } if strings.Contains(codeScanner.Text(), "etcd:") || strings.Contains(codeScanner.Text(), "minio:") || strings.Contains(codeScanner.Text(), "pulsar:") { // Skip check of endpoints given by .env continue } if fileScanner.Text() != codeScanner.Text() { - assert.FailNow(t, fmt.Sprintf("configs/milvus.yaml is not consistent with paramtable, file: [%s], code: [%s]. Do not edit milvus.yaml directly.", - fileScanner.Text(), codeScanner.Text())) + assert.FailNow(t, msg(fileScanner.Text(), codeScanner.Text())) } } + if codeScanner.Scan() { + assert.FailNow(t, msg("EMPTY", codeScanner.Text())) + } } diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 8a21b53744b67..03fcbc294f28d 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -1081,11 +1081,11 @@ streaming: # Any configuration related to the knowhere vector search engine knowhere: enable: true # When enable this configuration, the index parameters defined following will be automatically populated as index parameters, without requiring user input. 
- DISKANN: # Index parameters for diskann - build: # Diskann build params + DISKANN: + build: max_degree: 56 # Maximum degree of the Vamana graph - search_list_size: 100 # Size of the candidate list during building graph pq_code_budget_gb_ratio: 0.125 # Size limit on the PQ code (compared with raw data) search_cache_budget_gb_ratio: 0.1 # Ratio of cached node numbers to raw data - search: # Diskann search params - beam_width_ratio: 4.0 # Ratio between the maximum number of IO requests per search iteration and CPU number + search_list_size: 100 # Size of the candidate list during building graph + search: + beam_width_ratio: 4 # Ratio between the maximum number of IO requests per search iteration and CPU number diff --git a/pkg/util/paramtable/knowhere_param.go b/pkg/util/paramtable/knowhere_param.go index 035631fac5153..a41caba44c431 100644 --- a/pkg/util/paramtable/knowhere_param.go +++ b/pkg/util/paramtable/knowhere_param.go @@ -30,6 +30,23 @@ func (p *knowhereConfig) init(base *BaseTable) { p.IndexParam = ParamGroup{ KeyPrefix: "knowhere.", Version: "2.5.0", + Export: true, + DocFunc: func(key string) string { + switch key { + case "DISKANN.build.max_degree": + return "Maximum degree of the Vamana graph" + case "DISKANN.build.pq_code_budget_gb_ratio": + return "Size limit on the PQ code (compared with raw data)" + case "DISKANN.build.search_cache_budget_gb_ratio": + return "Ratio of cached node numbers to raw data" + case "DISKANN.build.search_list_size": + return "Size of the candidate list during building graph" + case "DISKANN.search.beam_width_ratio": + return "Ratio between the maximum number of IO requests per search iteration and CPU number" + default: + return "" + } + }, } p.IndexParam.Init(base.mgr) @@ -37,6 +54,8 @@ func (p *knowhereConfig) init(base *BaseTable) { Key: "knowhere.enable", Version: "2.5.0", DefaultValue: "true", + Export: true, + Doc: "When enable this configuration, the index parameters defined following will be automatically populated 
as index parameters, without requiring user input.", } p.Enable.Init(base.mgr) } diff --git a/pkg/util/paramtable/param_item.go b/pkg/util/paramtable/param_item.go index a33ba1126b49d..b8718b65ba848 100644 --- a/pkg/util/paramtable/param_item.go +++ b/pkg/util/paramtable/param_item.go @@ -314,6 +314,7 @@ type ParamGroup struct { Export bool GetFunc func() map[string]string + DocFunc func(string) string manager *config.Manager } @@ -330,6 +331,14 @@ func (pg *ParamGroup) GetValue() map[string]string { return values } +// GetDoc returns the doc string that DocFunc yields for key, or "" when DocFunc is nil. +func (pg *ParamGroup) GetDoc(key string) string { + if pg.DocFunc != nil { + return pg.DocFunc(key) + } + return "" +} + func ParseAsStings(v string) []string { return getAsStrings(v) }