-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Bruno Sofiato <bruno.sofiato@gmail.com>
- Loading branch information
Showing
38 changed files
with
688 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// Copyright 2024 The Gitea Authors. All rights reserved. | ||
// SPDX-License-Identifier: MIT | ||
|
||
package path | ||
|
||
import ( | ||
"slices" | ||
"strings" | ||
|
||
"github.com/blevesearch/bleve/v2/analysis" | ||
"github.com/blevesearch/bleve/v2/registry" | ||
) | ||
|
||
// Name is the identifier under which this token filter is registered
// in the bleve registry.
const Name = "gitea/path"
|
||
// TokenFilter is a stateless bleve token filter that turns a stream of path
// components into hierarchical path terms.
type TokenFilter struct{}
|
||
func NewTokenFilter() *TokenFilter { | ||
return &TokenFilter{} | ||
} | ||
|
||
func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) { | ||
return NewTokenFilter(), nil | ||
} | ||
|
||
func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | ||
if len(input) == 1 { | ||
// if there is only one token, we dont need to generate the reversed chain | ||
return generatePathTokens(input, false) | ||
} | ||
|
||
normal := generatePathTokens(input, false) | ||
reversed := generatePathTokens(input, true) | ||
|
||
return append(normal, reversed...) | ||
} | ||
|
||
// Generates path tokens from the input tokens. | ||
// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component | ||
// in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md). | ||
// | ||
// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful | ||
// to efficiently search for filenames without supplying the fullpath. | ||
func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream { | ||
terms := make([]string, 0, len(input)) | ||
longestTerm := 0 | ||
|
||
if reversed { | ||
slices.Reverse(input) | ||
} | ||
|
||
for i := 0; i < len(input); i++ { | ||
var sb strings.Builder | ||
sb.WriteString(string(input[0].Term)) | ||
|
||
for j := 1; j < i; j++ { | ||
sb.WriteString("/") | ||
sb.WriteString(string(input[j].Term)) | ||
} | ||
|
||
term := sb.String() | ||
|
||
if longestTerm < len(term) { | ||
longestTerm = len(term) | ||
} | ||
|
||
terms = append(terms, term) | ||
} | ||
|
||
output := make(analysis.TokenStream, 0, len(terms)) | ||
|
||
for _, term := range terms { | ||
var start, end int | ||
|
||
if reversed { | ||
start = 0 | ||
end = len(term) | ||
} else { | ||
start = longestTerm - len(term) | ||
end = longestTerm | ||
} | ||
|
||
token := analysis.Token{ | ||
Position: 1, | ||
Start: start, | ||
End: end, | ||
Type: analysis.AlphaNumeric, | ||
Term: []byte(term), | ||
} | ||
|
||
output = append(output, &token) | ||
} | ||
|
||
return output | ||
} | ||
|
||
func init() { | ||
registry.RegisterTokenFilter(Name, TokenFilterConstructor) | ||
} |
Oops, something went wrong.