Skip to content

Commit

Permalink
Refactor and enhance issue indexer to support both searching, filteri…
Browse files Browse the repository at this point in the history
…ng and paging (#26012)

Fix #24662.

Replace #24822 and #25708 (although it has been merged)


## Background

In the past, Gitea supported searching issues with a keyword and
conditions, but in an inefficient way. It searched the indexer
(bleve/elasticsearch/meilisearch) for issues matching the keyword and
fetched only a limited number of IDs (as fetching all of them is
expensive), and then queried the database with the conditions to find a
subset of the found IDs. This is why the results could be incomplete.

To solve this issue, we need to store all fields that could be used as
conditions in the indexer and support both keyword and additional
conditions when searching with the indexer.

## Major changes

- Redefine `IndexerData` to include all fields that could be used as
filter conditions.
- Refactor `Search(ctx context.Context, kw string, repoIDs []int64,
limit, start int, state string)` to `Search(ctx context.Context, options
*SearchOptions)`, so it supports more conditions now.
- Change the data type stored in `issueIndexerQueue`. Use
`IndexerMetadata` instead of `IndexerData` in case the data has been
updated while it is in the queue. This also reduces the storage size of
the queue.
- Enhance searching with Bleve/Elasticsearch/Meilisearch, make them
fully support `SearchOptions`. Also, update the data versions.
- Keep most of the logic of the database indexer, but remove
`issues.SearchIssueIDsByKeyword` in `models` to avoid confusion about
where the entry point for searching issues is.
- Start a Meilisearch instance to test it in unit tests.
- Add unit tests with almost full coverage to test the
Bleve/Elasticsearch/Meilisearch indexers.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
  • Loading branch information
wolfogre and lunny authored Jul 31, 2023
1 parent aba9096 commit 1e76a82
Show file tree
Hide file tree
Showing 37 changed files with 2,955 additions and 851 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/pull-db-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ jobs:
discovery.type: single-node
ports:
- "9200:9200"
meilisearch:
image: getmeili/meilisearch:v1.2.0
env:
MEILI_ENV: development # disable auth
ports:
- "7700:7700"
smtpimap:
image: tabascoterrier/docker-imap-devel:latest
ports:
Expand Down Expand Up @@ -128,7 +134,7 @@ jobs:
go-version: ">=1.20"
check-latest: true
- name: Add hosts to /etc/hosts
run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mysql elasticsearch smtpimap" | sudo tee -a /etc/hosts'
run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mysql elasticsearch meilisearch smtpimap" | sudo tee -a /etc/hosts'
- run: make deps-backend
- run: make backend
env:
Expand Down
17 changes: 17 additions & 0 deletions models/db/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,20 @@ func BuildCaseInsensitiveLike(key, value string) builder.Cond {
}
return builder.Like{"UPPER(" + key + ")", strings.ToUpper(value)}
}

// BuildCaseInsensitiveIn builds an IN condition that compares UPPER(key)
// against the upper-cased form of every entry in values, so the match is
// case-insensitive.
// On SQLite the values are upper-cased with util.ToUpperASCII, because UPPER
// there only transforms ASCII letters; other databases use strings.ToUpper.
func BuildCaseInsensitiveIn(key string, values []string) builder.Cond {
	// Choose the Go-side upper-casing that mirrors what the database's UPPER does.
	toUpper := strings.ToUpper
	if setting.Database.Type.IsSQLite3() {
		toUpper = util.ToUpperASCII
	}

	uppers := make([]string, len(values))
	for i, value := range values {
		uppers[i] = toUpper(value)
	}

	return builder.In("UPPER("+key+")", uppers)
}
26 changes: 24 additions & 2 deletions models/issues/issue.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
project_model "code.gitea.io/gitea/models/project"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
Expand Down Expand Up @@ -550,9 +551,30 @@ func GetIssueWithAttrsByID(id int64) (*Issue, error) {
}

// GetIssuesByIDs returns the issues with the given IDs.
func GetIssuesByIDs(ctx context.Context, issueIDs []int64) (IssueList, error) {
// If keepOrder is given and true, the returned issues follow the order of issueIDs;
// IDs that were not found are skipped and an ID listed more than once is returned only once.
func GetIssuesByIDs(ctx context.Context, issueIDs []int64, keepOrder ...bool) (IssueList, error) {
issues := make([]*Issue, 0, len(issueIDs))
return issues, db.GetEngine(ctx).In("id", issueIDs).Find(&issues)

if err := db.GetEngine(ctx).In("id", issueIDs).Find(&issues); err != nil {
return nil, err
}

if len(keepOrder) > 0 && keepOrder[0] {
// Index the found issues by ID, then rebuild the slice in the order of issueIDs.
m := make(map[int64]*Issue, len(issues))
appended := container.Set[int64]{}
for _, issue := range issues {
m[issue.ID] = issue
}
issues = issues[:0]
for _, id := range issueIDs {
if issue, ok := m[id]; ok && !appended.Contains(id) { // skip IDs that were not found or have already been appended
appended.Add(id)
issues = append(issues, issue)
}
}
}

return issues, nil
}

// GetIssueIDsByRepoID returns all issue ids by repo id
Expand Down
81 changes: 36 additions & 45 deletions models/issues/issue_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

// IssuesOptions represents options of an issue.
type IssuesOptions struct { //nolint
db.ListOptions
db.Paginator
RepoIDs []int64 // overwrites RepoCond if the length is not 0
RepoCond builder.Cond
AssigneeID int64
Expand Down Expand Up @@ -99,15 +99,28 @@ func applySorts(sess *xorm.Session, sortType string, priorityRepoID int64) {
}

// applyLimit applies the paging described by opts to sess via sess.Limit and returns sess.
func applyLimit(sess *xorm.Session, opts *IssuesOptions) *xorm.Session {
if opts.Page >= 0 && opts.PageSize > 0 {
var start int
if opts.Page == 0 {
start = 0
} else {
start = (opts.Page - 1) * opts.PageSize
// No paginator, or a paginator reporting IsListAll(): leave the session without a limit.
if opts.Paginator == nil || opts.Paginator.IsListAll() {
return sess
}

// Warning: Do not use GetSkipTake() for *db.ListOptions
// Its implementation could reset the page size with setting.API.MaxResponseItems
if listOptions, ok := opts.Paginator.(*db.ListOptions); ok {
if listOptions.Page >= 0 && listOptions.PageSize > 0 {
var start int
// Page 0 and page 1 both start at the first row.
if listOptions.Page == 0 {
start = 0
} else {
start = (listOptions.Page - 1) * listOptions.PageSize
}
sess.Limit(listOptions.PageSize, start)
}
sess.Limit(opts.PageSize, start)
return sess
}

// Any other paginator supplies its own skip/take values.
start, limit := opts.Paginator.GetSkipTake()
sess.Limit(limit, start)

return sess
}

Expand Down Expand Up @@ -435,7 +448,7 @@ func Issues(ctx context.Context, opts *IssuesOptions) ([]*Issue, error) {
applyConditions(sess, opts)
applySorts(sess, opts.SortType, opts.PriorityRepoID)

issues := make(IssueList, 0, opts.ListOptions.PageSize)
issues := IssueList{}
if err := sess.Find(&issues); err != nil {
return nil, fmt.Errorf("unable to query Issues: %w", err)
}
Expand All @@ -447,45 +460,23 @@ func Issues(ctx context.Context, opts *IssuesOptions) ([]*Issue, error) {
return issues, nil
}

// SearchIssueIDsByKeyword search issues on database
func SearchIssueIDsByKeyword(ctx context.Context, kw string, repoIDs []int64, limit, start int) (int64, []int64, error) {
repoCond := builder.In("repo_id", repoIDs)
subQuery := builder.Select("id").From("issue").Where(repoCond)
cond := builder.And(
repoCond,
builder.Or(
db.BuildCaseInsensitiveLike("name", kw),
db.BuildCaseInsensitiveLike("content", kw),
builder.In("id", builder.Select("issue_id").
From("comment").
Where(builder.And(
builder.Eq{"type": CommentTypeComment},
builder.In("issue_id", subQuery),
db.BuildCaseInsensitiveLike("content", kw),
)),
),
),
)

ids := make([]int64, 0, limit)
res := make([]struct {
ID int64
UpdatedUnix int64
}, 0, limit)
err := db.GetEngine(ctx).Distinct("id", "updated_unix").Table("issue").Where(cond).
OrderBy("`updated_unix` DESC").Limit(limit, start).
Find(&res)
if err != nil {
return 0, nil, err
}
for _, r := range res {
ids = append(ids, r.ID)
// IssueIDs returns the IDs of the issues matching opts and every condition in otherConds,
// together with the total reported by FindAndCount.
// Paging and sorting are taken from opts through applyLimit and applySorts.
func IssueIDs(ctx context.Context, opts *IssuesOptions, otherConds ...builder.Cond) ([]int64, int64, error) {
sess := db.GetEngine(ctx).
Join("INNER", "repository", "`issue`.repo_id = `repository`.id")
applyConditions(sess, opts)
for _, cond := range otherConds {
sess.And(cond)
}

total, err := db.GetEngine(ctx).Distinct("id").Table("issue").Where(cond).Count()
applyLimit(sess, opts)
applySorts(sess, opts.SortType, opts.PriorityRepoID)

var res []int64
total, err := sess.Select("`issue`.id").Table(&Issue{}).FindAndCount(&res)
if err != nil {
return 0, nil, err
return nil, 0, err
}

return total, ids, nil
return res, total, nil
}
47 changes: 18 additions & 29 deletions models/issues/issue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestIssueAPIURL(t *testing.T) {
func TestGetIssuesByIDs(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
testSuccess := func(expectedIssueIDs, nonExistentIssueIDs []int64) {
issues, err := issues_model.GetIssuesByIDs(db.DefaultContext, append(expectedIssueIDs, nonExistentIssueIDs...))
issues, err := issues_model.GetIssuesByIDs(db.DefaultContext, append(expectedIssueIDs, nonExistentIssueIDs...), true)
assert.NoError(t, err)
actualIssueIDs := make([]int64, len(issues))
for i, issue := range issues {
Expand All @@ -83,6 +83,7 @@ func TestGetIssuesByIDs(t *testing.T) {
}
testSuccess([]int64{1, 2, 3}, []int64{})
testSuccess([]int64{1, 2, 3}, []int64{unittest.NonexistentID})
testSuccess([]int64{3, 2, 1}, []int64{})
}

func TestGetParticipantIDsByIssue(t *testing.T) {
Expand Down Expand Up @@ -165,7 +166,7 @@ func TestIssues(t *testing.T) {
issues_model.IssuesOptions{
RepoCond: builder.In("repo_id", 1, 3),
SortType: "oldest",
ListOptions: db.ListOptions{
Paginator: &db.ListOptions{
Page: 1,
PageSize: 4,
},
Expand All @@ -175,7 +176,7 @@ func TestIssues(t *testing.T) {
{
issues_model.IssuesOptions{
LabelIDs: []int64{1},
ListOptions: db.ListOptions{
Paginator: &db.ListOptions{
Page: 1,
PageSize: 4,
},
Expand All @@ -185,7 +186,7 @@ func TestIssues(t *testing.T) {
{
issues_model.IssuesOptions{
LabelIDs: []int64{1, 2},
ListOptions: db.ListOptions{
Paginator: &db.ListOptions{
Page: 1,
PageSize: 4,
},
Expand Down Expand Up @@ -333,30 +334,6 @@ func TestIssue_loadTotalTimes(t *testing.T) {
assert.Equal(t, int64(3682), ms.TotalTrackedTime)
}

// TestIssue_SearchIssueIDsByKeyword checks the total and the IDs returned by
// SearchIssueIDsByKeyword for several keywords within repository 1.
func TestIssue_SearchIssueIDsByKeyword(t *testing.T) {
	assert.NoError(t, unittest.PrepareTestDatabase())

	cases := []struct {
		keyword   string
		wantTotal int
		wantIDs   []int64
		anyOrder  bool // compare IDs without regard to order
	}{
		{keyword: "issue2", wantTotal: 1, wantIDs: []int64{2}},
		{keyword: "first", wantTotal: 1, wantIDs: []int64{1}},
		{keyword: "for", wantTotal: 5, wantIDs: []int64{1, 2, 3, 5, 11}, anyOrder: true},
		// issue1's comment id 2
		{keyword: "good", wantTotal: 1, wantIDs: []int64{1}},
	}

	for _, tc := range cases {
		total, ids, err := issues_model.SearchIssueIDsByKeyword(context.TODO(), tc.keyword, []int64{1}, 10, 0)
		assert.NoError(t, err)
		assert.EqualValues(t, tc.wantTotal, total)
		if tc.anyOrder {
			assert.ElementsMatch(t, tc.wantIDs, ids)
		} else {
			assert.EqualValues(t, tc.wantIDs, ids)
		}
	}
}

func TestGetRepoIDsForIssuesOptions(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
Expand Down Expand Up @@ -496,7 +473,19 @@ func TestCorrectIssueStats(t *testing.T) {
wg.Wait()

// Now we will get all issueID's that match the "Bugs are nasty" query.
total, ids, err := issues_model.SearchIssueIDsByKeyword(context.TODO(), "Bugs are nasty", []int64{1}, issueAmount, 0)
issues, err := issues_model.Issues(context.TODO(), &issues_model.IssuesOptions{
Paginator: &db.ListOptions{
PageSize: issueAmount,
},
RepoIDs: []int64{1},
})
total := int64(len(issues))
var ids []int64
for _, issue := range issues {
if issue.Content == "Bugs are nasty" {
ids = append(ids, issue.ID)
}
}

// Just to be sure.
assert.NoError(t, err)
Expand Down
10 changes: 10 additions & 0 deletions models/issues/issue_user.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,13 @@ func UpdateIssueUsersByMentions(ctx context.Context, issueID int64, uids []int64
}
return nil
}

// GetIssueMentionIDs returns the IDs of all users mentioned in an issue.
// It selects the uid column of the IssueUser rows that match issueID and
// have is_mentioned set to true.
func GetIssueMentionIDs(ctx context.Context, issueID int64) ([]int64, error) {
	var ids []int64
	// Run the query before building the return values: in a single return
	// statement the Go spec does not specify whether ids is read before or
	// after Find(&ids) fills it.
	err := db.GetEngine(ctx).Table(IssueUser{}).
		Where("issue_id=?", issueID).
		And("is_mentioned=?", true).
		Select("uid").
		Find(&ids)
	return ids, err
}
16 changes: 14 additions & 2 deletions models/issues/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,12 @@ func GetLabelByID(ctx context.Context, labelID int64) (*Label, error) {
}

// GetLabelsByIDs returns the labels with the given IDs, sorted by name ascending.
// cols is forwarded to Cols to choose which columns are loaded.
func GetLabelsByIDs(labelIDs []int64) ([]*Label, error) {
func GetLabelsByIDs(labelIDs []int64, cols ...string) ([]*Label, error) {
labels := make([]*Label, 0, len(labelIDs))
return labels, db.GetEngine(db.DefaultContext).Table("label").
In("id", labelIDs).
Asc("name").
Cols("id", "repo_id", "org_id").
Cols(cols...).
Find(&labels)
}

Expand Down Expand Up @@ -476,6 +476,18 @@ func GetLabelsByOrgID(ctx context.Context, orgID int64, sortType string, listOpt
return labels, sess.Find(&labels)
}

// GetLabelIDsByNames returns the IDs of all labels whose name is one of labelNames.
// It doesn't filter them by repo or org, so it could return labels belonging to different repos/orgs.
// It's used for filtering issues via the indexer; otherwise it would be useless.
// Because several labels can share the same name, more IDs may be returned than names were given.
func GetLabelIDsByNames(ctx context.Context, labelNames []string) ([]int64, error) {
	labelIDs := make([]int64, 0, len(labelNames))
	// Run the query before building the return values: in a single return
	// statement the Go spec does not specify whether labelIDs is read before
	// or after Find(&labelIDs) fills it.
	err := db.GetEngine(ctx).Table("label").
		In("name", labelNames).
		Cols("id").
		Find(&labelIDs)
	return labelIDs, err
}

// CountLabelsByOrgID count all labels that belong to given organization by ID.
func CountLabelsByOrgID(orgID int64) (int64, error) {
return db.GetEngine(db.DefaultContext).Where("org_id = ?", orgID).Count(&Label{})
Expand Down
12 changes: 12 additions & 0 deletions models/issues/milestone.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,18 @@ func GetMilestones(opts GetMilestonesOption) (MilestoneList, int64, error) {
return miles, total, err
}

// GetMilestoneIDsByNames returns the IDs of all milestones whose name is one of names.
// Names are compared through db.BuildCaseInsensitiveIn.
// It doesn't filter them by repo, so it could return milestones belonging to different repos.
// It's used for filtering issues via the indexer; otherwise it would be useless.
// Because several milestones can share the same name, more IDs may be returned than names were given.
func GetMilestoneIDsByNames(ctx context.Context, names []string) ([]int64, error) {
	var ids []int64
	// Run the query before building the return values: in a single return
	// statement the Go spec does not specify whether ids is read before or
	// after Find(&ids) fills it.
	err := db.GetEngine(ctx).Table("milestone").
		Where(db.BuildCaseInsensitiveIn("name", names)).
		Cols("id").
		Find(&ids)
	return ids, err
}

// SearchMilestones search milestones
func SearchMilestones(repoCond builder.Cond, page int, isClosed bool, sortType, keyword string) (MilestoneList, error) {
miles := make([]*Milestone, 0, setting.UI.IssuePagingNum)
Expand Down
13 changes: 2 additions & 11 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,6 @@ const (
maxBatchSize = 16
)

// numericEqualityQuery builds a numeric range query on field whose minimum and
// maximum are both value, with both bounds inclusive.
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
	bound := float64(value)
	inclusive := true
	// The same bound is used for both ends of the range.
	rangeQuery := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
	rangeQuery.SetField(field)
	return rangeQuery
}

func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
"type": unicodenorm.Name,
Expand Down Expand Up @@ -225,7 +216,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st

// Delete deletes indexes by ids
func (b *Indexer) Delete(_ context.Context, repoID int64) error {
query := numericEqualityQuery(repoID, "RepoID")
query := inner_bleve.NumericEqualityQuery(repoID, "RepoID")
searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
result, err := b.inner.Indexer.Search(searchRequest)
if err != nil {
Expand Down Expand Up @@ -262,7 +253,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
if len(repoIDs) > 0 {
repoQueries := make([]query.Query, 0, len(repoIDs))
for _, repoID := range repoIDs {
repoQueries = append(repoQueries, numericEqualityQuery(repoID, "RepoID"))
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
}

indexerQuery = bleve.NewConjunctionQuery(
Expand Down
Loading

0 comments on commit 1e76a82

Please sign in to comment.