@@ -20,8 +20,10 @@ import (
2020 "code.gitea.io/gitea/modules/git"
2121 "code.gitea.io/gitea/modules/log"
2222 "code.gitea.io/gitea/modules/setting"
23+ "code.gitea.io/gitea/modules/timeutil"
2324
2425 "github.com/olivere/elastic/v7"
26+ "github.com/src-d/enry/v2"
2527)
2628
2729var (
@@ -70,22 +72,34 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bo
7072 client : client ,
7173 indexerName : indexerName ,
7274 }
73- success , err := indexer .init ()
75+ exists , err := indexer .init ()
7476
75- return indexer , success , err
77+ return indexer , exists , err
7678}
7779
7880const (
7981 defaultMapping = `{
8082 "mappings": {
8183 "properties": {
8284 "repo_id": {
83- "type": "integer ",
85+ "type": "long ",
8486 "index": true
8587 },
8688 "content": {
8789 "type": "text",
8890 "index": true
91+ },
92+ "commit_id": {
93+ "type": "keyword",
94+ "index": true
95+ },
96+ "language": {
97+ "type": "keyword",
98+ "index": true
99+ },
100+ "updated_at": {
101+ "type": "long",
102+ "index": true
89103 }
90104 }
91105 }
@@ -99,21 +113,21 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
99113 if err != nil {
100114 return false , err
101115 }
116+ if exists {
117+ return true , nil
118+ }
102119
103- if ! exists {
104- var mapping = defaultMapping
120+ var mapping = defaultMapping
105121
106- createIndex , err := b .client .CreateIndex (b .indexerName ).BodyString (mapping ).Do (ctx )
107- if err != nil {
108- return false , err
109- }
110- if ! createIndex .Acknowledged {
111- return false , errors .New ("init failed" )
112- }
113-
114- return false , nil
122+ createIndex , err := b .client .CreateIndex (b .indexerName ).BodyString (mapping ).Do (ctx )
123+ if err != nil {
124+ return false , err
115125 }
116- return true , nil
126+ if ! createIndex .Acknowledged {
127+ return false , errors .New ("init failed" )
128+ }
129+
130+ return false , nil
117131}
118132
119133func (b * ElasticSearchIndexer ) addUpdate (sha string , update fileUpdate , repo * models.Repository ) ([]elastic.BulkableRequest , error ) {
@@ -148,7 +162,7 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
148162 "content" : string (charset .ToUTF8DropErrors (fileContents )),
149163 "commit_id" : sha ,
150164 "language" : analyze .GetCodeLanguage (update .Filename , fileContents ),
151- "updated_at" : time . Now (). UTC (),
165+ "updated_at" : timeutil . TimeStampNow (),
152166 }),
153167 }, nil
154168}
@@ -203,6 +217,73 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
203217 return err
204218}
205219
220+ func convertResult (searchResult * elastic.SearchResult , kw string , pageSize int ) (int64 , []* SearchResult , []* SearchResultLanguages , error ) {
221+ hits := make ([]* SearchResult , 0 , pageSize )
222+ for _ , hit := range searchResult .Hits .Hits {
223+ // FIXME: There is no way to get the position the keyword on the content currently on the same request.
224+ // So we get it from content, this may made the query slower. See
225+ // https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
226+ var startIndex , endIndex int = - 1 , - 1
227+ c , ok := hit .Highlight ["content" ]
228+ if ok && len (c ) > 0 {
229+ var subStr = make ([]rune , 0 , len (kw ))
230+ startIndex = strings .IndexFunc (c [0 ], func (r rune ) bool {
231+ if len (subStr ) >= len (kw ) {
232+ subStr = subStr [1 :]
233+ }
234+ subStr = append (subStr , r )
235+ return strings .EqualFold (kw , string (subStr ))
236+ })
237+ if startIndex > - 1 {
238+ endIndex = startIndex + len (kw )
239+ } else {
240+ panic (fmt .Sprintf ("1===%#v" , hit .Highlight ))
241+ }
242+ } else {
243+ panic (fmt .Sprintf ("2===%#v" , hit .Highlight ))
244+ }
245+
246+ repoID , fileName := parseIndexerID (hit .Id )
247+ var res = make (map [string ]interface {})
248+ if err := json .Unmarshal (hit .Source , & res ); err != nil {
249+ return 0 , nil , nil , err
250+ }
251+
252+ language := res ["language" ].(string )
253+
254+ hits = append (hits , & SearchResult {
255+ RepoID : repoID ,
256+ Filename : fileName ,
257+ CommitID : res ["commit_id" ].(string ),
258+ Content : res ["content" ].(string ),
259+ UpdatedUnix : timeutil .TimeStamp (res ["updated_at" ].(float64 )),
260+ Language : language ,
261+ StartIndex : startIndex ,
262+ EndIndex : endIndex ,
263+ Color : enry .GetColor (language ),
264+ })
265+ }
266+
267+ return searchResult .TotalHits (), hits , extractAggs (searchResult ), nil
268+ }
269+
270+ func extractAggs (searchResult * elastic.SearchResult ) []* SearchResultLanguages {
271+ var searchResultLanguages []* SearchResultLanguages
272+ agg , found := searchResult .Aggregations .Terms ("language" )
273+ if found {
274+ searchResultLanguages = make ([]* SearchResultLanguages , 0 , 10 )
275+
276+ for _ , bucket := range agg .Buckets {
277+ searchResultLanguages = append (searchResultLanguages , & SearchResultLanguages {
278+ Language : bucket .Key .(string ),
279+ Color : enry .GetColor (bucket .Key .(string )),
280+ Count : int (bucket .DocCount ),
281+ })
282+ }
283+ }
284+ return searchResultLanguages
285+ }
286+
206287// Search searches for codes and language stats by given conditions.
207288func (b * ElasticSearchIndexer ) Search (repoIDs []int64 , language , keyword string , page , pageSize int ) (int64 , []* SearchResult , []* SearchResultLanguages , error ) {
208289 kwQuery := elastic .NewMultiMatchQuery (keyword , "content" )
@@ -216,10 +297,45 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
216297 repoQuery := elastic .NewTermsQuery ("repo_id" , repoStrs ... )
217298 query = query .Must (repoQuery )
218299 }
219- start := 0
300+
301+ var (
302+ start int
303+ kw = "<em>" + keyword + "</em>"
304+ aggregation = elastic .NewTermsAggregation ().Field ("language" ).Size (10 ).OrderByCountDesc ()
305+ )
306+
220307 if page > 0 {
221308 start = (page - 1 ) * pageSize
222309 }
310+
311+ if len (language ) == 0 {
312+ searchResult , err := b .client .Search ().
313+ Index (b .indexerName ).
314+ Aggregation ("language" , aggregation ).
315+ Query (query ).
316+ Highlight (elastic .NewHighlight ().Field ("content" )).
317+ Sort ("repo_id" , true ).
318+ From (start ).Size (pageSize ).
319+ Do (context .Background ())
320+ if err != nil {
321+ return 0 , nil , nil , err
322+ }
323+
324+ return convertResult (searchResult , kw , pageSize )
325+ }
326+
327+ langQuery := elastic .NewMatchQuery ("language" , language )
328+ countResult , err := b .client .Search ().
329+ Index (b .indexerName ).
330+ Aggregation ("language" , aggregation ).
331+ Query (query ).
332+ Size (0 ). // We only needs stats information
333+ Do (context .Background ())
334+ if err != nil {
335+ return 0 , nil , nil , err
336+ }
337+
338+ query = query .Must (langQuery )
223339 searchResult , err := b .client .Search ().
224340 Index (b .indexerName ).
225341 Query (query ).
@@ -231,35 +347,9 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
231347 return 0 , nil , nil , err
232348 }
233349
234- var kw = "<em>" + keyword + "</em>"
235-
236- hits := make ([]* SearchResult , 0 , pageSize )
237- for _ , hit := range searchResult .Hits .Hits {
238- var startIndex , endIndex int = - 1 , - 1
239- c , ok := hit .Highlight ["content" ]
240- if ok && len (c ) > 0 {
241- startIndex = strings .Index (c [0 ], kw )
242- if startIndex > - 1 {
243- endIndex = startIndex + len (kw )
244- }
245- }
246-
247- repoID , fileName := parseIndexerID (hit .Id )
248- var h = SearchResult {
249- RepoID : repoID ,
250- StartIndex : startIndex ,
251- EndIndex : endIndex ,
252- Filename : fileName ,
253- }
254-
255- if err := json .Unmarshal (hit .Source , & h ); err != nil {
256- return 0 , nil , nil , err
257- }
258-
259- hits = append (hits , & h )
260- }
350+ total , hits , _ , err := convertResult (searchResult , kw , pageSize )
261351
262- return searchResult . TotalHits () , hits , nil , nil
352+ return total , hits , extractAggs ( countResult ), err
263353}
264354
265355// Close implements indexer
0 commit comments