atom · jeancroy · Jul 9, 2015 · Jul 9, 2015 · Jul 9, 2015 · Jul 9, 2015
diff --git a/spec/filter-spec.coffee b/spec/filter-spec.coffee
@@ -16,12 +16,12 @@ rootPath = (segments...) ->
 describe "filtering", ->
   it "returns an array of the most accurate results", ->
     candidates = ['Gruntfile','filter', 'bile', null, '', undefined]
-    expect(filter(candidates, 'file')).toEqual ['filter', 'Gruntfile']
+    expect(filter(candidates, 'file')).toEqual ['Gruntfile', 'filter']
 
   describe "when the maxResults option is set", ->
     it "limits the results to the result size", ->
       candidates = ['Gruntfile', 'filter', 'bile']
-      expect(bestMatch(candidates, 'file')).toBe 'filter'
+      expect(bestMatch(candidates, 'file')).toBe 'Gruntfile'
 
   describe "when the entries contains slashes", ->
     it "weighs basename matches higher", ->
@@ -111,6 +111,31 @@ describe "filtering", ->
     expect(bestMatch(['a_b_c', 'a_b'], 'ab')).toBe 'a_b'
     expect(bestMatch(['z_a_b', 'a_b'], 'ab')).toBe 'a_b'
     expect(bestMatch(['a_b_c', 'c_a_b'], 'ab')).toBe 'a_b_c'
+    expect(bestMatch(['Unin-stall', path.join('dir1', 'dir2', 'dir3', 'Installation')], 'install')).toBe path.join('dir1', 'dir2', 'dir3', 'Installation')
+    expect(bestMatch(['Uninstall', path.join('dir', 'Install')], 'install')).toBe path.join('dir', 'Install')
+
+  it "weighs substring higher than individual characters", ->
+    candidates = [
+      'Git Plus: Stage Hunk',
+      'Git Plus: Reset Head',
+      'Git Plus: Push',
+      'Git Plus: Show'
+    ]
+    expect(bestMatch(candidates, 'push')).toBe 'Git Plus: Push'
+    expect(bestMatch(['a_b_c', 'somethingabc'], 'abc')).toBe 'somethingabc'
+
+  it "returns the result in order", ->
+    candidates = [
+      'Find And Replace: Selet All',
+      'Settings View: Uninstall Packages',
+      'Application: Install Update',
+      'install'
+    ]
+    result = filter(candidates, 'install')
+    expect(result[0]).toBe candidates[3]
+    expect(result[1]).toBe candidates[2]
+    expect(result[2]).toBe candidates[1]
+    expect(result[3]).toBe candidates[0]
 
   describe "when the entries are of differing directory depths", ->
     it "places exact matches first, even if they're deeper", ->
@@ -136,4 +161,4 @@ describe "filtering", ->
         path.join('app', 'models', 'cars', 'car.rb')
         path.join('spec', 'cars.rb')
       ]
-      expect(bestMatch(candidates, 'car.rb')).toBe candidates[0]
+      expect(bestMatch(candidates, 'car.rb')).toBe candidates[0]
diff --git a/src/scorer.coffee b/src/scorer.coffee
@@ -1,22 +1,193 @@
-# Original ported from:
 #
-# string_score.js: String Scoring Algorithm 0.1.10
+# Score similarity between two strings
 #
-# http://joshaven.com/string_score
-# https://github.com/joshaven/string_score
+#  isMatch: Fast detection of whether all characters of needle are in haystack
+#  score: Find string similarity using a Smith-Waterman-Gotoh algorithm
+#         Modified to account for programming scenarios (CamelCase folder/file.ext object.property)
 #
-# Copyright (C) 2009-2011 Joshaven Potter <yourtech@gmail.com>
-# Special thanks to all of the contributors listed here https://github.com/joshaven/string_score
-# MIT license: http://www.opensource.org/licenses/mit-license.php
+# Copyright (C) 2015 Jean Christophe Roy and contributors
+# MIT License: http://opensource.org/licenses/MIT
 #
-# Date: Tue Mar 1 2011
+# Previous version of scorer used string_score from Joshaven Potter
+# https://github.com/joshaven/string_score/
 
+
+wm = 10 # score of an ordinary character match
+ws = 30 # score when the matched character is itself a separator
+wa = 20 # score of an acronym-style match (first char of query or subject, char after a separator, camelCase capital)
+wc = 10 # bonus added when the matched characters have identical case
+
+wo = -8 # penalty to open a gap
+we = -2 # penalty to continue an open gap (inside a match)
+wh = -0.1 # penalty per character of length difference between subject and query (outside match)
+
+wst = 20 # bonus for a match near the start of the subject (fades by one per position until 0)
+wex = 10 # bonus when the query is a case-insensitive substring of the subject, scaled by (query length + 1); doubled at position 0 and fading towards 1x for later positions
+
+# Note: a separator match scores `ws` plus the "proper case" and
+# "start of string" bonuses; `wm` and `wa` are not added on top of it.
+
+# Characters treated as token separators.
+separators = ' .-_/\\'
 PathSeparator = require('path').sep
 
+# Build a lookup table mapping each separator character to its index in `separators`.
+separator_map = ->
+  table = {}
+  for ch, pos in separators
+    table[ch] = pos
+
+  table
+
+sep_map = separator_map()
+
+exports.score = score = (subject, query, ignore) ->
+  # Returns a similarity score of `query` against `subject`; 0 means no match. `ignore` is not read by this function.
+  #Bypassing isMatch would allow inexact matches, but would be slower
+  return 0 if !( subject and query and isMatch(query, subject) )
+
+  m = query.length + 1
+  n = subject.length + 1
+
+  #Init: vrow is a single score row reused for every query char, gapArow keeps one running gap score per subject column
+  vrow = new Array(n)
+  gapArow = new Array(n)
+  gapA = 0
+  gapB = 0
+  vmax = 0
+
+  #DEBUG
+  #VV = []
+
+  #Fill with 0
+  j = -1
+  while ++j < n
+    gapArow[j] = 0
+    vrow[j] = 0
+
+  i = 0 #1..m-1
+  while ++i < m
+    #foreach char of query
+    gapB = 0
+    vd = vrow[0]
+    #vd carries the previous row's value at column j - 1 (the diagonal cell)
+    #DEBUG
+    #VV[i] = []
+
+    j = 0 #1..n-1
+    while ++j < n
+      #foreach char of subject
+      #gapA reads the previous row at column j, gapB reads the current row at column j - 1
+      # Score the options
+      gapA = gapArow[j] = Math.max(gapArow[j] + we, vrow[j] + wo)
+      gapB = Math.max(gapB + we, vrow[j - 1] + wo)
+      align = vd + char_score(query, subject, i - 1, j - 1)
+      vd = vrow[j]
+
+      #Get the best option, never below 0
+      v = vrow[j] = Math.max(align, gapA, gapB, 0)
+
+      #DEBUG
+      #VV[i][j] = v
+
+      #Record best score
+      if v > vmax
+        vmax = v
+
+  #DEBUG
+  #console.log(query,subject)
+  #console.table(VV);
+
+  #vmax is the best cell value seen anywhere in the table
+  #haystack penalty: wh per extra subject char, but never more than half of the score is lost
+  vmax = Math.max(vmax / 2, vmax + wh * (n - m))
+
+  #substring bonus, start of string bonus: p is where query occurs inside subject, ignoring case
+  vmax += if (p = subject.toLowerCase().indexOf(query.toLowerCase())) > -1 then wex * m * (1.0 + 1.0 / (1.0 + p)) else 0
+
+  return vmax
+
+char_score = (query, subject, i, j) ->
+  # Score of aligning query[i] with subject[j]; -Infinity when they differ (ignoring case).
+  q_char = query[i]
+  s_char = subject[j]
+
+  # No match: the best move will be to take a gap in either query or subject.
+  return -Infinity unless q_char.toLowerCase() == s_char.toLowerCase()
+
+  # Bonuses common to every kind of match:
+  # start of string (fades with position j), plus proper casing.
+  extra = Math.max(wst - j, 0)
+  extra += wc if q_char == s_char
+
+  # The matched character IS a separator.
+  return ws + extra if q_char of sep_map
+
+  # First char of query or subject
+  # (a virtual token separator is assumed before the first char).
+  return wa + extra if i == 0 or j == 0
+
+  before_s = subject[j - 1]
+  before_q = query[i - 1]
+
+  # The match FOLLOWS a separator in subject or query.
+  follows_sep = (before_s of sep_map) or (before_q of sep_map)
+  return wa + extra if follows_sep
+
+  # The match equals its uppercase form and follows a char equal to
+  # its lowercase form (camelCase capital).
+  if s_char == s_char.toUpperCase() and before_s == before_s.toLowerCase()
+    return wa + extra
+
+  # Normal match.
+  return wm + extra
+
+
+isMatch = (query, subject) ->
+  # Fast pre-filter: true only when every character of `query` occurs in
+  # `subject` in the same order (case-insensitive), i.e. `query` is a
+  # subsequence of `subject`.
+  #
+  # query   - the needle
+  # subject - the haystack
+  # Returns false for an empty query, an empty subject, or a query longer
+  # than the subject.
+  m = query.length
+  n = subject.length
+
+  if !m or !n or m > n
+    return false
+
+  lq = query.toLowerCase()
+  ls = subject.toLowerCase()
+
+  i = -1
+  j = -1
+
+  while ++i < m
+    # Resume right after the previous hit. Running out of subject while
+    # query characters remain must fail, including when the previous hit
+    # was the very last subject character.
+    return false if (j = ls.indexOf(lq[i], j + 1)) == -1
+
+  true
+
 exports.basenameScore = (string, query, score) ->
-  index = string.length - 1
-  index-- while string[index] is PathSeparator # Skip trailing slashes
+  # Blend the full-path `score` with the score of the last path segment (basename); a zero score stays zero.
+  return 0 if score == 0
+  end = string.length - 1
+  end-- while string[end] is PathSeparator # Skip trailing slashes
+  # Without a separator the whole string is the basename; otherwise the basename score is never lower than `score`.
+  basePos = string.lastIndexOf(PathSeparator, end)
+  baseScore = if (basePos == -1) then score else Math.max(score, exports.score(string.substring(basePos + 1, end+1), query))
+  score = 0.15*score + 0.85*baseScore
+  # `score` now holds 15% of the full-path score plus 85% of the basename score.
+  score
+
+ ###
+
   slashCount = 0
+  baseScore = 0
   lastCharacter = index
   base = null
   while index >= 0
@@ -30,60 +201,12 @@ exports.basenameScore = (string, query, score) ->
         base ?= string
     index--
 
-  # Basename matches count for more.
-  if base is string
-    score *= 2
-  else if base
-    score += exports.score(base, query)
-
-  # Shallow files are scored higher
-  segmentCount = slashCount + 1
-  depth = Math.max(1, 10 - segmentCount)
-  score *= depth * 0.01
-  score
-
-exports.score = (string, query) ->
-  return 1 if string is query
-
-  # Return a perfect score if the file name itself matches the query.
-  return 1 if queryIsLastPathSegment(string, query)
-
-  totalCharacterScore = 0
-  queryLength = query.length
-  stringLength = string.length
-
-  indexInQuery = 0
-  indexInString = 0
-
-  while indexInQuery < queryLength
-    character = query[indexInQuery++]
-    lowerCaseIndex = string.indexOf(character.toLowerCase())
-    upperCaseIndex = string.indexOf(character.toUpperCase())
-    minIndex = Math.min(lowerCaseIndex, upperCaseIndex)
-    minIndex = Math.max(lowerCaseIndex, upperCaseIndex) if minIndex is -1
-    indexInString = minIndex
-    return 0 if indexInString is -1
-
-    characterScore = 0.1
+   # Shallow files are scored higher
+   score += baseScore*( 3.0 + 3.0/(3.0+slashCount) )
 
-    # Same case bonus.
-    characterScore += 0.1 if string[indexInString] is character
+ ###
 
-    if indexInString is 0 or string[indexInString - 1] is PathSeparator
-      # Start of string bonus
-      characterScore += 0.8
-    else if string[indexInString - 1] in ['-', '_', ' ']
-      # Start of word bonus
-      characterScore += 0.7
 
-    # Trim string to after current abbreviation match
-    string = string.substring(indexInString + 1, stringLength)
 
-    totalCharacterScore += characterScore
 
-  queryScore = totalCharacterScore / queryLength
-  ((queryScore * (queryLength / stringLength)) + queryScore) / 2
 
-queryIsLastPathSegment = (string, query) ->
-  if string[string.length - query.length - 1] is PathSeparator
-    string.lastIndexOf(query) is string.length - query.length