nim-lang · Araq · Aug 31, 2019 · Aug 23, 2019 · Aug 28, 2019 · Aug 28, 2019
diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim
@@ -97,7 +97,7 @@ proc openArrayLoc(p: BProc, n: PNode): Rope =
         result = "($1)+($2), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c)]
       else:
         result = "($1)+(($2)-($4)), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c), intLiteral(first)]
-    of tyOpenArray, tyVarargs, tyUncheckedArray:
+    of tyOpenArray, tyVarargs, tyUncheckedArray, tyCString:
       result = "($1)+($2), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c)]
     of tyString, tySequence:
       if skipTypes(n.typ, abstractInst).kind == tyVar and

diff --git a/compiler/condsyms.nim b/compiler/condsyms.nim
@@ -97,4 +97,5 @@ proc initDefines*(symbols: StringTableRef) =
 
   defineSymbol("nimFixedOwned")
   defineSymbol("nimHasStyleChecks")
+  defineSymbol("nimToOpenArrayCString")
   defineSymbol("nimHasUsed")
diff --git a/compiler/vmops.nim b/compiler/vmops.nim
@@ -17,6 +17,8 @@ from os import getEnv, existsEnv, dirExists, fileExists, putEnv, walkDir, getApp
 from md5 import getMD5
 from sighashes import symBodyDigest
 
+from hashes import hash
+
 template mathop(op) {.dirty.} =
   registerCallback(c, "stdlib.math." & astToStr(op), `op Wrapper`)
 
@@ -88,6 +90,16 @@ proc staticWalkDirImpl(path: string, relative: bool): PNode =
     result.add newTree(nkTupleConstr, newIntNode(nkIntLit, k.ord),
                               newStrNode(nkStrLit, f))
 
+proc hashVmImplByte(a: VmArgs) {.nimcall.} =
+  # VM callback; argument 0 is read as a node of integer elements (nkBracket[...]).
+  let first = int(getInt(a, 1))
+  let last = int(getInt(a, 2))
+  let elems = getNode(a, 0)
+  var buf = newSeq[byte](len(elems))
+  for idx in 0 ..< len(elems):
+    buf[idx] = byte(elems[idx].intVal and 0xff)  # keep the low 8 bits only
+  setResult(a, hashes.hash(buf, first, last))
+
 proc registerAdditionalOps*(c: PCtx) =
   proc gorgeExWrapper(a: VmArgs) =
     let (s, e) = opGorge(getString(a, 0), getString(a, 1), getString(a, 2),
@@ -157,3 +169,9 @@ proc registerAdditionalOps*(c: PCtx) =
       stackTrace(c, PStackFrame(prc: c.prc.sym, comesFrom: 0, next: nil), c.exceptionInstr,
                   "isExported() requires a symbol. '" & $n & "' is of kind '" & $n.kind & "'", n.info)
     setResult(a, sfExported in n.sym.flags)
+
+  registerCallback c, "stdlib.hashes.hashVmImpl", proc(a: VmArgs) {.nimcall.} =
+    setResult(a, hashes.hash(a.getString(0), a.getInt(1).int, a.getInt(2).int))
+
+  registerCallback c, "stdlib.hashes.hashVmImplByte", hashVmImplByte
+  registerCallback c, "stdlib.hashes.hashVmImplChar", hashVmImplByte
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
@@ -49,9 +49,6 @@ type
                ## always have a size of a power of two and can use the ``and``
                ## operator instead of ``mod`` for truncation of the hash value.
 
-const
-  IntSize = sizeof(int)
-
 proc `!&`*(h: Hash, val: int): Hash {.inline.} =
   ## Mixes a hash value `h` with `val` to produce a new hash value.
   ##
@@ -108,13 +105,12 @@ proc hash*(x: pointer): Hash {.inline.} =
   else:
     result = cast[Hash](cast[uint](x) shr 3) # skip the alignment
 
-when not defined(booting):
-  proc hash*[T: proc](x: T): Hash {.inline.} =
-    ## Efficient hashing of proc vars. Closures are supported too.
-    when T is "closure":
-      result = hash(rawProc(x)) !& hash(rawEnv(x))
-    else:
-      result = hash(pointer(x))
+proc hash*[T: proc](x: T): Hash {.inline.} =
+  ## Efficient hashing of proc vars. Closures are supported too.
+  when T is "closure":  # closure: mix the hash of rawProc(x) with the hash of rawEnv(x)
+    result = hash(rawProc(x)) !& hash(rawEnv(x))
+  else:  # any other proc type: hash it as a plain pointer
+    result = hash(pointer(x))
 
 proc hash*(x: int): Hash {.inline.} =
   ## Efficient hashing of integers.
@@ -151,27 +147,87 @@ proc hash*(x: float): Hash {.inline.} =
 proc hash*[A](x: openArray[A]): Hash
 proc hash*[A](x: set[A]): Hash
 
-template bytewiseHashing(result: Hash, x: typed, start, stop: int) =
-  for i in start .. stop:
-    result = result !& hash(x[i])
-  result = !$result
 
-template hashImpl(result: Hash, x: typed, start, stop: int) =
+when defined(JS):
+  proc imul(a, b: uint32): uint32 =
+    # Multiply via 16-bit halves, see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
+    let lowHalf = 0xffff'u32
+    let
+      aLo = a and lowHalf
+      aHi = (a shr 16) and lowHalf
+      bLo = b and lowHalf
+      bHi = (b shr 16) and lowHalf
+    result = aLo * bLo + ((aHi * bLo + aLo * bHi) shl 16)
+else:
+  template imul(a, b: uint32): untyped = a * b
+
+proc rotl32(x: uint32, r: int): uint32 {.inline.} =
+  result = (x shl r) or (x shr (32 - r))  # rotate the 32-bit value left by `r` bits
+
+proc murmurHash(x: openArray[byte]): Hash =
+  # Follows https://github.com/PeterScott/murmur3/blob/master/murmur3.c using 32-bit state.
+  const
+    c1 = 0xcc9e2d51'u32
+    c2 = 0x1b873593'u32
+    n1 = 0xe6546b64'u32
+    m1 = 0x85ebca6b'u32
+    m2 = 0xc2b2ae35'u32
   let
-    elementSize = sizeof(x[start])
-    stepSize = IntSize div elementSize
-  var i = start
-  while i <= stop+1 - stepSize:
-    var n = 0
+    size = len(x)
+    stepSize = 4 # bytes consumed per block (32-bit)
+    n = size div stepSize # number of complete blocks
+  var
+    h1: uint32 # running hash state, starts at zero
+    i = 0 # index of the next unread byte
+
+  # body: mix every complete 4-byte block into h1
+  while i < n * stepSize:
+    var k1: uint32
     when nimvm:
-      # we cannot cast in VM, so we do it manually
-      for j in countdown(stepSize-1, 0):
-        n = (n shl (8*elementSize)) or ord(x[i+j])
+      var j = stepSize
+      while j > 0: # assemble the block byte by byte; x[i] lands in the lowest byte
+        dec j
+        k1 = (k1 shl 8) or (ord(x[i+j])).uint32
     else:
-      n = cast[ptr Hash](unsafeAddr x[i])[]
-    result = result !& n
-    i += stepSize
-  bytewiseHashing(result, x, i, stop) # hash the remaining elements and finish
+      k1 = cast[ptr uint32](unsafeAddr x[i])[] # NOTE(review): load uses host byte order; confirm it matches the VM branch on big-endian targets
+    inc i, stepSize
+
+    k1 = imul(k1, c1)
+    k1 = rotl32(k1, 15)
+    k1 = imul(k1, c2)
+
+    h1 = h1 xor k1
+    h1 = rotl32(h1, 13)
+    h1 = h1*5 + n1
+
+  # tail: fold the remaining `size mod stepSize` bytes, x[i] in the lowest byte
+  var k1: uint32
+  var rem = size mod stepSize
+  while rem > 0:
+    dec rem
+    k1 = (k1 shl 8) or (ord(x[i+rem])).uint32
+  k1 = imul(k1, c1)
+  k1 = rotl32(k1, 15)
+  k1 = imul(k1, c2)
+  h1 = h1 xor k1
+
+  # finalization: xor in the length, then alternate xor-shifts with multiplications
+  h1 = h1 xor size.uint32
+  h1 = h1 xor (h1 shr 16)
+  h1 = imul(h1, m1)
+  h1 = h1 xor (h1 shr 13)
+  h1 = imul(h1, m2)
+  h1 = h1 xor (h1 shr 16)
+  return cast[Hash](h1) # reinterpret the 32-bit state as a Hash
+
+proc hashVmImpl(x: string, sPos, ePos: int): Hash =  # placeholder; vmops.nim registers "stdlib.hashes.hashVmImpl"
+  discard "look at compiler/vmops.nim"
+
+proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =  # placeholder; registered as "stdlib.hashes.hashVmImplChar"
+  discard "look at compiler/vmops.nim"
+
+proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =  # placeholder; registered as "stdlib.hashes.hashVmImplByte"
+  discard "look at compiler/vmops.nim"
 
 proc hash*(x: string): Hash =
   ## Efficient hashing of strings.
@@ -182,7 +238,10 @@ proc hash*(x: string): Hash =
   runnableExamples:
     doAssert hash("abracadabra") != hash("AbracadabrA")
 
-  hashImpl(result, x, 0, high(x))
+  when nimvm:  # compile-time evaluation uses the hashVmImpl callback
+    result = hashVmImpl(x, 0, high(x))
+  else:  # at run time, hash the string's bytes with murmurHash
+    result = murmurHash(toOpenArrayByte(x, 0, high(x)))
 
 proc hash*(x: cstring): Hash =
   ## Efficient hashing of null-terminated strings.
@@ -191,7 +250,11 @@ proc hash*(x: cstring): Hash =
     doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
     doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")
 
-  hashImpl(result, x, 0, high(x))
+  when not defined(JS) and defined(nimToOpenArrayCString):  # slice the cstring directly
+    murmurHash(toOpenArrayByte(x, 0, x.high))
+  else:  # otherwise convert to a Nim string first, then hash its bytes
+    let xx = $x
+    murmurHash(toOpenArrayByte(xx, 0, high(xx)))
 
 proc hash*(sBuf: string, sPos, ePos: int): Hash =
   ## Efficient hashing of a string buffer, from starting
@@ -202,7 +265,8 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
     var a = "abracadabra"
     doAssert hash(a, 0, 3) == hash(a, 7, 10)
 
-  hashImpl(result, sBuf, sPos, ePos)
+  murmurHash(toOpenArrayByte(sBuf, sPos, ePos))  # hash bytes sPos..ePos of sBuf
+
 
 proc hashIgnoreStyle*(x: string): Hash =
   ## Efficient hashing of strings; style is ignored.
@@ -300,12 +364,20 @@ proc hash*[T: tuple](x: T): Hash =
     result = result !& hash(f)
   result = !$result
 
+
 proc hash*[A](x: openArray[A]): Hash =
   ## Efficient hashing of arrays and sequences.
-  when A is char|SomeInteger:
-    hashImpl(result, x, 0, x.high)
+  when A is byte:  # bytes go straight into murmurHash
+    result = murmurHash(x)
+  elif A is char:
+    when nimvm:  # compile-time evaluation uses the hashVmImplChar callback
+      result = hashVmImplChar(x, 0, x.high)
+    else:  # at run time, view the chars as bytes and use murmurHash
+      result = murmurHash(toOpenArrayByte(x, 0, x.high))
   else:
-    bytewiseHashing(result, x, 0, x.high)
+    for a in x:  # every other element type: combine the per-element hashes
+      result = result !& hash(a)
+    result = !$result
 
 proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
   ## Efficient hashing of portions of arrays and sequences, from starting
@@ -316,10 +388,20 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
     let a = [1, 2, 5, 1, 2, 6]
     doAssert hash(a, 0, 1) == hash(a, 3, 4)
 
-  when A is char|SomeInteger:
-    hashImpl(result, aBuf, sPos, ePos)
+  when A is byte:
+    when nimvm:  # compile time: hash only sPos..ePos, matching the run-time branch
+      result = hashVmImplByte(aBuf, sPos, ePos)
+    else:
+      result = murmurHash(toOpenArray(aBuf, sPos, ePos))
+  elif A is char:
+    when nimvm:  # compile time: hash only sPos..ePos, matching the run-time branch
+      result = hashVmImplChar(aBuf, sPos, ePos)
+    else:  # at run time, view the chars as bytes and use murmurHash
+      result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
   else:
-    bytewiseHashing(result, aBuf, sPos, ePos)
+    for i in sPos .. ePos:  # every other element type: combine the per-element hashes
+      result = result !& hash(aBuf[i])
+    result = !$result
 
 proc hash*[A](x: set[A]): Hash =
   ## Efficient hashing of sets.
@@ -334,26 +416,30 @@ when isMainModule:
       a = ""
       b = newSeq[char]()
       c = newSeq[int]()
+      d = cstring""
+      e = "abcd"
     doAssert hash(a) == 0
     doAssert hash(b) == 0
     doAssert hash(c) == 0
+    doAssert hash(d) == 0
     doAssert hashIgnoreCase(a) == 0
     doAssert hashIgnoreStyle(a) == 0
+    doAssert hash(e, 3, 2) == 0
   block sameButDifferent:
     doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
     doAssert hash("aa bb aaaa1234") == hash(cstring"aa bb aaaa1234")
     doAssert hashIgnoreCase("aA bb aAAa1234") == hashIgnoreCase("aa bb aaaa1234")
     doAssert hashIgnoreStyle("aa_bb_AAaa1234") == hashIgnoreCase("aaBBAAAa1234")
   block smallSize: # no multibyte hashing
     let
-      xx = @['H','e','l','l','o']
-      ii = @[72'i8, 101, 108, 108, 111]
-      ss = "Hello"
+      xx = @['H','i']
+      ii = @[72'u8, 105]
+      ss = "Hi"
     doAssert hash(xx) == hash(ii)
     doAssert hash(xx) == hash(ss)
     doAssert hash(xx) == hash(xx, 0, xx.high)
     doAssert hash(ss) == hash(ss, 0, ss.high)
-  block largeSize: # longer than 8 characters, should trigger multibyte hashing
+  block largeSize: # longer than 4 characters
     let
       xx = @['H','e','l','l','o']
       xxl = @['H','e','l','l','o','w','e','e','n','s']
@@ -362,9 +448,6 @@ when isMainModule:
     doAssert hash(xxl) == hash(xxl, 0, xxl.high)
     doAssert hash(ssl) == hash(ssl, 0, ssl.high)
     doAssert hash(xx) == hash(xxl, 0, 4)
-  block misc:
-    let
-      a = [1'u8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
-      b = [1'i8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
-    doAssert hash(a) == hash(b)
-    doAssert hash(a, 2, 5) == hash(b, 2, 5)
+    doAssert hash(xx) == hash(ssl, 0, 4)
+    doAssert hash(xx, 0, 3) == hash(xxl, 0, 3)
+    doAssert hash(xx, 0, 3) == hash(ssl, 0, 3)
diff --git a/lib/system.nim b/lib/system.nim
@@ -4503,6 +4503,11 @@ when defined(nimconfig):
 when not defined(js):
   proc toOpenArray*[T](x: ptr UncheckedArray[T]; first, last: int): openArray[T] {.
     magic: "Slice".}
+  when defined(nimToOpenArrayCString):  # symbol added via defineSymbol in compiler/condsyms.nim
+    proc toOpenArray*(x: cstring; first, last: int): openArray[char] {.
+      magic: "Slice".}
+    proc toOpenArrayByte*(x: cstring; first, last: int): openArray[byte] {.
+      magic: "Slice".}  # same magic, result typed as bytes
 
 proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
   magic: "Slice".}
@@ -4512,8 +4517,13 @@ proc toOpenArray*[I, T](x: array[I, T]; first, last: I): openArray[T] {.
   magic: "Slice".}
 proc toOpenArray*(x: string; first, last: int): openArray[char] {.
   magic: "Slice".}
+
 proc toOpenArrayByte*(x: string; first, last: int): openArray[byte] {.
   magic: "Slice".}
+proc toOpenArrayByte*(x: openArray[char]; first, last: int): openArray[byte] {.
+  magic: "Slice".}  # char buffer viewed as bytes; hashes.nim calls this for openArray[char]
+proc toOpenArrayByte*(x: seq[char]; first, last: int): openArray[byte] {.
+  magic: "Slice".}  # overload taking seq[char]
 
 type
   ForLoopStmt* {.compilerproc.} = object ## \

diff --git a/tests/parallel/tsendtwice.nim b/tests/parallel/tsendtwice.nim
@@ -1,11 +1,12 @@
 discard """
-  output: '''ob @[]
+  output: '''ob2 @[]
+ob @[]
 ob3 @[]
-ob2 @[]
 3
+ob2 @[]
 ob @[]
 ob3 @[]
-ob2 @[]'''
+'''
   cmd: "nim c -r --threads:on $file"
 """