Bader-Research
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎arkouda.py‎
Lines changed: 87 additions & 4 deletions b/‎arkouda.py‎
Lines changed: 87 additions & 4 deletions
diff --git a/‎benchmarks/bench_mac_llvm.log‎
Lines changed: 108 additions & 0 deletions b/‎benchmarks/bench_mac_llvm.log‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎benchmarks/run_all.sh‎
Lines changed: 15 additions & 0 deletions b/‎benchmarks/run_all.sh‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎src/ConcatenateMsg.chpl‎
Lines changed: 104 additions & 0 deletions b/‎src/ConcatenateMsg.chpl‎
Lines changed: 104 additions & 0 deletions
@@ -1,5 +1,8 @@
 arkouda_server
 arkouda_server_real
+arkouda_server_llvm
+arkouda_server_llvm_real
+*_real
 #*#
 .#*
 *.~*
 
@@ -339,7 +339,11 @@ def __neg__(self):
 
     # overload unary~ for pdarray implemented as pdarray^(~0)
     def __invert__(self):
-        return self.binop(~0, "^")
+        if self.dtype == np.int64:
+            return self.binop(~0, "^")
+        if self.dtype == np.bool:
+            return self.binop(True, "^")
+        return NotImplemented
 
     # op= operators
     def opeq(self, other, op):
@@ -674,12 +678,12 @@ def histogram(pda, bins=10):
     else:
         raise TypeError("must be pdarray {} and bins must be an int {}".format(pda,bins))
 
-def in1d(pda1, pda2):
+def in1d(pda1, pda2, invert=False):
+    """Send an ``in1d`` request for two pdarrays and wrap the server's reply.
+
+    The request carries the names of both arrays followed by ``invert``
+    formatted as text (``True``/``False``).  The reply is turned into a
+    pdarray with ``create_pdarray``.
+
+    NOTE(review): presumably mirrors ``numpy.in1d`` (a boolean mask over
+    ``pda1``, negated when ``invert`` is true) -- the server side is not
+    visible here; confirm.
+
+    Raises:
+        TypeError: if either argument is not a pdarray.
+    """
     if isinstance(pda1, pdarray) and isinstance(pda2, pdarray):
-        repMsg = generic_msg("in1d {} {}".format(pda1.name, pda2.name))
+        repMsg = generic_msg("in1d {} {} {}".format(pda1.name, pda2.name, invert))
         return create_pdarray(repMsg)
     else:
-        raise TypeError("must be pdarray {} and bins must be an int {}".format(pda,bins))
+        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))
 
 def unique(pda, return_counts=False):
     if isinstance(pda, pdarray):
@@ -737,6 +741,85 @@ def coargsort(arrays):
     repMsg = generic_msg("coargsort {} {}".format(len(arrays), ' '.join([a.name for a in arrays])))
     return create_pdarray(repMsg)
 
+def concatenate(arrays):
+    size = 0
+    dtype = None
+    for a in arrays:
+        if not isinstance(a, pdarray):
+            raise ValueError("Argument must be an iterable of pdarrays")
+        if dtype == None:
+            dtype = a.dtype
+        elif dtype != a.dtype:
+            raise ValueError("All pdarrays must have same dtype")
+        size += a.size
+    if size == 0:
+        return zeros(0, dtype=int64)
+    repMsg = generic_msg("concatenate {} {}".format(len(arrays), ' '.join([a.name for a in arrays])))
+    return create_pdarray(repMsg)
+
+# (A1 | A2) Set Union: elements are in one or the other or both
+def union1d(pda1, pda2):
+    if isinstance(pda1, pdarray) and isinstance(pda2, pdarray):
+        if pda1.size == 0:
+            return pda2 # union is pda2
+        if pda2.size == 0:
+            return pda1 # union is pda1
+        return unique(concatenate((unique(pda1), unique(pda2))))
+    else:
+        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))
+
+# (A1 & A2) Set Intersection: elements have to be in both arrays
+def intersect1d(pda1, pda2, assume_unique=False):
+    if isinstance(pda1, pdarray) and isinstance(pda2, pdarray):
+        if pda1.size == 0:
+            return pda1 # nothing in the intersection
+        if pda2.size == 0:
+            return pda2 # nothing in the intersection
+        if not assume_unique:
+            pda1 = unique(pda1)
+            pda2 = unique(pda2)
+        aux = concatenate((pda1, pda2))
+        aux_sort_indices = argsort(aux)
+        aux = aux[aux_sort_indices]
+        mask = aux[1:] == aux[:-1]
+        int1d = aux[:-1][mask]
+        return int1d
+    else:
+        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))
+
+# (A1 - A2) Set Difference: elements have to be in first array but not second
+def setdiff1d(pda1, pda2, assume_unique=False):
+    if isinstance(pda1, pdarray) and isinstance(pda2, pdarray):
+        if pda1.size == 0:
+            return pda1 # return a zero length pdarray
+        if pda2.size == 0:
+            return pda1 # subtracting nothing return orig pdarray
+        if not assume_unique:
+            pda1 = unique(pda1)
+            pda2 = unique(pda2)
+        return pda1[in1d(pda1, pda2, invert=True)]
+    else:
+        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))
+
+# (A1 ^ A2) Set Symmetric Difference: elements are not in the intersection
+def setxor1d(pda1, pda2, assume_unique=False):
+    if isinstance(pda1, pdarray) and isinstance(pda2, pdarray):
+        if pda1.size == 0:
+            return pda2 # return other pdarray if pda1 is empty
+        if pda2.size == 0:
+            return pda1 # return other pdarray if pda2 is empty
+        if not assume_unique:
+            pda1 = unique(pda1)
+            pda2 = unique(pda2)
+        aux = concatenate((pda1, pda2))
+        aux_sort_indices = argsort(aux)
+        aux = aux[aux_sort_indices]
+        flag = concatenate((array([True]), aux[1:] != aux[:-1], array([True])))
+        return aux[flag[1:] & flag[:-1]]
+    else:
+        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))
+
+
 def local_argsort(pda):
     if isinstance(pda, pdarray):
         if pda.size == 0:
 
@@ -0,0 +1,108 @@
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+array size = 1,000,000
+number of trials =  6
+>>> arkouda argsort
+numLocales = 1, N = 1,000,000
+Average time = 0.0469 sec
+Average rate = 0.1588 GiB/sec
+>>> numpy argsort
+N = 1,000,000
+Average time = 0.0978 sec
+Average rate = 0.0762 GiB/sec
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+size of index array = 100,000,000
+size of values array = 100,000,000
+number of trials =  6
+>>> arkouda gather
+numLocales = 1, num_indices = 100,000,000 ; num_values = 100,000,000
+Average time = 1.3009 sec
+Average rate = 1.72 GiB/sec
+>>> numpy gather
+num_indices = 100,000,000 ; num_values = 100,000,000
+Average time = 2.6057 sec
+Average rate = 0.86 GiB/sec
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+array size = 100,000,000
+number of trials =  6
+>>> arkouda reduce
+numLocales = 1, N = 100,000,000
+sum = 4999999950000000
+  Average time = 0.0391 sec
+  Average rate = 19.04 GiB/sec
+prod = 0.0
+  Average time = 0.0421 sec
+  Average rate = 17.72 GiB/sec
+min = 0
+  Average time = 0.0389 sec
+  Average rate = 19.13 GiB/sec
+max = 99999999
+  Average time = 0.0398 sec
+  Average rate = 18.73 GiB/sec
+>>> numpy reduce
+N = 100,000,000
+sum = 4999999950000000
+  Average time = 0.0557 sec
+  Average rate = 13.38 GiB/sec
+prod = 0
+  Average time = 0.0900 sec
+  Average rate = 8.28 GiB/sec
+min = 0
+  Average time = 0.0897 sec
+  Average rate = 8.31 GiB/sec
+max = 99999999
+  Average time = 0.0885 sec
+  Average rate = 8.42 GiB/sec
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+array size = 100,000,000
+number of trials =  6
+>>> arkouda scan
+numLocales = 1, N = 100,000,000
+cumsum, final value = 4999999950000000
+  Average time = 0.7731 sec
+  Average rate = 1.93 GiB/sec
+cumprod, final value = 0
+  Average time = 0.6849 sec
+  Average rate = 2.18 GiB/sec
+>>> numpy scan
+N = 100,000,000
+cumsum, final value = 4999999950000000
+  Average time = 0.6315 sec
+  Average rate = 2.36 GiB/sec
+cumprod, final value = 0
+  Average time = 0.7011 sec
+  Average rate = 2.13 GiB/sec
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+size of index array = 100,000,000
+size of values array = 100,000,000
+number of trials =  6
+>>> arkouda scatter
+numLocales = 1, num_indices = 100,000,000 ; num_values = 100,000,000
+Average time = 0.9536 sec
+Average rate = 2.34 GiB/sec
+>>> numpy scatter
+num_indices = 100,000,000 ; num_values = 100,000,000
+Average time = 2.0967 sec
+Average rate = 1.07 GiB/sec
+4.3.1
+psp =  tcp://localhost:5555
+connected to tcp://localhost:5555
+array size = 100,000,000
+number of trials =  6
+>>> arkouda stream
+numLocales = 1, N = 100,000,000
+Average time = 0.6236 sec
+Average rate = 3.58 GiB/sec
+>>> numpy stream
+N = 100,000,000
+Average time = 0.8965 sec
+Average rate = 2.49 GiB/sec
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+echo ---- argsort ----
+./argsort.py -n 10000000 localhost 5555
+echo ---- gather ----
+./gather.py localhost 5555
+echo ---- reduce ----
+./reduce.py -t 10 localhost 5555
+echo ---- scan ----
+./scan.py localhost 5555
+echo ---- scatter ----
+./scatter.py localhost 5555
+echo ---- stream ----
+./stream.py localhost 5555
+
@@ -0,0 +1,104 @@
+module ConcatenateMsg
+{
+    use ServerConfig;
+    
+    use Time only;
+    use Math only;
+    
+    use MultiTypeSymbolTable;
+    use MultiTypeSymEntry;
+    use ServerErrorStrings;
+    
+    use AryUtil;
+
+    /* Concatenate a list of arrays together to form one array.
+
+       reqMsg is split on whitespace into the command, the number of
+       arrays n, and then the symbol-table names of the arrays to join.
+       All arrays must have the same dtype; int64, float64 and bool are
+       handled.  On success the reply is "created " followed by the
+       attributes of the new entry; otherwise an error string is returned.
+     */
+    proc concatenateMsg(reqMsg: string, st: borrowed SymTab) {
+        var pn = "concatenate";
+        var repMsg: string;
+        var fields = reqMsg.split();
+        var n = try! fields[2]:int; // number of arrays to concatenate
+        var names = fields[3..];
+        // Check that fields contains the stated number of arrays
+        if (n != names.size) { return try! incompatibleArgumentsError(pn, "Expected %i arrays but got %i".format(n, names.size)); }
+        var size: int = 0;
+        var dtype: DType;
+        // Check that all arrays exist in the symbol table and have the same dtype
+        for (name, i) in zip(names, 1..) {
+            var g: borrowed GenSymEntry = st.lookup(name);
+            if (g == nil) { return unknownSymbolError(pn, name); }
+            // the first array fixes the dtype every later array must match
+            if (i == 1) {dtype = g.dtype;}
+            else {
+                if (dtype != g.dtype) {
+                    return try! incompatibleArgumentsError(pn, "Expected %s dtype but got %s dtype".format(dtype2str(dtype), dtype2str(g.dtype)));
+                }
+            }
+            // accumulate size from each array size
+            size += g.size;
+        }
+        // allocate a new array in the symboltable
+        // and copy in arrays
+        var rname = st.nextName();
+        select (dtype) {
+            when DType.Int64 {
+                // create array to copy into
+                var e = st.addEntry(rname, size, int);
+                // index in e where the next operand is written
+                var start: int = 0;
+                for name in names {
+                    // lookup and cast operand to copy from
+                    var o = toSymEntry(st.lookup(name), int);
+                    // calculate end which is inclusive
+                    const end = start + o.size - 1;
+                    // copy array into concatenation array
+                    e.a[{start..end}] = o.a;
+                    // update new start for next array copy
+                    start += o.size;
+                }
+            }
+            when DType.Float64 {
+                // create array to copy into
+                var e = st.addEntry(rname, size, real);
+                // index in e where the next operand is written
+                var start: int = 0;
+                for name in names {
+                    // lookup and cast operand to copy from
+                    var o = toSymEntry(st.lookup(name), real);
+                    // calculate end which is inclusive
+                    const end = start + o.size - 1;
+                    // copy array into concatenation array
+                    e.a[{start..end}] = o.a;
+                    // update new start for next array copy
+                    start += o.size;
+                }
+            }
+            when DType.Bool {
+                // create array to copy into
+                var e = st.addEntry(rname, size, bool);
+                // index in e where the next operand is written
+                var start: int = 0;
+                for name in names {
+                    // lookup and cast operand to copy from
+                    var o = toSymEntry(st.lookup(name), bool);
+                    // calculate end which is inclusive
+                    const end = start + o.size - 1;
+                    // copy array into concatenation array
+                    e.a[{start..end}] = o.a;
+                    // update new start for next array copy
+                    start += o.size;
+                }
+            }
+            otherwise {return notImplementedError("concatenate",dtype);}
+        }
+
+        return try! "created " + st.attrib(rname);
+    }
+    
+}