Skip to content

Commit 51343db

Browse files
committed
dataframe lib
1 parent 3af7b0f commit 51343db

File tree

7 files changed

+357
-4
lines changed

7 files changed

+357
-4
lines changed

build.sbt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@ name := "qnt"
22

33
version := "0.1"
44

5-
scalaVersion := "2.11.12"
5+
scalaVersion := "2.13.1"
66

77
libraryDependencies += "com.github.nscala-time" %% "nscala-time" % "2.22.0"
88

99
resolvers += "Unidata" at "https://artifacts.unidata.ucar.edu/repository/unidata-all"
1010
libraryDependencies += "edu.ucar" % "netcdf" % "4.3.22"
1111

12+
//resolvers += "Sonatype Releases" at "http://oss.sonatype.org/content/repositories/releases"
1213
resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots"
13-
libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.10.1"
14+
libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.10.0"
1415

15-
resolvers += "Sonatype Releases" at "http://oss.sonatype.org/content/repositories/releases"
16-
libraryDependencies += "org.scala-saddle" %% "saddle-core" % "1.3.+"
16+
//
17+
//libraryDependencies += "org.scala-saddle" %% "saddle-core" % "1.3.+"
18+
19+
libraryDependencies += "org.scalanlp" %% "breeze" % "1.0"
1720

1821
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.8" % "test"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package qnt.breeze
2+
3+
/**
 * Empty placeholder object; it currently declares no members.
 */
object BinarySearch
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package qnt.breeze
2+
3+
/**
 * Enumeration with three values declared in the order `prev`, `exact`, `next`
 * (ids 0, 1 and 2 respectively).
 */
object IndexMatchType extends Enumeration {
  val prev, exact, next = Value
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package qnt.breeze
2+
3+
import breeze.linalg.{SliceVector, Vector}
4+
5+
import scala.collection.mutable
6+
import scala.reflect.ClassTag
7+
8+
/**
 * Array-backed index of unique values with a hash-map lookup from value to position.
 *
 * The constructor validates the data eagerly: when `ordered` is set, every adjacent pair
 * must respect the ordering direction, and in all cases each value may occur only once.
 *
 * @param data     backing array; it is stored by reference and mutated by `update`
 * @param ordered  when true, adjacent values must not violate the ordering
 *                 (ascending by default, descending when `reversed` is true)
 * @param reversed flips the direction of the ordering check
 * @param ord      ordering used for the ordering checks
 * @param tag      class tag required to materialise arrays of `V`
 * @throws IllegalArgumentException on an ordering violation or a duplicate value
 */
class IndexVector[V](
  private val data: Array[V],
  val ordered: Boolean,
  val reversed: Boolean
)(implicit ord: Ordering[V], tag: ClassTag[V]) extends IndexVectorLike[V] {

  // value -> position in `data`; kept in sync by `update`
  private val valueToIdxMap = new mutable.HashMap[V, Int]()

  if (ordered) {
    for (i <- 0 until data.length - 1) {
      // `gt ^ reversed` is true when the pair breaks the requested direction
      if (ord.gt(data(i), data(i + 1)) ^ reversed) {
        throw new IllegalArgumentException(s"ordering violation idx1=$i idx2=${i + 1}")
      }
    }
  }
  for (i <- data.indices) {
    val v = data(i)
    if (valueToIdxMap.contains(v)) {
      throw new IllegalArgumentException(s"duplicate idx=$i val=$v")
    }
    valueToIdxMap(v) = i
  }

  /** Builds a new [[IndexVector]] from `toArray` with the same `ordered`/`reversed` flags. */
  override def copy: IndexVector[V] = IndexVector[V](toArray, ordered, reversed)

  /**
   * Replaces the value at position `i`, keeping the uniqueness and ordering invariants.
   *
   * Writing the value that is already stored at `i` is a no-op.
   *
   * @throws IllegalStateException    if `v` is already present at another position
   * @throws IllegalArgumentException if `v` would break the ordering relative to a neighbour
   */
  override def update(i: Int, v: V): Unit = {
    if (data(i) != v) {
      // The map mirrors `data`, so a hash lookup replaces the former linear array scan.
      if (valueToIdxMap.contains(v)) {
        throw new IllegalStateException(s"duplicate $i")
      }
      if (ordered) {
        // Same predicate as the constructor: a (left, right) pair is invalid when
        // `gt(left, right) ^ reversed` holds. The previous-neighbour check used `lt`,
        // which rejected valid updates and accepted invalid ones.
        if (i > 0 && (ord.gt(data(i - 1), v) ^ reversed)) {
          throw new IllegalArgumentException(s"ordering violation (prev, cur) idx=$i")
        }
        if (i < length - 1 && (ord.gt(v, data(i + 1)) ^ reversed)) {
          throw new IllegalArgumentException(s"ordering violation (cur, nxt) idx=$i")
        }
      }
      valueToIdxMap.remove(data(i))
      data(i) = v
      valueToIdxMap(v) = i
    }
  }

  /** Number of values in the index. */
  override def length: Int = data.length

  /** Value stored at position `i`. */
  override def apply(i: Int): V = data(i)

  /** Position of `v`, or `None` when the value is not part of the index. */
  override def indexOfExact(v: V): Option[Int] = valueToIdxMap.get(v)

  /** Creates a slice view over this vector that selects the given positions. */
  override def sliceSeq(idx: Iterator[Int]): SliceIndexVector[V] =
    new SliceIndexVector[V](this, idx.toIndexedSeq)

}
73+
74+
/** Factory methods for [[IndexVector]]. */
object IndexVector {

  /** Wraps `data` in an [[IndexVector]] with the given ordering flags. */
  def apply[V](data: Array[V], ordered: Boolean, reversed: Boolean)
              (implicit ord: Ordering[V], tag: ClassTag[V]): IndexVector[V] =
    new IndexVector[V](data, ordered, reversed)(ord, tag)

  /**
   * Materialises a Breeze slice whose underlying tensor is an [[IndexVector]],
   * reusing that tensor's `ordered`/`reversed` flags.
   *
   * @throws IllegalArgumentException if the slice's tensor is not an [[IndexVector]]
   */
  def apply[V](sliceVector: SliceVector[Int, V])
              (implicit ord: Ordering[V], tag: ClassTag[V]): IndexVector[V] =
    sliceVector.tensor match {
      case source: IndexVector[V] =>
        val sliced: Array[V] = sliceVector.toArray
        new IndexVector[V](sliced, source.ordered, source.reversed)(ord, tag)
      case _ =>
        throw new IllegalArgumentException("tensor of slice is not IndexVector")
    }

  /** Builds an [[IndexVector]] from the contents of a Breeze vector. */
  def apply[V](vector: Vector[V], ordered: Boolean, reversed: Boolean)
              (implicit ord: Ordering[V], tag: ClassTag[V]): IndexVector[V] = {
    val contents: Array[V] = vector.toArray
    new IndexVector[V](contents, ordered, reversed)(ord, tag)
  }
}
98+
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package qnt.breeze
2+
3+
import breeze.linalg.VectorLike
4+
5+
import scala.reflect.ClassTag
6+
7+
/**
 * Common behaviour of index vectors: a Breeze vector of values that can be looked up
 * by value (`indexOfExact`, `indexOfBinarySearch`) and sliced by mask, position or value.
 */
trait IndexVectorLike[V] extends breeze.linalg.Vector[V] with VectorLike[V, IndexVectorLike[V]] {

  // TODO unique ?
  /** Whether the values are stored in order. */
  def ordered: Boolean

  /** Whether the ordering direction is reversed (descending instead of ascending). */
  def reversed: Boolean

  // Every element is reported as active, so the "active" views equal the full views.
  override def activeSize: Int = length

  override def activeIterator: Iterator[(Int, V)] = iterator

  override def activeValuesIterator: Iterator[V] = valuesIterator

  override def activeKeysIterator: Iterator[Int] = keysIterator

  override def repr: IndexVectorLike[V] = this

  override def toString: String = {
    valuesIterator.mkString(s"IndexVector(ordered=$ordered, reversed=$reversed, data=[", ", ", "])")
  }

  override def copy: IndexVector[V]

  /**
   * Union of this index and `other` without duplicates.
   *
   * When this index is ordered the result is sorted with `ord` (and then reversed when
   * `reversed` is set); otherwise values keep their first-occurrence order.
   */
  def merge(other: IndexVectorLike[V])(implicit ord: Ordering[V], tag: ClassTag[V]): IndexVectorLike[V] = {
    val distinctVals = Array.concat(toArray, other.toArray).distinct
    val merged =
      if (!ordered) distinctVals
      else {
        val ascending = distinctVals.sorted(ord)
        if (reversed) ascending.reverse else ascending
      }
    IndexVector[V](merged, ordered, reversed)
  }

  /**
   * Locates `value` exactly when possible, otherwise falls back to `indexOfBinarySearch`.
   *
   * @return `Some((i, i))` for an exact hit, else the result of `indexOfBinarySearch`
   */
  def indexOfUnexact(value: V)(implicit ord: Ordering[V]): Option[(Int, Int)] =
    indexOfExact(value) match {
      case Some(i) => Some((i, i))
      case None    => indexOfBinarySearch(value)
    }

  /** Position of `value`, or `None` when it is not present. */
  def indexOfExact(value: V): Option[Int]

  /**
   * Binary search over an ordered index.
   *
   * @return `None` when the index is unordered, empty, or `value` lies outside the range
   *         spanned by the first and last elements; `Some((i, i))` when the element at
   *         `i` equals `value`; otherwise `Some((lo, hi))` with the two adjacent
   *         positions between which `value` falls.
   */
  def indexOfBinarySearch(value: V)(implicit ord: Ordering[V]): Option[(Int, Int)] = {
    val lastIdx = length - 1
    if (!ordered || lastIdx < 0) {
      None
    } else {
      val first = apply(0)
      val last = apply(lastIdx)
      if (first == value) {
        Some((0, 0))
      } else if (last == value) {
        Some((lastIdx, lastIdx))
      } else if ((ord.lt(last, value) ^ reversed) || (ord.gt(first, value) ^ reversed)) {
        // value lies beyond one of the two ends
        None
      } else {
        var lo = 0
        var hi = lastIdx
        var hit: Option[(Int, Int)] = None
        while (hit.isEmpty && hi - lo > 1) {
          // overflow-safe midpoint
          val mid = lo + (hi - lo) / 2
          val midVal = apply(mid)
          if (midVal == value) {
            hit = Some((mid, mid))
          } else if (ord.lt(midVal, value) ^ reversed) {
            lo = mid
          } else {
            // Unconditional fallback: the interval shrinks on every iteration, so the
            // loop terminates even when `ord` and `==` disagree (e.g. NaN values).
            hi = mid
          }
        }
        hit.orElse(Some((lo, hi)))
      }
    }
  }

  // Slicing by boolean mask: positions whose mask entry is true are selected.
  def sliceMask(mask: breeze.linalg.Vector[Boolean]): SliceIndexVector[V] = sliceMask(mask.valuesIterator)
  def sliceMask(mask: Boolean*): SliceIndexVector[V] = sliceMask(mask.iterator)
  def sliceMask(mask: Iterable[Boolean]): SliceIndexVector[V] = sliceMask(mask.iterator)
  def sliceMask(mask: Iterator[Boolean]): SliceIndexVector[V] =
    sliceSeq(mask.zipWithIndex.filter(_._1).map(_._2))

  // Slicing by explicit positions; all overloads funnel into the Iterator variant.
  def sliceSeq(idx: breeze.linalg.Vector[Int]): SliceIndexVector[V] = sliceSeq(idx.valuesIterator)
  def sliceSeq(idx: Int*): SliceIndexVector[V] = sliceSeq(idx.iterator)
  def sliceSeq(idx: Iterable[Int]): SliceIndexVector[V] = sliceSeq(idx.iterator)
  def sliceSeq(idx: Iterator[Int]): SliceIndexVector[V]

  /** Slices by the positions produced by `RoundArrayRange` for this vector's length. */
  def sliceRange(start: Int, end: Int, step: Int, left: Boolean, right: Boolean, round: Boolean)
  : SliceIndexVector[V] = sliceSeq(RoundArrayRange(length, start, end, step, left, right, round))

  /** Alias of `indexOfExact`. */
  def loc(v: V): Option[Int] = indexOfExact(v)

  // Slicing by value: values that are not part of the index are skipped.
  def sliceLoc(v: breeze.linalg.Vector[V]): SliceIndexVector[V] = sliceLoc(v.valuesIterator)
  def sliceLoc(v: V*): SliceIndexVector[V] = sliceLoc(v.iterator)
  def sliceLoc(v: Iterable[V]): SliceIndexVector[V] = sliceLoc(v.iterator)
  def sliceLoc(v: Iterator[V]): SliceIndexVector[V] = {
    val found: Iterator[Int] = v.map(indexOfExact).collect { case Some(i) => i }
    sliceSeq(found)
  }

  /**
   * Slices between the positions of two values.
   *
   * Each bound is resolved with `indexOfUnexact`. When a bound is not an exact hit, the
   * side of the bracketing pair that lies inside the range is used (depends on the sign
   * of `step`). If either bound cannot be resolved, a slice of an empty index is returned.
   */
  def sliceLocRange(start: V, end: V, step: Int = 1,
                    left: Boolean = true, right: Boolean = true, round: Boolean = true)
                   (implicit ord: Ordering[V], tag: ClassTag[V]): SliceIndexVector[V] = {
    (indexOfUnexact(start)(ord), indexOfUnexact(end)(ord)) match {
      case (Some((startLo, startHi)), Some((endLo, endHi))) =>
        sliceRange(
          if (step > 0) startHi else startLo,
          if (step > 0) endLo else endHi,
          step,
          left, right, round
        )
      case _ =>
        IndexVectorLike.empty[V].sliceSeq(Seq())
    }
  }
}
140+
141+
/** Companion helpers for [[IndexVectorLike]]. */
object IndexVectorLike {

  /** Builds an [[IndexVector]] over a zero-length array, flagged ordered and not reversed. */
  def empty[V](implicit ord: Ordering[V], tag: ClassTag[V]): IndexVector[V] = {
    val noValues = Array.empty[V]
    new IndexVector[V](noValues, ordered = true, reversed = false)(ord, tag)
  }
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package qnt.breeze
2+
3+
/**
 * Arithmetic progression of array positions that wraps around modulo `limit`.
 *
 * Element `i` is `(start + i * step)` reduced into `[0, limit)`.
 *
 * @param limit size of the array the positions refer to
 * @param start first position
 * @param step  distance between consecutive positions (may be negative)
 * @param count number of positions in the sequence
 */
class RoundArrayRange(limit: Int, start: Int, step: Int, count: Int) extends IndexedSeq[Int] {

  /**
   * Position number `i` of the range.
   *
   * @throws IllegalArgumentException if `i` is outside `[0, count)`
   */
  override def apply(i: Int): Int = {
    if (i < 0 || i >= count) {
      // the exception used to be constructed but never thrown
      throw new IllegalArgumentException("out of range")
    }
    // floorMod keeps the result non-negative when a negative step wraps below zero
    Math.floorMod(start + i * step, limit)
  }

  override def length: Int = count
}

object RoundArrayRange {

  /**
   * Builds the positions from `start` to `end` (both inclusive by default) in steps of `step`.
   *
   * @param length size of the array being indexed
   * @param start  first position; negative values count from the end (`-1` is the last position)
   * @param end    last position; negative values count from the end
   * @param step   stride; a negative stride walks towards lower positions
   * @param left   when false, `start` itself is excluded (the start is shifted by +1)
   * @param right  when false, `end` itself is excluded (the end is shifted by -1)
   * @param round  when true, a range whose end lies "behind" its start in the walking
   *               direction wraps around the array; when false such a range is empty
   * @throws IllegalArgumentException if the resolved start or end is outside `[0, length)`,
   *                                  or if `step` is zero
   */
  def apply(
    length: Int,

    start: Int = 0,
    end: Int = -1,
    step: Int = 1,

    left: Boolean = true,
    right: Boolean = true,
    round: Boolean = true
  ): RoundArrayRange = {

    // `>= 0` (not `> 0`): position 0 is a regular position and must not be treated as
    // "count from the end", which resolved it to `length` and rejected the default start.
    val realStart = (if (start >= 0) start else length + start) + (if (left) 0 else 1)
    if (realStart < 0 || realStart >= length) {
      throw new IllegalArgumentException("out of range start")
    }

    val realEnd = (if (end >= 0) end else length + end) + (if (right) 0 else -1)
    // The end check used to be nested under a copy of the start condition and never fired.
    if (realEnd < 0 || realEnd >= length) {
      throw new IllegalArgumentException("out of range end")
    }

    if (step == 0) {
      // previously surfaced as an ArithmeticException from the division below
      throw new IllegalArgumentException("step must not be zero")
    }

    val ustep = math.abs(step)
    // distance from start to end measured in the walking direction
    val dist = realEnd - realStart
    val directed = if (step < 0) -dist else dist
    val udist = if (round && directed < 0) directed + length else directed

    // +1 because the resolved end position is part of the range
    val cnt = if (udist < 0) 0 else udist / ustep + 1

    new RoundArrayRange(length, realStart, step, cnt)
  }

}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package qnt.breeze
2+
3+
import scala.reflect.ClassTag
4+
5+
/**
 * View over selected positions of an [[IndexVector]].
 *
 * Element `i` of the slice is element `slices(i)` of the underlying `tensor`; reads and
 * writes are forwarded to the tensor.
 *
 * @param tensor underlying index vector
 * @param slices positions of `tensor` that make up this slice, in slice order
 */
class SliceIndexVector[V](
  tensor: IndexVector[V],
  slices: IndexedSeq[Int]
)(implicit ord: Ordering[V], tag: ClassTag[V])
  extends IndexVectorLike[V] {

  // True when every adjacent pair of selected positions satisfies `cmp`
  // (vacuously true for slices with fewer than two positions).
  private def strictlyMonotonic(cmp: (Int, Int) => Boolean): Boolean =
    (0 until slices.length - 1).forall(i => cmp(slices(i), slices(i + 1)))

  private val slicesAscending: Boolean = strictlyMonotonic(_ < _)
  private val slicesDescending: Boolean = strictlyMonotonic(_ > _)

  /** The slice is ordered when the tensor is ordered and the positions are strictly monotonic. */
  override val ordered: Boolean = tensor.ordered && (slicesAscending || slicesDescending)

  /**
   * Direction of the slice: the tensor's direction, flipped when positions are selected
   * back to front. Slices with fewer than two positions keep the tensor's direction
   * (the descending test is vacuously true for them and used to flip the flag).
   */
  override val reversed: Boolean = tensor.reversed ^ (slices.length > 1 && slicesDescending)

  // tensor position -> position inside this slice
  private val tensorToLocalIdxMap = slices.zipWithIndex.toMap

  /** Position of `value` inside the slice, or `None` if the value is not selected. */
  override def indexOfExact(value: V): Option[Int] =
    tensor.indexOfExact(value).flatMap(tensorToLocalIdxMap.get)

  /** Builds a standalone [[IndexVector]] from `toArray` with this slice's flags. */
  override def copy: IndexVector[V] = IndexVector(toArray, ordered, reversed)

  override def length: Int = slices.length

  override def apply(i: Int): V = tensor(slices(i))

  /**
   * Writes `v` through to the underlying tensor at the position backing slice element `i`.
   * (The previous body only read the element and discarded `v`.)
   */
  override def update(i: Int, v: V): Unit = tensor.update(slices(i), v)

  /** Re-slices this slice; the result refers directly to the underlying tensor. */
  override def sliceSeq(idx: Iterator[Int]): SliceIndexVector[V] =
    new SliceIndexVector[V](tensor, idx.map(i => slices(i)).toIndexedSeq)

}

0 commit comments

Comments
 (0)