Skip to content

Commit ab6d4e8

Browse files
committed
open up possibilities for different density models
1 parent 98767cf commit ab6d4e8

File tree

12 files changed

+177
-73
lines changed

12 files changed

+177
-73
lines changed

addressing_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func doAddressing(heads []*Head, memory [][]Unit) (weights [][]float64, reads []
8181
wc := make([]float64, len(memory))
8282
var sum float64 = 0
8383
for j := 0; j < len(wc); j++ {
84-
wc[j] = math.Exp(beta * cosineSimilarity(unitVals(h.K()), unitVals(memory[j])))
84+
wc[j] = math.Exp(beta * cosineSimilarity(UnitVals(h.K()), UnitVals(memory[j])))
8585
sum += wc[j]
8686
}
8787
for j := 0; j < len(wc); j++ {

cntl1.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func (old *controller1) Forward(reads []*memRead, x []float64) Controller {
9393
v += wyh1ij.Val * c.H1[j].Val
9494
}
9595
v += c.Wyh1[i][len(c.H1)].Val
96-
c.y[i].Val = Sigmoid(v)
96+
c.y[i].Val = v
9797
}
9898
memoryM := len(reads[0].Top)
9999
for i, wuh1i := range c.Wuh1 {

cntl1_test.go

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"testing"
77
)
88

9-
func TestController1(t *testing.T) {
9+
func TestLogisticModel(t *testing.T) {
1010
times := 10
1111
x := MakeTensor2(times, 4)
1212
for i := 0; i < len(x); i++ {
@@ -26,12 +26,40 @@ func TestController1(t *testing.T) {
2626
numHeads := 2
2727
c := NewEmptyController1(len(x[0]), len(y[0]), h1Size, numHeads, n, m)
2828
c.Weights(func(u *Unit) { u.Val = 2 * rand.Float64() })
29-
ForwardBackward(c, x, y)
3029

31-
l := loss(c, Controller1Forward, x, y)
32-
checkGradients(t, c, Controller1Forward, x, y, l)
30+
model := &LogisticModel{Y: y}
31+
ForwardBackward(c, x, model)
32+
checkGradients(t, c, Controller1Forward, x, model)
3333
}
3434

35+
func TestMultinomialModel(t *testing.T) {
36+
times := 10
37+
x := MakeTensor2(times, 4)
38+
for i := 0; i < len(x); i++ {
39+
for j := 0; j < len(x[i]); j++ {
40+
x[i][j] = rand.Float64()
41+
}
42+
}
43+
outputSize := 4
44+
y := make([]int, times)
45+
for i := range y {
46+
y[i] = rand.Intn(outputSize)
47+
}
48+
n := 3
49+
m := 2
50+
h1Size := 3
51+
numHeads := 2
52+
c := NewEmptyController1(len(x[0]), outputSize, h1Size, numHeads, n, m)
53+
c.Weights(func(u *Unit) { u.Val = 2 * rand.Float64() })
54+
55+
model := &MultinomialModel{Y: y}
56+
ForwardBackward(c, x, model)
57+
checkGradients(t, c, Controller1Forward, x, model)
58+
}
59+
60+
// A ControllerForward is a reference (ground-truth) implementation of a controller's forward pass; the tests pass it to loss and checkGradients to verify gradients numerically.
61+
type ControllerForward func(c Controller, reads [][]float64, x []float64) (prediction []float64, heads []*Head)
62+
3563
func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float64, []*Head) {
3664
c := c1.(*controller1)
3765
h1Size := len(c.Wh1r)
@@ -57,7 +85,7 @@ func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float6
5785
v += c.Wyh1[i][j].Val * h1[j]
5886
}
5987
v += c.Wyh1[i][maxJ].Val
60-
prediction[i] = Sigmoid(v)
88+
prediction[i] = v
6189
}
6290
numHeads := len(c.Wh1r[0])
6391
m := len(c.Wh1r[0][0])
@@ -75,7 +103,7 @@ func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float6
75103
return prediction, heads
76104
}
77105

78-
func loss(c Controller, forward func(Controller, [][]float64, []float64) ([]float64, []*Head), in, out [][]float64) float64 {
106+
func loss(c Controller, forward ControllerForward, in [][]float64, model DensityModel) float64 {
79107
// Initialize memory as in the function ForwardBackward
80108
mem := c.Mtm1BiasV().Top
81109
wtm1Bs := c.Wtm1BiasV()
@@ -102,10 +130,11 @@ func loss(c Controller, forward func(Controller, [][]float64, []float64) ([]floa
102130
}
103131
}
104132

105-
prediction := make([][]float64, len(out))
133+
prediction := make([][]float64, len(in))
106134
var heads []*Head
107135
for t := 0; t < len(in); t++ {
108136
prediction[t], heads = forward(c, reads, in[t])
137+
prediction[t] = computeDensity(t, prediction[t], model)
109138
for i := 0; i < len(heads); i++ {
110139
heads[i].Wtm1 = wtm1s[i]
111140
}
@@ -115,24 +144,27 @@ func loss(c Controller, forward func(Controller, [][]float64, []float64) ([]floa
115144
mem = transformMemFloat64(memFloat64)
116145
}
117146

118-
var llh float64 = 0 // log likelihood
119-
for t := 0; t < len(out); t++ {
120-
for i := 0; i < len(out[t]); i++ {
121-
p := prediction[t][i]
122-
y := out[t][i]
123-
llh += y*math.Log(p) + (1-y)*math.Log(1-p)
124-
}
147+
return model.Loss(prediction)
148+
}
149+
150+
func computeDensity(timestep int, pred []float64, model DensityModel) []float64 {
151+
units := make([]Unit, len(pred))
152+
for j := range units {
153+
units[j].Val = pred[j]
125154
}
126-
return -llh
155+
model.Model(timestep, units)
156+
return UnitVals(units)
127157
}
128158

129-
func checkGradients(t *testing.T, c Controller, forward func(Controller, [][]float64, []float64) ([]float64, []*Head), in, out [][]float64, lx float64) {
159+
func checkGradients(t *testing.T, c Controller, forward ControllerForward, in [][]float64, model DensityModel) {
160+
lx := loss(c, forward, in, model)
161+
130162
c.WeightsVerbose(func(tag string, w *Unit) {
131163
x := w.Val
132164
h := machineEpsilonSqrt * math.Max(math.Abs(x), 1)
133165
xph := x + h
134166
w.Val = xph
135-
lxph := loss(c, forward, in, out)
167+
lxph := loss(c, forward, in, model)
136168
w.Val = x
137169
grad := (lxph - lx) / (xph - x)
138170

copytask/test/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ func main() {
4545
runs := make([]Run, 0, len(seqLens))
4646
for _, seql := range seqLens {
4747
x, y := copytask.GenSeq(seql, vectorSize)
48-
machines := ntm.ForwardBackward(c, x, y)
49-
l := ntm.Loss(y, machines)
48+
model := &ntm.LogisticModel{Y: y}
49+
machines := ntm.ForwardBackward(c, x, model)
50+
l := model.Loss(ntm.Predictions(machines))
5051
bps := l / float64(len(y)*len(y[0]))
5152
log.Printf("sequence length: %d, loss: %f", seql, bps)
5253

copytask/train/main.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,10 @@ func main() {
7575
log.Printf("numweights: %d", c.NumWeights())
7676
for i := 1; ; i++ {
7777
x, y := copytask.GenSeq(rand.Intn(20)+1, vectorSize)
78-
//machines := sgd.Train(x, y, 1e-4, 0.9)
79-
machines := rmsp.Train(x, y, 0.95, 0.5, 1e-3, 1e-3)
80-
l := ntm.Loss(y, machines)
78+
model := &ntm.LogisticModel{Y: y}
79+
//machines := sgd.Train(x, model, 1e-4, 0.9)
80+
machines := rmsp.Train(x, model, 0.95, 0.5, 1e-3, 1e-3)
81+
l := model.Loss(ntm.Predictions(machines))
8182
if i%1000 == 0 {
8283
bpc := l / float64(len(y)*len(y[0]))
8384
losses = append(losses, bpc)

math.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ func Sigmoid(x float64) float64 {
1515
return 1.0 / (1 + math.Exp(-x))
1616
}
1717

18+
func delta(a, b int) float64 {
19+
if a == b {
20+
return 1
21+
}
22+
return 0
23+
}
24+
1825
func cosineSimilarity(u, v []float64) float64 {
1926
var sum float64 = 0
2027
var usum float64 = 0

ngram/test/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ func main() {
4949
sampletimes := 100
5050
for j := 0; j < sampletimes; j++ {
5151
x, y = ngram.GenSeq(prob)
52-
machines = ntm.ForwardBackward(c, x, y)
53-
l += ntm.Loss(y, machines)
52+
model := &ntm.LogisticModel{Y: y}
53+
machines = ntm.ForwardBackward(c, x, model)
54+
l += model.Loss(ntm.Predictions(machines))
5455
if (j+1)%10 == 0 {
5556
log.Printf("%d %d %f", i, j+1, l/float64(j+1))
5657
}

ngram/train/main.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,17 @@ func main() {
7171
log.Printf("seed: %d, numweights: %d, numHeads: %d", seed, c.NumWeights(), c.NumHeads())
7272
for i := 1; ; i++ {
7373
x, y := ngram.GenSeq(ngram.GenProb())
74-
machines := rmsp.Train(x, y, 0.95, 0.5, 1e-3, 1e-3)
74+
machines := rmsp.Train(x, &ntm.LogisticModel{Y: y}, 0.95, 0.5, 1e-3, 1e-3)
7575

7676
if i%10000 == 0 {
7777
prob := ngram.GenProb()
7878
var l float64 = 0
7979
samn := 1000
8080
for j := 0; j < samn; j++ {
8181
x, y = ngram.GenSeq(prob)
82-
machines = ntm.ForwardBackward(c, x, y)
83-
l += ntm.Loss(y, machines)
82+
model := &ntm.LogisticModel{Y: y}
83+
machines = ntm.ForwardBackward(c, x, model)
84+
l += model.Loss(ntm.Predictions(machines))
8485
}
8586
l = l / float64(samn)
8687
losses = append(losses, l)

0 commit comments

Comments
 (0)