diff --git a/Chapter06/alice_chargen_rnn.py b/Chapter06/alice_chargen_rnn.py
new file mode 100644
index 0000000..a6d4c41
--- /dev/null
+++ b/Chapter06/alice_chargen_rnn.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# Adapted from lstm_text_generation.py in keras/examples
+from __future__ import print_function
+from keras.layers.recurrent import SimpleRNN
+from keras.models import Sequential
+from keras.layers import Dense, Activation
+import numpy as np
+
+INPUT_FILE = "../data/alice_in_wonderland.txt"
+
+# extract the input as a stream of characters
+print("Extracting text from input...")
+fin = open(INPUT_FILE, 'rb')
+lines = []
+for line in fin:
+    line = line.strip().lower()
+    line = line.decode("ascii", "ignore")
+    if len(line) == 0:
+        continue
+    lines.append(line)
+fin.close()
+text = " ".join(lines)
+
+# creating lookup tables
+# chars is the set of distinct characters in the text (our character
+# "vocabulary"); nb_chars is its size, i.e. the number of 1-hot features
+chars = set([c for c in text])
+nb_chars = len(chars)
+char2index = dict((c, i) for i, c in enumerate(chars))
+index2char = dict((i, c) for i, c in enumerate(chars))
+
+# create inputs and labels from the text. We do this by stepping
+# through the text ${step} character at a time, and extracting a
+# sequence of size ${seqlen} and the next output char. For example,
+# assuming an input text "The sky was falling", we would get the
+# following sequence of input_chars and label_chars (first 5 only)
+#   The sky wa -> s
+#   he sky was -> (space)
+#   e sky was  -> f
+#    sky was f -> a
+#   sky was fa -> l
print("Creating input and label text...")
+SEQLEN = 10
+STEP = 1
+
+input_chars = []
+label_chars = []
+for i in range(0, len(text) - SEQLEN, STEP):
+    input_chars.append(text[i:i + SEQLEN])
+    label_chars.append(text[i + SEQLEN])
+
+# vectorize the input and label chars
+# Each row of the input is represented by SEQLEN characters, each
+# represented as a 1-hot encoding of size nb_chars. There are
+# len(input_chars) such rows, so shape(X) is (len(input_chars),
+# SEQLEN, nb_chars).
+# Each row of output is a single character, also represented as a
+# 1-hot encoding of size nb_chars. Hence shape(y) is (len(input_chars),
+# nb_chars).
+print("Vectorizing input and label text...")
+X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=bool)
+y = np.zeros((len(input_chars), nb_chars), dtype=bool)
+for i, input_char in enumerate(input_chars):
+    for j, ch in enumerate(input_char):
+        X[i, j, char2index[ch]] = 1
+    y[i, char2index[label_chars[i]]] = 1
+
+# Build the model. We use a single RNN with a fully connected layer
+# to compute the most likely predicted output char
+HIDDEN_SIZE = 128
+BATCH_SIZE = 128
+NUM_ITERATIONS = 25
+NUM_EPOCHS_PER_ITERATION = 1
+NUM_PREDS_PER_EPOCH = 100
+
+model = Sequential()
+model.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
+                    input_shape=(SEQLEN, nb_chars),
+                    unroll=True))
+model.add(Dense(nb_chars))
+model.add(Activation("softmax"))
+
+model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
+
+# We train the model in batches and test output generated at each step
+for iteration in range(NUM_ITERATIONS):
+    print("=" * 50)
+    print("Iteration #: %d" % (iteration))
+    model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
+
+    # testing model
+    # randomly choose a row from input_chars, then use it to
+    # generate text from model for next 100 chars
+    test_idx = np.random.randint(len(input_chars))
+    test_chars = input_chars[test_idx]
+    print("Generating from seed: %s" % (test_chars))
+    print(test_chars, end="")
+    for i in range(NUM_PREDS_PER_EPOCH):
+        Xtest = np.zeros((1, SEQLEN, nb_chars))
+        for j, ch in enumerate(test_chars):
+            Xtest[0, j, char2index[ch]] = 1
+        pred = model.predict(Xtest, verbose=0)[0]
+        ypred = index2char[np.argmax(pred)]
+        print(ypred, end="")
+        # move forward with test_chars + ypred
+        test_chars = test_chars[1:] + ypred
+    print()
diff --git a/Chapter06/econs_data.py b/Chapter06/econs_data.py
new file mode 100644
index 0000000..2f79c76
--- /dev/null
+++ b/Chapter06/econs_data.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+from __future__ import division, print_function
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import re
+
+DATA_DIR = "../data"
+
+fld = open(os.path.join(DATA_DIR, "LD2011_2014.txt"), "rb")
+data = []
+line_num = 0
+#cid = np.random.randint(0, 370, 1)
+cid = 250
+for line in fld:
+    if line.startswith("\"\";"):
+        continue
+    if line_num % 100 == 0:
+        print("{:d} lines read".format(line_num))
+    cols = [float(re.sub(",", ".", x)) for x in
+            line.strip().split(";")[1:]]
+    data.append(cols[cid])
+    line_num += 1
+fld.close()
+
+NUM_ENTRIES = 1000
+plt.plot(range(NUM_ENTRIES), data[0:NUM_ENTRIES])
+plt.ylabel("electricity consumption")
+plt.xlabel("time (1pt = 15 mins)")
+plt.show()
+
+np.save(os.path.join(DATA_DIR, "LD_250.npy"), np.array(data))
diff --git a/Chapter06/econs_stateful.py b/Chapter06/econs_stateful.py
new file mode 100644
index 0000000..b554000
--- /dev/null
+++ b/Chapter06/econs_stateful.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from __future__ import division, print_function
+from keras.layers.core import Dense
+from keras.layers.recurrent import LSTM
+from keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+import numpy as np
+import math
+import os
+
+DATA_DIR = "../data"
+
+data = np.load(os.path.join(DATA_DIR, "LD_250.npy"))
+
+STATELESS = False
+
+NUM_TIMESTEPS = 20
+HIDDEN_SIZE = 10
+BATCH_SIZE = 96  # 24 hours (15 min intervals)
+NUM_EPOCHS = 5
+
+# scale the data to be in the range (0, 1)
+data = data.reshape(-1, 1)
+scaler = MinMaxScaler(feature_range=(0, 1), copy=False)
+data = scaler.fit_transform(data)
+
+# transform to NUM_TIMESTEPS inputs -> 1 label format
+X = np.zeros((data.shape[0], NUM_TIMESTEPS))
+Y = np.zeros((data.shape[0], 1))
+for i in range(len(data) - NUM_TIMESTEPS - 1):
+    X[i] = data[i:i + NUM_TIMESTEPS].T
+    Y[i] = data[i + NUM_TIMESTEPS + 1]
+
+# reshape X to three dimensions (samples, timesteps, features)
+X = np.expand_dims(X, axis=2)
+
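+# Sanity check on the windowing above: row i of X holds the NUM_TIMESTEPS
+# readings data[i:i + NUM_TIMESTEPS] and Y[i] holds the reading at index
+# i + NUM_TIMESTEPS + 1, so after expand_dims X has shape
+# (len(data), NUM_TIMESTEPS, 1) and Y has shape (len(data), 1).
+#print(X.shape, Y.shape)
+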
+# split into training and test sets
+sp = int(0.7 * len(data))
+Xtrain, Xtest, Ytrain, Ytest = X[0:sp], X[sp:], Y[0:sp], Y[sp:]
+print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)
+
+if STATELESS:
+    # stateless
+    model = Sequential()
+    model.add(LSTM(HIDDEN_SIZE, input_shape=(NUM_TIMESTEPS, 1),
+                   return_sequences=False))
+    model.add(Dense(1))
+else:
+    # stateful
+    model = Sequential()
+    model.add(LSTM(HIDDEN_SIZE, stateful=True,
+                   batch_input_shape=(BATCH_SIZE, NUM_TIMESTEPS, 1),
+                   return_sequences=False))
+    model.add(Dense(1))
+
+model.compile(loss="mean_squared_error", optimizer="adam",
+              metrics=["mean_squared_error"])
+
+if STATELESS:
+    # stateless
+    model.fit(Xtrain, Ytrain, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,
+              validation_data=(Xtest, Ytest),
+              shuffle=False)
+else:
+    # stateful
+    # need to make training and test data a multiple of BATCH_SIZE
+    train_size = (Xtrain.shape[0] // BATCH_SIZE) * BATCH_SIZE
+    test_size = (Xtest.shape[0] // BATCH_SIZE) * BATCH_SIZE
+    Xtrain, Ytrain = Xtrain[0:train_size], Ytrain[0:train_size]
+    Xtest, Ytest = Xtest[0:test_size], Ytest[0:test_size]
+    print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)
+    for i in range(NUM_EPOCHS):
+        print("Epoch {:d}/{:d}".format(i+1, NUM_EPOCHS))
+        model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=1,
+                  validation_data=(Xtest, Ytest),
+                  shuffle=False)
+        model.reset_states()
+
+score, _ = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE)
+rmse = math.sqrt(score)
+print("\nMSE: {:.3f}, RMSE: {:.3f}".format(score, rmse))
diff --git a/Chapter06/pos-tagging-explore.py b/Chapter06/pos-tagging-explore.py
new file mode 100644
index 0000000..e170cf4
--- /dev/null
+++ b/Chapter06/pos-tagging-explore.py
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+from __future__ import division, print_function
+from keras.layers.core import Activation, Dense, Dropout, RepeatVector, SpatialDropout1D
+from keras.layers.embeddings import Embedding
+from keras.layers.recurrent import GRU, LSTM
+from keras.layers.wrappers import TimeDistributed, Bidirectional
+from keras.models import Sequential
+from keras.preprocessing import sequence
+from keras.utils import np_utils
+import collections
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+
+def explore_data(datadir, datafiles):
+    counter = collections.Counter()
+    maxlen = 0
+    for datafile in datafiles:
+        fdata = open(os.path.join(datadir, datafile), "rb")
+        for line in fdata:
+            words = line.strip().split()
+            if len(words) > maxlen:
+                maxlen = len(words)
+            for word in words:
+                counter[word] += 1
+        fdata.close()
+    return maxlen, counter
+
+def build_tensor(filename, numrecs, word2index, maxlen,
+                 make_categorical=False):
+    data = np.empty((numrecs, ), dtype=list)
+    fin = open(filename, "rb")
+    i = 0
+    for line in fin:
+        wids = []
+        for word in line.strip().split():
+            if word in word2index:
+                wids.append(word2index[word])
+            else:
+                wids.append(word2index["UNK"])
+        if make_categorical:
+            data[i] = np_utils.to_categorical(
+                wids, num_classes=len(word2index))
+        else:
+            data[i] = wids
+        i += 1
+    fin.close()
+    pdata = sequence.pad_sequences(data, maxlen=maxlen)
+    return pdata
+
+def evaluate_model(model, Xtest, Ytest, batch_size):
+    pass
+
+DATA_DIR = "../data"
+
+s_maxlen, s_counter = explore_data(DATA_DIR, ["babi-sent-train.txt",
+                                              "babi-sent-test.txt"])
+t_maxlen, t_counter = explore_data(DATA_DIR, ["babi-pos-train.txt",
+                                              "babi-pos-test.txt"])
+
+print(s_maxlen, len(s_counter), t_maxlen, len(t_counter))
+# 7 21 7 9
+# maxlen: 7
+# size of source vocab: 21
+# size of target vocab: 9
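+
+# As an illustration (not output from this script): for a toy corpus,
+# collections.Counter("a a a b b c".split()).most_common() returns
+# [('a', 3), ('b', 2), ('c', 1)], so the tables below would map
+# a -> 1, b -> 2, c -> 3, with id 0 reserved for "PAD".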
+
+# lookup tables
+s_word2id = {k:v+1 for v, (k, _) in enumerate(s_counter.most_common())}
+s_word2id["PAD"] = 0
+s_id2word = {v:k for k, v in s_word2id.items()}
+t_pos2id = {k:v+1 for v, (k, _) in enumerate(t_counter.most_common())}
+t_pos2id["PAD"] = 0
+t_id2pos = {v:k for k, v in t_pos2id.items()}
+
+# vectorize data
+MAX_SEQLEN = 10
+
+Xtrain = build_tensor(os.path.join(DATA_DIR, "babi-sent-train.txt"),
+                      30000, s_word2id, MAX_SEQLEN)
+Xtest = build_tensor(os.path.join(DATA_DIR, "babi-sent-test.txt"),
+                     3000, s_word2id, MAX_SEQLEN)
+Ytrain = build_tensor(os.path.join(DATA_DIR, "babi-pos-train.txt"),
+                      30000, t_pos2id, MAX_SEQLEN, make_categorical=True)
+Ytest = build_tensor(os.path.join(DATA_DIR, "babi-pos-test.txt"),
+                     3000, t_pos2id, MAX_SEQLEN, make_categorical=True)
+print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)
+
+# define network
+EMBED_SIZE = 32
+HIDDEN_SIZE = 32
+
+BATCH_SIZE = 32
+NUM_EPOCHS = 5
+
+model = Sequential()
+model.add(Embedding(len(s_word2id), EMBED_SIZE,
+                    input_length=MAX_SEQLEN))
+model.add(SpatialDropout1D(0.2))
+model.add(LSTM(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2))
+#model.add(GRU(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2))
+#model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2)))
+model.add(RepeatVector(MAX_SEQLEN))
+model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
+#model.add(GRU(HIDDEN_SIZE, return_sequences=True))
+#model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
+model.add(TimeDistributed(Dense(len(t_pos2id))))
+model.add(Activation("softmax"))
+
+model.compile(loss="categorical_crossentropy", optimizer="adam",
+              metrics=["accuracy"])
+
+history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS,
+                    validation_data=[Xtest, Ytest])
+
+# plot loss and accuracy
+plt.subplot(211)
+plt.title("Accuracy")
+plt.plot(history.history["acc"], color="g", label="Train")
+plt.plot(history.history["val_acc"], color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.subplot(212)
+plt.title("Loss")
+plt.plot(history.history["loss"], color="g", label="Train")
+plt.plot(history.history["val_loss"], color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.tight_layout()
+plt.show()
+
+# evaluate model
+score, acc = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE)
+print("Test score: %.3f, accuracy: %.3f" % (score, acc))
+
+# custom evaluate
+hit_rates = []
+num_iters = Xtest.shape[0] // BATCH_SIZE
+for i in range(num_iters - 1):
+    xtest = Xtest[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]
+    ytest = np.argmax(Ytest[i * BATCH_SIZE : (i + 1) * BATCH_SIZE], axis=2)
+    ytest_ = np.argmax(model.predict(xtest), axis=2)
+#    print(ytest.shape, ytest_.shape)
+    for j in range(BATCH_SIZE):
+#        print("sentence: " + " ".join([s_id2word[x] for x in xtest[j].tolist()]))
+#        print("predicted: " + " ".join([t_id2pos[y] for y in ytest_[j].tolist()]))
+#        print("label: " + " ".join([t_id2pos[y] for y in ytest[j].tolist()]))
+        word_indices = np.nonzero(xtest[j])
+        pos_labels = ytest[j][word_indices]
+        pos_pred = ytest_[j][word_indices]
+        hit_rates.append(np.sum(pos_labels == pos_pred) / len(pos_pred))
+    break
+
+accuracy = sum(hit_rates) / len(hit_rates)
+print("accuracy: {:.3f}".format(accuracy))
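+
+# Note: the accuracy reported by model.evaluate() above is computed over all
+# MAX_SEQLEN positions, PAD included, which flatters the model because PAD
+# positions are trivial to predict; the loop above therefore recomputes a hit
+# rate restricted to the non-PAD word positions.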
".join([s_id2word[x] for x in xtest[0].tolist()])) + print("predicted: " + " ".join([t_id2pos[y] for y in ytest_[0].tolist()])) + print("label: " + " ".join([t_id2pos[y] for y in ytest.tolist()])) + word_indices = np.nonzero(xtest)[1] + ypred_tags = ytest_[0][word_indices] + ytrue_tags = ytest[word_indices] + hit_rate = np.sum(ypred_tags == ytrue_tags) / len(ypred_tags) + print("hit rate: {:.3f}".format(hit_rate)) + print() diff --git a/Chapter06/pos_tagging_data.py b/Chapter06/pos_tagging_data.py new file mode 100644 index 0000000..2cd14ea --- /dev/null +++ b/Chapter06/pos_tagging_data.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +# Copied from: Out of core classification of Text Documents +# from the scikit-learn documentation. +# http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html +# +from __future__ import division, print_function +from sklearn.externals.six.moves import html_parser +from glob import glob +import collections +import nltk +import os +import re + +class ReutersParser(html_parser.HTMLParser): + """ Utility class to parse a SGML file and yield documents one at + a time. + """ + def __init__(self, encoding='latin-1'): + html_parser.HTMLParser.__init__(self) + self._reset() + self.encoding = encoding + + def handle_starttag(self, tag, attrs): + method = 'start_' + tag + getattr(self, method, lambda x: None)(attrs) + + def handle_endtag(self, tag): + method = 'end_' + tag + getattr(self, method, lambda: None)() + + def _reset(self): + self.in_title = 0 + self.in_body = 0 + self.in_topics = 0 + self.in_topic_d = 0 + self.title = "" + self.body = "" + self.topics = [] + self.topic_d = "" + + def parse(self, fd): + self.docs = [] + for chunk in fd: + self.feed(chunk.decode(self.encoding)) + for doc in self.docs: + yield doc + self.docs = [] + self.close() + + def handle_data(self, data): + if self.in_body: + self.body += data + elif self.in_title: + self.title += data + elif self.in_topic_d: + self.topic_d += data + + def start_reuters(self, attributes): + pass + + def end_reuters(self): + self.body = re.sub(r'\s+', r' ', self.body) + self.docs.append({'title': self.title, + 'body': self.body, + 'topics': self.topics}) + self._reset() + + def start_title(self, attributes): + self.in_title = 1 + + def end_title(self): + self.in_title = 0 + + def start_body(self, attributes): + self.in_body = 1 + + def end_body(self): + self.in_body = 0 + + def start_topics(self, attributes): + self.in_topics = 1 + + def end_topics(self): + self.in_topics = 0 + + def start_d(self, attributes): + self.in_topic_d = 1 + + def end_d(self): + self.in_topic_d = 0 + self.topics.append(self.topic_d) + self.topic_d = "" + + +def stream_reuters_documents(reuters_dir): + """ Iterate over documents of the Reuters dataset. + + The Reuters archive will automatically be downloaded and uncompressed if + the `data_path` directory does not exist. + + Documents are represented as dictionaries with 'body' (str), + 'title' (str), 'topics' (list(str)) keys. 
+ + """ + parser = ReutersParser() + for filename in glob(os.path.join(reuters_dir, "*.sgm")): + for doc in parser.parse(open(filename, 'rb')): + yield doc + + +##################### main ###################### + +DATA_DIR = "../data" +REUTERS_DIR = os.path.join(DATA_DIR, "reuters-21578") + +num_read = 0 +num_sents = 0 + +fsent = open(os.path.join(DATA_DIR, "reuters-sent.txt"), "wb") +fpos = open(os.path.join(DATA_DIR, "reuters-pos.txt"), "wb") +tagger = nltk.tag.PerceptronTagger() + +for doc in stream_reuters_documents(REUTERS_DIR): + # skip docs without specified topic + topics = doc["topics"] + if len(topics) == 0: + continue + title = doc["title"] + body = doc["body"] + sents = nltk.sent_tokenize(body) + for sent in sents: + if num_sents % 100 == 0: + print("{:d} sentences written".format(num_sents)) + if len(sent) <= 20: + continue + sent = sent.encode("utf8").decode("ascii", "ignore") + words = nltk.word_tokenize(sent) + fsent.write("{:s}\n".format(" ".join(words))) + tokentags = nltk.tag._pos_tag(words, None, tagger) + fpos.write("{:s}\n".format(" ".join([x[1] for x in tokentags]))) + num_sents += 1 + +fsent.close() +fpos.close() +print("{:d} sentences written, COMPLETE".format(num_sents)) diff --git a/Chapter06/pos_tagging_gru.py b/Chapter06/pos_tagging_gru.py new file mode 100644 index 0000000..9dc7b6e --- /dev/null +++ b/Chapter06/pos_tagging_gru.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +from __future__ import division, print_function +from keras.layers.core import Activation, Dense, RepeatVector +from keras.layers.embeddings import Embedding +from keras.layers.recurrent import GRU, LSTM +from keras.layers.wrappers import TimeDistributed, Bidirectional +from keras.metrics import top_k_categorical_accuracy +from keras.models import Sequential +from keras.optimizers import Adam +from keras.preprocessing import sequence +from keras.utils import np_utils +from sklearn.model_selection import train_test_split +import collections +import matplotlib.pyplot as plt +import numpy as np +import os + +def parse_sentences(filename): + sents = [] + word_freqs = collections.Counter() + fin = open(filename, "rb") + for line in fin: + words = line.strip().lower().split() + for word in words: + word_freqs[word] += 1 + sents.append(words) + fin.close() + return sents, word_freqs + +def get_or_else(dictionary, key, default_value): + try: + return dictionary[key] + except KeyError: + return default_value + +def generate_batch(s_sents, s_word2index, t_sents, t_word2index, + batch_size, maxlen): + while True: + # shuffle the input + indices = np.random.permutation(np.arange(len(s_sents))) + ss_sents = [s_sents[ix] for ix in indices] + ts_sents = [t_sents[ix] for ix in indices] + # convert to word indices + si_sents = [[get_or_else(s_word2index, word, s_word2index["UNK"]) + for word in sent] + for sent in ss_sents] + ti_sents = [[t_word2index[word] for word in sent] + for sent in ts_sents] + # inner loop should run for an epoch + num_batches = len(s_sents) // batch_size + for i in range(num_batches): + s_batch = si_sents[i * batch_size : (i + 1) * batch_size] + t_batch = ti_sents[i * batch_size : (i + 1) * batch_size] + sp_batch = sequence.pad_sequences(s_batch, maxlen=maxlen) + tp_batch = sequence.pad_sequences(t_batch, maxlen=maxlen) + tpc_batch = np_utils.to_categorical(tp_batch.reshape(-1, 1), + num_classes=len(t_word2index)).reshape(batch_size, + -1, len(t_word2index)) + yield sp_batch, tpc_batch + + +def top_3_categorical_accuracy(ytrue, ypred): + return top_k_categorical_accuracy(ytrue, 
+
+
+def top_3_categorical_accuracy(ytrue, ypred):
+    return top_k_categorical_accuracy(ytrue, ypred, k=3)
+
+
+########################## main ##########################
+
+DATA_DIR = "../data"
+
+# data exploration, set constants
+s_sents, s_wordfreqs = parse_sentences(os.path.join(DATA_DIR, "reuters-sent.txt"))
+t_sents, t_wordfreqs = parse_sentences(os.path.join(DATA_DIR, "reuters-pos.txt"))
+sent_lengths = np.array([len(sent) for sent in s_sents])
+
+print("# records: {:d}".format(len(s_sents)))
+print("# unique words: {:d}".format(len(s_wordfreqs)))
+print("# unique POS tags: {:d}".format(len(t_wordfreqs)))
+print("# words/sentence: min: {:d}, max: {:d}, mean: {:.3f}, median: {:.0f}"
+      .format(np.min(sent_lengths), np.max(sent_lengths),
+              np.mean(sent_lengths), np.median(sent_lengths)))
+
+## records: 103126
+## unique words: 67749
+## unique POS tags: 44
+## words/sentence: min: 3, max: 429, mean: 26.694, median: 26
+## np.where(sent_lengths <= 50)[0].shape
+## (100343,)
+## Gives rise to the following constants
+
+MAX_SEQLEN = 50
+S_MAX_FEATURES = 50000
+T_MAX_FEATURES = 45
+
+EMBED_SIZE = 300
+HIDDEN_SIZE = 100
+
+BATCH_SIZE = 64
+
+# train for NUM_ITERATIONS blocks of NUM_EPOCHS epochs (1000 epochs in all),
+# showing sample predictions after each block of 50 epochs
+NUM_EPOCHS = 50
+NUM_ITERATIONS = 20
+
+# lookup tables
+s_vocabsize = min(len(s_wordfreqs), S_MAX_FEATURES) + 2
+s_word2index = {x[0]:i+2 for i, x in
+                enumerate(s_wordfreqs.most_common(S_MAX_FEATURES))}
+s_word2index["PAD"] = 0
+s_word2index["UNK"] = 1
+s_index2word = {v:k for k, v in s_word2index.items()}
+
+t_vocabsize = len(t_wordfreqs) + 1
+t_word2index = {x[0]:i+1 for i, x in
+                enumerate(t_wordfreqs.most_common(T_MAX_FEATURES))}
+t_word2index["PAD"] = 0
+t_index2word = {v:k for k, v in t_word2index.items()}
+
+# split into train and test
+test_size = int(0.3 * len(s_sents))
+s_sents_train, s_sents_test = s_sents[0:-test_size], s_sents[-test_size:]
+t_sents_train, t_sents_test = t_sents[0:-test_size], t_sents[-test_size:]
+train_gen = generate_batch(s_sents_train, s_word2index, t_sents_train,
+                           t_word2index, BATCH_SIZE, MAX_SEQLEN)
+test_gen = generate_batch(s_sents_test, s_word2index, t_sents_test,
+                          t_word2index, BATCH_SIZE, MAX_SEQLEN)
+print(len(s_sents_train), len(s_sents_test))
+
+# define network
+model = Sequential()
+model.add(Embedding(s_vocabsize, EMBED_SIZE,
+                    input_length=MAX_SEQLEN,
+                    embeddings_initializer="glorot_uniform"))
+#model.add(GRU(HIDDEN_SIZE))
+model.add(LSTM(HIDDEN_SIZE))
+#model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2)))
+model.add(RepeatVector(MAX_SEQLEN))
+#model.add(GRU(HIDDEN_SIZE, return_sequences=True))
+model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
+#model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
+model.add(TimeDistributed(Dense(t_vocabsize)))
+model.add(Activation("softmax"))
+
+model.compile(loss="categorical_crossentropy", optimizer="adam",
+              metrics=["accuracy"])
+
+num_train_samples = len(s_sents_train) // BATCH_SIZE
+num_test_samples = len(s_sents_test) // BATCH_SIZE
+
+hist_acc, hist_val_acc, hist_loss, hist_val_loss = [], [], [], []
+for i in range(NUM_ITERATIONS):
+    history = model.fit_generator(train_gen,
+                                  steps_per_epoch=num_train_samples,
+                                  epochs=NUM_EPOCHS,
+                                  validation_data=test_gen,
+                                  validation_steps=num_test_samples)
+    # save off history data
+    hist_acc.extend(history.history["acc"])
+    hist_val_acc.extend(history.history["val_acc"])
+    hist_loss.extend(history.history["loss"])
+    hist_val_loss.extend(history.history["val_loss"])
+    # show some predictions
+    Xtest, Ytest = next(test_gen)
+    Ytest_ = model.predict(Xtest)
+    ytest = np.argmax(Ytest, axis=2)
+    ytest_ = np.argmax(Ytest_, axis=2)
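+    # (illustrative addition, not in the original script) a PAD-masked hit
+    # rate for this validation batch, along the lines of the custom
+    # evaluation in pos-tagging-explore.py, could be computed as:
+    #mask = Xtest > 0
+    #print("non-PAD hit rate: {:.3f}".format(
+    #    np.sum((ytest == ytest_) & mask) / np.sum(mask)))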
+    print("=" * 80)
+    print("Iteration # {:d}".format(i + 1))
+    print("-" * 80)
+    for j in range(min(5, Ytest.shape[0])):
+        sent_ids = Xtest[j]
+        sent_words = [s_index2word[x] for x in sent_ids.tolist()]
+        pos_labels = [t_index2word[x] for x in ytest[j].tolist()]
+        pos_preds = [t_index2word[x] for x in ytest_[j].tolist()]
+        triples = [x for x in zip(sent_words, pos_labels, pos_preds)
+                   if x[0] != "PAD"]
+        print("label: " + " ".join([x[0] + "/" + x[1].upper()
+                                    for x in triples]))
+        print("predicted: " + " ".join([x[0] + "/" + x[2].upper()
+                                        for x in triples]))
+    print("-" * 80)
+
+# plot loss and accuracy
+plt.subplot(211)
+plt.title("Accuracy")
+plt.plot(hist_acc, color="g", label="Train")
+plt.plot(hist_val_acc, color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.subplot(212)
+plt.title("Loss")
+plt.plot(hist_loss, color="g", label="Train")
+plt.plot(hist_val_loss, color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.tight_layout()
+plt.show()
+
diff --git a/Chapter06/umich_sentiment_lstm.py b/Chapter06/umich_sentiment_lstm.py
new file mode 100644
index 0000000..38f5410
--- /dev/null
+++ b/Chapter06/umich_sentiment_lstm.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+from keras.layers.core import Activation, Dense, Dropout, SpatialDropout1D
+from keras.layers.embeddings import Embedding
+from keras.layers.recurrent import LSTM
+from keras.models import Sequential
+from keras.preprocessing import sequence
+from sklearn.model_selection import train_test_split
+import collections
+import matplotlib.pyplot as plt
+import nltk
+import numpy as np
+import os
+
+DATA_DIR = "../data"
+
+MAX_FEATURES = 2000
+MAX_SENTENCE_LENGTH = 40
+
+EMBEDDING_SIZE = 128
+HIDDEN_LAYER_SIZE = 64
+BATCH_SIZE = 32
+NUM_EPOCHS = 10
+
+# Read training data and generate vocabulary
+maxlen = 0
+word_freqs = collections.Counter()
+num_recs = 0
+ftrain = open(os.path.join(DATA_DIR, "umich-sentiment-train.txt"), 'rb')
+for line in ftrain:
+    label, sentence = line.strip().split("\t")
+    words = nltk.word_tokenize(sentence.decode("ascii", "ignore").lower())
+    if len(words) > maxlen:
+        maxlen = len(words)
+    for word in words:
+        word_freqs[word] += 1
+    num_recs += 1
+ftrain.close()
+
+## Get some information about our corpus
+#print maxlen            # 42
+#print len(word_freqs)   # 2313
+
+# 0 is reserved for PAD and 1 for UNK
+# We keep the MAX_FEATURES most frequent words and add 2 to the vocabulary
+# size to account for the PAD and UNK pseudo-words
+vocab_size = min(MAX_FEATURES, len(word_freqs)) + 2
+word2index = {x[0]: i+2 for i, x in
+              enumerate(word_freqs.most_common(MAX_FEATURES))}
+word2index["PAD"] = 0
+word2index["UNK"] = 1
+index2word = {v:k for k, v in word2index.items()}
+
+# convert sentences to sequences
+X = np.empty((num_recs, ), dtype=list)
+y = np.zeros((num_recs, ))
+i = 0
+ftrain = open(os.path.join(DATA_DIR, "umich-sentiment-train.txt"), 'rb')
+for line in ftrain:
+    label, sentence = line.strip().split("\t")
+    words = nltk.word_tokenize(sentence.decode("ascii", "ignore").lower())
+    seqs = []
+    for word in words:
+        if word in word2index:
+            seqs.append(word2index[word])
+        else:
+            seqs.append(word2index["UNK"])
+    X[i] = seqs
+    y[i] = int(label)
+    i += 1
+ftrain.close()
+
+# Pad the sequences (left padded with zeros)
+X = sequence.pad_sequences(X, maxlen=MAX_SENTENCE_LENGTH)
+
+# Split input into training and test
+Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2,
+                                                random_state=42)
+print(Xtrain.shape, Xtest.shape, ytrain.shape, ytest.shape)
+
+# Build model
+model = Sequential()
+model.add(Embedding(vocab_size, EMBEDDING_SIZE,
+                    input_length=MAX_SENTENCE_LENGTH))
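+# SpatialDropout1D drops entire embedding channels (whole columns of the
+# (timesteps, embedding_size) output) instead of individual activations,
+# which is generally a better fit for the correlated outputs of an
+# Embedding layer than plain Dropout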
+model.add(SpatialDropout1D(0.2))
+model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
+model.add(Dense(1))
+model.add(Activation("sigmoid"))
+
+model.compile(loss="binary_crossentropy", optimizer="adam",
+              metrics=["accuracy"])
+
+history = model.fit(Xtrain, ytrain, batch_size=BATCH_SIZE,
+                    epochs=NUM_EPOCHS,
+                    validation_data=(Xtest, ytest))
+
+# plot loss and accuracy
+plt.subplot(211)
+plt.title("Accuracy")
+plt.plot(history.history["acc"], color="g", label="Train")
+plt.plot(history.history["val_acc"], color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.subplot(212)
+plt.title("Loss")
+plt.plot(history.history["loss"], color="g", label="Train")
+plt.plot(history.history["val_loss"], color="b", label="Validation")
+plt.legend(loc="best")
+
+plt.tight_layout()
+plt.show()
+
+# evaluate
+score, acc = model.evaluate(Xtest, ytest, batch_size=BATCH_SIZE)
+print("Test score: %.3f, accuracy: %.3f" % (score, acc))
+
+for i in range(5):
+    idx = np.random.randint(len(Xtest))
+    xtest = Xtest[idx].reshape(1,40)
+    ylabel = ytest[idx]
+    ypred = model.predict(xtest)[0][0]
+    sent = " ".join([index2word[x] for x in xtest[0].tolist() if x != 0])
+    print("%.0f\t%d\t%s" % (ypred, ylabel, sent))
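+
+# Usage sketch (an addition, not part of the original script): score a single
+# made-up sentence with the trained model, reusing word2index, "UNK",
+# MAX_SENTENCE_LENGTH and the left-padding convention from above
+new_sent = "i really loved this movie"
+new_words = nltk.word_tokenize(new_sent.lower())
+new_seq = [word2index[w] if w in word2index else word2index["UNK"]
+           for w in new_words]
+new_X = sequence.pad_sequences([new_seq], maxlen=MAX_SENTENCE_LENGTH)
+print("predicted sentiment score for %r: %.3f" %
+      (new_sent, model.predict(new_X)[0][0]))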