
New problem: mathematical language understanding #1290

Merged Dec 11, 2018 (14 commits)
1 change: 1 addition & 0 deletions AUTHORS
@@ -5,3 +5,4 @@
# of contributors, see the revision history in source control.

Google Inc.
Artit Wangperawong
20 changes: 20 additions & 0 deletions README.md
@@ -47,6 +47,7 @@ pip install tensor2tensor && t2t-trainer \
### Contents

* [Suggested Datasets and Models](#suggested-datasets-and-models)
* [Mathematical Language Understanding](#mathematical-language-understanding)
* [Story, Question and Answer](#story-question-and-answer)
* [Image Classification](#image-classification)
* [Image Generation](#image-generation)
@@ -79,6 +80,24 @@ hyperparameters that we know work well in our setup. We usually
run either on Cloud TPUs or on 8-GPU machines; you might need
to modify the hyperparameters if you run on a different setup.

### Mathematical Language Understanding

To evaluate mathematical expressions at the character level (addition, subtraction, and multiplication of positive and negative decimal integers of varying digit lengths, assigned to symbolic variables), use

* the [MLU](https://art.wangperawong.com/mathematical_language_understanding_train.tar.gz) dataset:
`--problem=mathematical_language_understanding`

You can try solving the problem with different transformer models and hyperparameters, as described in the [paper](https://arxiv.org/abs/1812.02825); an example command follows the list below:
* Standard transformer:
`--model=transformer`
`--hparams_set=transformer_tiny`
* Universal transformer:
`--model=universal_transformer`
`--hparams_set=universal_transformer_tiny`
* Adaptive universal transformer:
`--model=universal_transformer`
`--hparams_set=adaptive_universal_transformer_tiny`
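
For example, a complete run with the standard transformer might look like this (a sketch; the `--data_dir` and `--output_dir` paths are placeholders to adjust for your setup):

`t2t-trainer --generate_data --data_dir=~/t2t_data --output_dir=~/t2t_train/mlu --problem=mathematical_language_understanding --model=transformer --hparams_set=transformer_tiny`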

### Story, Question and Answer

For answering questions based on a story, use
@@ -464,5 +483,6 @@ T2T](https://research.googleblog.com/2017/06/accelerating-deep-learning-research
* [Fast Decoding in Sequence Models using Discrete Latent Variables](https://arxiv.org/abs/1803.03382)
* [Adafactor: Adaptive Learning Rates with Sublinear Memory Cost](https://arxiv.org/abs/1804.04235)
* [Universal Transformers](https://arxiv.org/abs/1807.03819)
* [Attending to Mathematical Language with Transformers](https://arxiv.org/abs/1812.02825)

*Note: This is not an official Google product.*
1 change: 1 addition & 0 deletions tensor2tensor/data_generators/all_problems.py
@@ -50,6 +50,7 @@
"tensor2tensor.data_generators.lm1b",
"tensor2tensor.data_generators.lm1b_imdb",
"tensor2tensor.data_generators.lm1b_mnli",
"tensor2tensor.data_generators.mathematical_language_understanding",
"tensor2tensor.data_generators.mnist",
"tensor2tensor.data_generators.mrpc",
"tensor2tensor.data_generators.mscoco",
5 changes: 2 additions & 3 deletions tensor2tensor/data_generators/babi_qa.py
@@ -109,9 +109,9 @@ def _prepare_babi_data(tmp_dir, data_dir):
tf.gfile.MakeDirs(data_dir)

file_path = os.path.join(tmp_dir, _TAR)
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"} # pylint: disable=line-too-long
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
resp = requests.get(_URL, headers=headers)
with open(file_path, "wb") as f:
with open(file_path, 'wb') as f:
f.write(resp.content)

tar = tarfile.open(file_path)
@@ -459,7 +459,6 @@ def hparams(self, defaults, unused_model_hparams):
if "context" in p.vocab_size:
del p.vocab_size["context"]


def _problems_to_register():
"""Problems for which we want to create datasets.

104 changes: 104 additions & 0 deletions tensor2tensor/data_generators/mathematical_language_understanding.py
@@ -0,0 +1,104 @@
# coding=utf-8
# Copyright 2018 Artit Wangperawong artitw@gmail.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Data generators for the Mathematical Language Understanding dataset.

The training and test data were generated by assigning symbolic variables
either positive or negative decimal integers and then describing the algebraic
operation to perform. We restrict our variable assignments to the range
x,y->[-1000,1000) and the operations to the set {+,-,*}. To ensure that the
model treats the variables symbolically, the order in which x and y appear in
the input is chosen at random: for instance, both x=531,y=129,x-y and
y=129,x=531,x-y are valid input strings, each with target 402. Each input
string is
accompanied by its target string, which is the evaluation of the mathematical
expression. All targets are decimal integers represented at the character
level. About 12 million unique samples were generated and randomly split into
training and test sets at an approximate 9:1 ratio. (An illustrative
generation sketch appears at the end of this module.)

For more information check the following paper:
Artit Wangperawong. Attending to Mathematical Language with Transformers,
arXiv:1812.02825.
Available at: https://arxiv.org/abs/1812.02825

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tarfile

from tensor2tensor.data_generators import generator_utils
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_problems
from tensor2tensor.utils import registry

import tensorflow as tf


@registry.register_problem
class MathematicalLanguageUnderstanding(text_problems.Text2TextProblem):
  """Evaluate character-level algebraic expressions such as x=2,y=3,x+y."""

  URL = ("https://art.wangperawong.com/"
         "mathematical_language_understanding_train.tar.gz")

  @property
  def vocab_type(self):
    # Expressions and their values are modeled character by character.
    return text_problems.VocabType.CHARACTER

  @property
  def dataset_splits(self):
    return [{
        "split": problem.DatasetSplit.TRAIN,
        "shards": 10,
    }, {
        "split": problem.DatasetSplit.EVAL,
        "shards": 1,
    }]

  @property
  def is_generate_per_split(self):
    # A single generator feeds both splits; the framework shuffles and
    # partitions the examples across them per dataset_splits.
    return False

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Downloads and extracts the dataset and yields examples.

    Args:
      data_dir: The base directory where data and vocab files are stored.
      tmp_dir: Temp directory to download and extract the dataset into.
      dataset_split: Unused, since the same generator feeds every split.

    Yields:
      Example dicts with "inputs" (the expression) and "targets" (its value).
    """
    if not tf.gfile.Exists(tmp_dir):
      tf.gfile.MakeDirs(tmp_dir)

    if not tf.gfile.Exists(data_dir):
      tf.gfile.MakeDirs(data_dir)

    # Download and extract the archive.
    compressed_filename = os.path.basename(self.URL)
    download_path = generator_utils.maybe_download(
        tmp_dir, compressed_filename, self.URL)

    with tarfile.open(download_path, "r:gz") as tar:
      tar.extractall(tmp_dir)

    filepath = os.path.join(
        tmp_dir, "mathematical_language_understanding_train.txt")

    # Each line of the file has the form "<expression>:<value>".
    with open(filepath, "r") as fp:
      for line in fp:
        prob, ans = line.strip().split(":")
        yield {"inputs": prob, "targets": ans}
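
# ---------------------------------------------------------------------------
# Editorial sketch, not part of this problem's pipeline: the released archive
# already contains pre-generated pairs, but examples of the form described in
# the module docstring could be produced roughly as follows. The helper name
# `_sample_mlu_pair` and the use of the stdlib `random` and `operator` modules
# are illustrative assumptions, not the paper's code.
def _sample_mlu_pair():
  import operator
  import random
  ops = {"+": operator.add, "-": operator.sub, "*": operator.mul}
  # Variable assignments are drawn from [-1000, 1000).
  x = random.randrange(-1000, 1000)
  y = random.randrange(-1000, 1000)
  op = random.choice(sorted(ops))
  # Randomize the order of the assignments so the model cannot rely on
  # positional cues when resolving the symbolic variables.
  first, second = random.sample(["x=%d" % x, "y=%d" % y], 2)
  inputs = "%s,%s,x%sy" % (first, second, op)
  return {"inputs": inputs, "targets": str(ops[op](x, y))}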

3 changes: 1 addition & 2 deletions tensor2tensor/models/research/universal_transformer.py
@@ -243,8 +243,7 @@ def _greedy_infer(self, features, decode_length, use_tpu=False):
return (self._slow_greedy_infer_tpu(features, decode_length) if use_tpu else
self._slow_greedy_infer(features, decode_length))

-  def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha,
-                   use_tpu=False):
+  def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha, use_tpu=False):
"""Beam search decoding.

Args: