Skip to content

Paddle V4 - label_semantic_roles #10216

@sidgoyal78

Description

@sidgoyal78
class SemanticRoles(fluid.Program):
    """Semantic-role-labelling program with a shared feature network.

    ``train_step`` and ``infer`` both declare the same eight sequence inputs
    and run them through ``_predict`` inside the ``"predict"`` variable scope;
    ``train_step`` adds a linear-chain CRF cost and an SGD update, ``infer``
    adds CRF decoding. Both CRF layers use the parameter named ``'crfw'``.

    Hyper-parameters (``word_dim``, ``hidden_dim``, ``depth``, ``IS_SPARSE``,
    the ``*_dict_len`` sizes, ...) are read from module-level names that are
    not defined in this class.
    """

    def _project_and_sum(self, inputs, size):
        """Apply a tanh ``fc`` of width ``size`` to each input and sum them."""
        return fluid.layers.sums(input=[
            fluid.layers.fc(input=x, size=size, act='tanh') for x in inputs
        ])

    def _feature_inputs(self):
        """Declare the eight feature data layers.

        Returns them as a list in the positional order ``_predict`` expects:
        word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark.
        """
        names = ('word_data', 'verb_data', 'ctx_n2_data', 'ctx_n1_data',
                 'ctx_0_data', 'ctx_p1_data', 'ctx_p2_data', 'mark_data')
        return [
            fluid.layers.data(
                name=name, shape=[1], dtype='int64', lod_level=1)
            for name in names
        ]

    def _predict(self, word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
                 mark):
        """Build the feature network and return the per-label feature layer.

        Args:
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: word-id inputs; all
                six are embedded with the same parameter (``embedding_name``),
                which is marked non-trainable.
            predicate: predicate-id input, embedded with parameter ``'vemb'``.
            mark: mark-id input, embedded with its own default parameter.

        Returns:
            The sum of two tanh ``fc`` projections (width ``label_dict_len``)
            of the last mixed hidden layer and the last LSTM output.
        """
        predicate_embedding = fluid.layers.embedding(
            input=predicate,
            size=[pred_dict_len, word_dim],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='vemb')

        # Use the module-level IS_SPARSE flag here as well: the class never
        # sets a `self.is_sparse` attribute, and the predicate embedding above
        # already reads the constant.
        mark_embedding = fluid.layers.embedding(
            input=mark,
            size=[mark_dict_len, mark_dim],
            dtype='float32',
            is_sparse=IS_SPARSE)

        word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
        emb_layers = [
            fluid.layers.embedding(
                size=[word_dict_len, word_dim],
                input=x,
                param_attr=fluid.ParamAttr(
                    name=embedding_name, trainable=False))
            for x in word_input
        ]
        emb_layers.append(predicate_embedding)
        emb_layers.append(mark_embedding)

        # One fc per embedding, summed into the first hidden layer.
        hidden_0 = self._project_and_sum(emb_layers, hidden_dim)

        # Activation settings shared by every LSTM layer in the stack.
        lstm_kwargs = dict(
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid')

        # The first LSTM leaves `is_reverse` at the layer's default.
        lstm_0 = fluid.layers.dynamic_lstm(input=hidden_0, **lstm_kwargs)

        # stack L-LSTM and R-LSTM with direct edges
        input_tmp = [hidden_0, lstm_0]

        for i in range(1, depth):
            mix_hidden = self._project_and_sum(input_tmp, hidden_dim)

            # Odd-numbered layers run over the sequence in reverse.
            lstm = fluid.layers.dynamic_lstm(
                input=mix_hidden,
                is_reverse=((i % 2) == 1),
                **lstm_kwargs)

            input_tmp = [mix_hidden, lstm]

        return self._project_and_sum(input_tmp, label_dict_len)

    @network("word_data", "verb_data", "ctx_n2_data", "ctx_n1_data",
             "ctx_0_data", "ctx_p1_data", "ctx_p2_data", "mark_data", "target")
    def train_step(self):
        """Build the training network and return the mean CRF cost.

        Declares the eight feature inputs plus the ``target`` label input,
        computes the CRF cost on the predicted features, and attaches an SGD
        optimizer with a staircase exponentially decayed learning rate.
        """
        inputs = self._feature_inputs()

        # The label input named in the @network decorator; it must be declared
        # here because linear_chain_crf below consumes it.
        # NOTE(review): shape/dtype/lod_level mirror the other sequence inputs
        # — confirm against the label format of the dataset reader.
        target = fluid.layers.data(
            name='target', shape=[1], dtype='int64', lod_level=1)

        with fluid.var_scope("predict"):
            feature_out = self._predict(*inputs)

        crf_cost = fluid.layers.linear_chain_crf(
            input=feature_out,
            label=target,
            param_attr=fluid.ParamAttr(
                name='crfw', learning_rate=mix_hidden_lr))
        avg_cost = fluid.layers.mean(crf_cost)

        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.01,
                decay_steps=100000,
                decay_rate=0.5,
                staircase=True))

        sgd_optimizer.minimize(avg_cost)
        return avg_cost

    @network("word_data", "verb_data", "ctx_n2_data", "ctx_n1_data",
             "ctx_0_data", "ctx_p1_data", "ctx_p2_data", "mark_data")
    def infer(self):
        """Build the inference network and return the CRF decoding layer.

        Uses the same inputs and ``"predict"`` scope as ``train_step`` and
        decodes with the ``'crfw'`` parameter.
        """
        inputs = self._feature_inputs()

        with fluid.var_scope("predict"):
            feature_out = self._predict(*inputs)

        crf_decode = fluid.layers.crf_decoding(
            input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
        return crf_decode

# Compile the program; the returned object is used below to call the
# `train_step` and `infer` networks defined on SemanticRoles.
label_semantic_role = SemanticRoles().Compile()

# Reader pipeline: shuffle the conll05 `test()` reader (buf_size=8192), then
# group the samples into batches of BATCH_SIZE.
_shuffled_reader = paddle.reader.shuffle(
    paddle.dataset.conll05.test(), buf_size=8192)
train_reader = paddle.batch(_shuffled_reader, batch_size=BATCH_SIZE)

for epoch_id in range(NUM_EPOCHS):
    for data in train_reader():
        # TODO: split data -- the batch is not yet passed to train_step().
        avg_cost = label_semantic_role.train_step()

# TODO: inference is called without any input yet.
label = label_semantic_role.infer()

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions