Skip to content

Commit 869763a

Browse files
author
cchyun
committed
pytorch_pretrained_BERT add
1 parent 076f298 commit 869763a

10 files changed

+4348
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import os
2+
import logging
3+
import json
4+
import collections
5+
6+
import examples.run_squad as run_squad
7+
from pytorch_pretrained_bert.tokenization import BertTokenizer
8+
9+
10+
if __name__ == "__main__":
    # Debug driver: push a small SQuAD-format sample through the example
    # reader and the feature converter from examples/run_squad.py, with
    # DEBUG-level logging enabled so the helpers' log output is visible.
    logging.basicConfig(level=logging.DEBUG)

    # --- Input data -------------------------------------------------------
    input_file = "./samples/SQuAD/train-simple.json"
    # Alternative sample (KorQuAD); swap in to test Korean input:
    # input_file = "./samples/KorQuAD/KorQuAD_simple_train.json"

    # --- Tokenizer settings -----------------------------------------------
    bert_model = "bert-base-multilingual-cased"
    do_lower_case = False

    # --- Feature-conversion limits ----------------------------------------
    max_seq_length = 128
    doc_stride = 128
    max_query_length = 64

    # Read the examples in training mode, without SQuAD 2.0 negatives.
    train_examples = run_squad.read_squad_examples(
        input_file=input_file,
        is_training=True,
        version_2_with_negative=False,
    )

    tokenizer = BertTokenizer.from_pretrained(
        bert_model,
        do_lower_case=do_lower_case,
    )

    # The return value is intentionally not kept: this script only exercises
    # the conversion path (and whatever it logs) for inspection.
    run_squad.convert_examples_to_features(
        examples=train_examples,
        tokenizer=tokenizer,
        max_seq_length=max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length,
        is_training=True,
    )
32+

codes/pytorch-pretrained-BERT/extract_features.ipynb

Lines changed: 411 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)