# examples.py - example invocations of the ASR and NLP probing pipelines.
from lib.runners.asr_probing import *
from lib.runners.nlp_probing import *
def main():
    """Run the currently enabled probing example.

    The only active call invokes ``SimpleNLPPipeline`` with ``BertOProber``
    on ``"bert-large-cased"``, the ``"top_constituents"`` dataset
    (``morph_call="senteval"``), and layer indices 1 through 24.

    The commented-out calls are alternative example configurations (one ASR
    example and several NLP examples); uncomment one to use it instead.
    All names other than ``np`` come from the module-level star imports.
    """
    import numpy as np

    # --- Simple ASR example (disabled) ---
    # SimpleASRPipeline()(model2probe = Wav2Vec2Prober, morph_call = "common_voice", model_path = None,
    # save_checkpoints = False, dataset_language = ["ru"], dataset_name = None,
    # preprocessing_fn=prepare_probing_task_,
    # features = ['gender'], layers = list(np.arange(1, 3, 1)), dataset_split= "test",
    # tokenizer= Wav2Vec2OProcessor, data_path= "SP", device = torch.device('cuda'), data_column = "data", revision = "1.10.0")

    # --- Simple NLP examples (disabled) ---
    # SimpleNLPPipeline(model2probe = T5Prober, dataset_name = "t5", ##important!!!!
    # model_path = None,
    # dataset_type = "person", save_checkpoints = True, download_data = True,
    # checkpoint_path = torch.load("t5small.pth").state_dict(),
    # feature = 'label', layers = list(np.arange(1, 5, 1)),
    # tokenizer= T5Processor, data_path= "person.tsv", device = torch.device('cuda'), data_column = "text")

    # SimpleNLPPipeline(model2probe = T5EncoderDecoderProber, dataset_name = "t5",
    # model_path = None,
    # dataset_type = "person", save_checkpoints = False, download_data = False,
    # # checkpoint_path = torch.load("t5small.pth").state_dict(),
    # feature = 'label', layers = {"encoder": list(np.arange(1, 5, 1)), "decoder": list(np.arange(1, 5, 1))},
    # tokenizer= T5Processor, data_path= "person.tsv", device = torch.device('cuda'), data_column = "text")

    # SimpleNLPPipeline(model2probe = BertOProber, dataset_name = "lovethisdataset2", model_path = 'bert-base-cased',
    # morph_call = "DiscoEval", save_checkpoints = False,
    # feature = 'label', layers = list(np.arange(1, 3, 1)), dataset_split= "all",
    # tokenizer= BertProcessor, data_path= "SP", device = torch.device('cuda'), data_column = "data")

    # SimpleNLPPipeline(model2probe = BertOProber, dataset_name = "lovethisdataset2", model_path = 'bert-base-cased',
    # checkpoint = "bert-base-cased",
    # morph_call = "DiscoEval", save_checkpoints = False,
    # feature = 'label', layers = list(np.arange(1, 3, 1)), dataset_split= "dev;test", download_data = True,
    # tokenizer= BertProcessor, data_path= "DC", device = torch.device('cuda'), data_column = "data")

    # --- Active example ---
    # np.arange excludes the stop value, so this yields indices 1..24.
    probed_layers = list(np.arange(1, 25, 1))

    SimpleNLPPipeline(
        model2probe=BertOProber,
        dataset_name="top_constituents",
        model_path="bert-large-cased",
        morph_call="senteval",
        save_checkpoints=False,
        dataset_split="all",
        download_data=True,
        is_prepared=False,
        feature='label',
        layers=probed_layers,
        tokenizer=BertProcessor,
        data_path="top_constituents.txt",
        device=torch.device('cuda'),
        data_column="data",
    )
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()