diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index c264dfe462aae7..79f0652e192f2a 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -18,21 +18,17 @@ jobs:
     name: Benchmark
     runs-on:
       group: aws-g5-4xlarge-cache
+    if: |
+      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmark')) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main')
     container:
       image: huggingface/transformers-pytorch-gpu
       options: --gpus all --privileged --ipc host
     steps:
       - name: Get repo
-        if: github.event_name == 'pull_request'
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.pull_request.head.sha }}
-
-      - name: Get repo
-        if: github.event_name == 'push'
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.sha }}
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
 
       - name: Install libpq-dev & psql
         run: |
diff --git a/scripts/deberta_scrtipt.py b/scripts/deberta_scrtipt.py
new file mode 100644
index 00000000000000..b910d8de3f52b5
--- /dev/null
+++ b/scripts/deberta_scrtipt.py
@@ -0,0 +1,82 @@
+"""Ad-hoc timing script for DeBERTa under torch.compile and torch.jit.trace.
+
+Prints fill-mask predictions from BERT and DeBERTa-v3 as a sanity check, times
+forward passes of the torch.compile'd DeBERTa-v3 model, then traces DeBERTa-base
+with torch.jit.trace, times the traced module and saves it to disk.
+"""
+
+import time
+
+import torch
+from transformers import pipeline, AutoTokenizer, AutoModel, AutoModelForMaskedLM
+
+TEST_SENTENCE = 'Do you [MASK] the muffin man?'
+COMPILED_CHECKPOINT = "microsoft/deberta-v3-base"
+TRACED_CHECKPOINT = "microsoft/deberta-base"
+TRACED_MODEL_PATH = "compiled_deberta.pt"
+COMPILED_RUNS = 3
+TRACED_RUNS = 4
+
+
+def timed(label, fn, /, *args, **kwargs):
+    """Call ``fn(*args, **kwargs)``, print how long it took and return its result."""
+    # perf_counter is monotonic and high-resolution; time.time() can jump with clock changes.
+    start = time.perf_counter()
+    result = fn(*args, **kwargs)
+    elapsed = time.perf_counter() - start
+    print(f"{label}: {elapsed:.4f}s")
+    return result
+
+
+def print_fill_mask(checkpoint, **pipeline_kwargs):
+    """Build a fill-mask pipeline for ``checkpoint``, print its predictions and return it."""
+    fill_mask = pipeline('fill-mask', model=checkpoint, **pipeline_kwargs)
+    print('\n'.join(d['sequence'] for d in fill_mask(TEST_SENTENCE)))
+    return fill_mask
+
+
+def encode_pair(checkpoint):
+    """Tokenize a fixed sentence pair with the tokenizer that belongs to ``checkpoint``."""
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    return tokenizer(["Is this working"], ["Not yet"], return_tensors="pt")
+
+
+def benchmark_compiled(model, inputs):
+    """Wrap ``model.forward`` in torch.compile and time repeated forward passes."""
+    model.forward = torch.compile(model.forward)
+    for run in range(1, COMPILED_RUNS + 1):
+        # Compilation is lazy, so the first call also pays the compile cost.
+        timed(f"torch.compile forward #{run}", model, **inputs)
+
+
+def benchmark_traced(checkpoint, output_path):
+    """Trace ``checkpoint`` with torch.jit.trace, time the traced module and save it."""
+    # Use the checkpoint's own tokenizer: DeBERTa-v3 and DeBERTa-base vocabularies differ.
+    inputs = encode_pair(checkpoint)
+    model = AutoModel.from_pretrained(checkpoint)
+    # Return plain tuples; strict tracing does not accept dict-like model outputs.
+    model.config.return_dict = False
+    model.config.output_hidden_states = False
+    example = (inputs['input_ids'], inputs['attention_mask'])
+
+    traced = timed("torch.jit.trace", torch.jit.trace, model, example)
+    for run in range(1, TRACED_RUNS + 1):
+        timed(f"traced forward #{run}", traced, *example)
+    torch.jit.save(traced, output_path)
+    # TODO: also try torch.jit.script(model) as an alternative to tracing.
+
+
+def main():
+    """Run the fill-mask sanity checks and both benchmarks."""
+    # BERT is only here for comparison with the DeBERTa predictions.
+    print_fill_mask('bert-base-uncased')
+    deberta = print_fill_mask(COMPILED_CHECKPOINT, model_kwargs={"legacy": False})
+
+    # These are inference timings, so skip autograd bookkeeping.
+    with torch.no_grad():
+        benchmark_compiled(deberta.model, encode_pair(COMPILED_CHECKPOINT))
+        benchmark_traced(TRACED_CHECKPOINT, TRACED_MODEL_PATH)
+
+
+if __name__ == "__main__":
+    main()