Skip to content

Commit 59371fe

Browse files
authored
update example dataloader for transformers 4.31.x (#1125)
Signed-off-by: Cheng, Zixuan <zixuan.cheng@intel.com>
1 parent 9f80e61 commit 59371fe

File tree

10 files changed

+150
-8
lines changed

10 files changed

+150
-8
lines changed

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,22 @@ def compute_metrics(eval_preds):
536536
else None,
537537
)
538538

539+
eval_dataloader = trainer.get_eval_dataloader()
540+
# transformer issue #1
541+
# for transformers 4.31.0: accelerate dataloader
542+
# *** ValueError: batch_size attribute should not be set
543+
# after DataLoaderShard is initialized
544+
if eval_dataloader.batch_size is None:
545+
def _build_inc_dataloader(dataloader):
546+
class INCDataLoader:
547+
__iter__ = dataloader.__iter__
548+
def __init__(self) -> None:
549+
self.dataloader = dataloader
550+
self.batch_size = dataloader.total_batch_size
551+
return INCDataLoader()
552+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
553+
batch_size = eval_dataloader.batch_size
554+
539555
# Tune
540556
def eval_func_for_nc(model_tuned):
541557
trainer.model = model_tuned
@@ -564,7 +580,7 @@ def eval_func_for_nc(model_tuned):
564580
conf = PostTrainingQuantConfig(accuracy_criterion=accuracy_criterion)
565581
q_model = quantization.fit(model,
566582
conf,
567-
calib_dataloader=trainer.get_eval_dataloader(),
583+
calib_dataloader=eval_dataloader,
568584
eval_func=eval_func_for_nc)
569585
q_model.save(training_args.output_dir)
570586
exit(0)
@@ -582,7 +598,7 @@ def eval_func_for_nc(model_tuned):
582598
from neural_compressor.config import BenchmarkConfig
583599
from neural_compressor import benchmark
584600
b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
585-
benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
601+
benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
586602
else:
587603
eval_func_for_nc(new_model)
588604

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_weight_only/run_clm.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,22 @@ def compute_metrics(eval_preds):
552552
else None,
553553
)
554554

555+
eval_dataloader = trainer.get_eval_dataloader()
556+
# transformer issue #1
557+
# for transformers 4.31.0: accelerate dataloader
558+
# *** ValueError: batch_size attribute should not be set
559+
# after DataLoaderShard is initialized
560+
if eval_dataloader.batch_size is None:
561+
def _build_inc_dataloader(dataloader):
562+
class INCDataLoader:
563+
__iter__ = dataloader.__iter__
564+
def __init__(self) -> None:
565+
self.dataloader = dataloader
566+
self.batch_size = dataloader.total_batch_size
567+
return INCDataLoader()
568+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
569+
batch_size = eval_dataloader.batch_size
570+
555571
# Tune
556572
def eval_func_for_nc(model_tuned):
557573
trainer.model = model_tuned
@@ -592,7 +608,7 @@ def eval_func_for_nc(model_tuned):
592608
op_type_dict=op_type_dict)
593609
q_model = quantization.fit(model,
594610
conf,
595-
calib_dataloader=trainer.get_eval_dataloader(),
611+
calib_dataloader=eval_dataloader,
596612
eval_func=eval_func_for_nc)
597613
q_model.save(training_args.output_dir)
598614
exit(0)
@@ -610,7 +626,7 @@ def eval_func_for_nc(model_tuned):
610626
from neural_compressor.config import BenchmarkConfig
611627
from neural_compressor import benchmark
612628
b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
613-
benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
629+
benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
614630
else:
615631
eval_func_for_nc(new_model)
616632

examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,19 @@ def compute_metrics(p: EvalPrediction):
605605
)
606606

607607
eval_dataloader = trainer.get_eval_dataloader()
608+
# transformer issue #1
609+
# for transformers 4.31.0: accelerate dataloader
610+
# *** ValueError: batch_size attribute should not be set
611+
# after DataLoaderShard is initialized
612+
if eval_dataloader.batch_size is None:
613+
def _build_inc_dataloader(dataloader):
614+
class INCDataLoader:
615+
__iter__ = dataloader.__iter__
616+
def __init__(self) -> None:
617+
self.dataloader = dataloader
618+
self.batch_size = dataloader.total_batch_size
619+
return INCDataLoader()
620+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
608621
batch_size = eval_dataloader.batch_size
609622
metric_name = "eval_f1"
610623

examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,19 @@ def compute_metrics(p: EvalPrediction):
612612
)
613613

614614
eval_dataloader = trainer.get_eval_dataloader()
615+
# transformer issue #1
616+
# for transformers 4.31.0: accelerate dataloader
617+
# *** ValueError: batch_size attribute should not be set
618+
# after DataLoaderShard is initialized
619+
if eval_dataloader.batch_size is None:
620+
def _build_inc_dataloader(dataloader):
621+
class INCDataLoader:
622+
__iter__ = dataloader.__iter__
623+
def __init__(self) -> None:
624+
self.dataloader = dataloader
625+
self.batch_size = dataloader.total_batch_size
626+
return INCDataLoader()
627+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
615628
batch_size = eval_dataloader.batch_size
616629
metric_name = "eval_f1"
617630

examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_summarization.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,22 @@ def compute_metrics(eval_preds):
588588
)
589589
num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
590590

591+
eval_dataloader = trainer.get_eval_dataloader()
592+
# transformer issue #1
593+
# for transformers 4.31.0: accelerate dataloader
594+
# *** ValueError: batch_size attribute should not be set
595+
# after DataLoaderShard is initialized
596+
if eval_dataloader.batch_size is None:
597+
def _build_inc_dataloader(dataloader):
598+
class INCDataLoader:
599+
__iter__ = dataloader.__iter__
600+
def __init__(self) -> None:
601+
self.dataloader = dataloader
602+
self.batch_size = dataloader.total_batch_size
603+
return INCDataLoader()
604+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
605+
batch_size = eval_dataloader.batch_size
606+
591607
def eval_func_for_nc(model):
592608
trainer.model = model
593609
results = trainer.evaluate(
@@ -616,7 +632,7 @@ def eval_func_for_nc(model):
616632
conf = PostTrainingQuantConfig(approach="dynamic")
617633
q_model = quantization.fit(model,
618634
conf,
619-
calib_dataloader=trainer.get_eval_dataloader(),
635+
calib_dataloader=eval_dataloader,
620636
eval_func=eval_func_for_nc)
621637
q_model.save(training_args.output_dir)
622638
exit(0)
@@ -634,7 +650,7 @@ def eval_func_for_nc(model):
634650
from neural_compressor.config import BenchmarkConfig
635651
from neural_compressor import benchmark
636652
b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
637-
benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
653+
benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
638654
else:
639655
eval_func_for_nc(new_model)
640656

examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_glue.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,19 @@ def compute_metrics(p: EvalPrediction):
494494
)
495495

496496
eval_dataloader = trainer.get_eval_dataloader()
497+
# transformer issue #1
498+
# for transformers 4.31.0: accelerate dataloader
499+
# *** ValueError: batch_size attribute should not be set
500+
# after DataLoaderShard is initialized
501+
if eval_dataloader.batch_size is None:
502+
def _build_inc_dataloader(dataloader):
503+
class INCDataLoader:
504+
__iter__ = dataloader.__iter__
505+
def __init__(self) -> None:
506+
self.dataloader = dataloader
507+
self.batch_size = dataloader.total_batch_size
508+
return INCDataLoader()
509+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
497510
batch_size = eval_dataloader.batch_size
498511

499512
def take_eval_steps(model, trainer, save_metrics=False):

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,19 @@ def compute_metrics(p: EvalPrediction):
479479
)
480480

481481
eval_dataloader = trainer.get_eval_dataloader()
482+
# transformer issue #1
483+
# for transformers 4.31.0: accelerate dataloader
484+
# *** ValueError: batch_size attribute should not be set
485+
# after DataLoaderShard is initialized
486+
if eval_dataloader.batch_size is None:
487+
def _build_inc_dataloader(dataloader):
488+
class INCDataLoader:
489+
__iter__ = dataloader.__iter__
490+
def __init__(self) -> None:
491+
self.dataloader = dataloader
492+
self.batch_size = dataloader.total_batch_size
493+
return INCDataLoader()
494+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
482495
batch_size = eval_dataloader.batch_size
483496

484497
def take_eval_steps(model, trainer, save_metrics=False):

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,19 @@ def compute_metrics(p: EvalPrediction):
479479
)
480480

481481
eval_dataloader = trainer.get_eval_dataloader()
482+
# transformer issue #1
483+
# for transformers 4.31.0: accelerate dataloader
484+
# *** ValueError: batch_size attribute should not be set
485+
# after DataLoaderShard is initialized
486+
if eval_dataloader.batch_size is None:
487+
def _build_inc_dataloader(dataloader):
488+
class INCDataLoader:
489+
__iter__ = dataloader.__iter__
490+
def __init__(self) -> None:
491+
self.dataloader = dataloader
492+
self.batch_size = dataloader.total_batch_size
493+
return INCDataLoader()
494+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
482495
batch_size = eval_dataloader.batch_size
483496

484497
def take_eval_steps(model, trainer, save_metrics=False):

examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,19 @@ def compute_metrics(p: EvalPrediction):
506506
early_stopping_threshold))
507507

508508
eval_dataloader = trainer.get_eval_dataloader()
509+
# transformer issue #1
510+
# for transformers 4.31.0: accelerate dataloader
511+
# *** ValueError: batch_size attribute should not be set
512+
# after DataLoaderShard is initialized
513+
if eval_dataloader.batch_size is None:
514+
def _build_inc_dataloader(dataloader):
515+
class INCDataLoader:
516+
__iter__ = dataloader.__iter__
517+
def __init__(self) -> None:
518+
self.dataloader = dataloader
519+
self.batch_size = dataloader.total_batch_size
520+
return INCDataLoader()
521+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
509522
batch_size = eval_dataloader.batch_size
510523

511524
def eval_func(model):

examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_translation.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,22 @@ def compute_metrics(eval_preds):
543543
compute_metrics=compute_metrics if training_args.predict_with_generate else None
544544
)
545545

546+
eval_dataloader = trainer.get_eval_dataloader()
547+
# transformer issue #1
548+
# for transformers 4.31.0: accelerate dataloader
549+
# *** ValueError: batch_size attribute should not be set
550+
# after DataLoaderShard is initialized
551+
if eval_dataloader.batch_size is None:
552+
def _build_inc_dataloader(dataloader):
553+
class INCDataLoader:
554+
__iter__ = dataloader.__iter__
555+
def __init__(self) -> None:
556+
self.dataloader = dataloader
557+
self.batch_size = dataloader.total_batch_size
558+
return INCDataLoader()
559+
eval_dataloader = _build_inc_dataloader(eval_dataloader)
560+
batch_size = eval_dataloader.batch_size
561+
546562
results = {}
547563
max_length = (
548564
training_args.generation_max_length
@@ -576,7 +592,7 @@ def eval_func_for_nc(model):
576592
conf = PostTrainingQuantConfig(approach="dynamic")
577593
q_model = quantization.fit(model,
578594
conf,
579-
calib_dataloader=trainer.get_eval_dataloader(),
595+
calib_dataloader=eval_dataloader,
580596
eval_func=eval_func_for_nc)
581597
q_model.save(training_args.output_dir)
582598
exit(0)
@@ -595,7 +611,7 @@ def eval_func_for_nc(model):
595611
from neural_compressor.config import BenchmarkConfig
596612
from neural_compressor import benchmark
597613
b_conf = BenchmarkConfig(warmup=5, iteration=100, cores_per_instance=4, num_of_instance=1)
598-
benchmark.fit(new_model, b_conf, b_dataloader=trainer.get_eval_dataloader())
614+
benchmark.fit(new_model, b_conf, b_dataloader=eval_dataloader)
599615
else:
600616
eval_func_for_nc(new_model)
601617
exit(0)

0 commit comments

Comments (0)