From e55750949bf0126e2e5702aec616d2c8cf496463 Mon Sep 17 00:00:00 2001
From: mzio
Date: Thu, 19 Sep 2024 18:19:34 -0700
Subject: [PATCH] Update lm eval model

---
 lm_eval_harness/eval_lm_harness_big.py | 30 +++++++++++++++++++++-----
 lm_eval_harness/models.py              | 19 ++++++++++++++++
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/lm_eval_harness/eval_lm_harness_big.py b/lm_eval_harness/eval_lm_harness_big.py
index d759380..6f539e0 100644
--- a/lm_eval_harness/eval_lm_harness_big.py
+++ b/lm_eval_harness/eval_lm_harness_big.py
@@ -207,6 +207,20 @@ def count_params(module) -> int:
     return sum(p.numel() for p in module.parameters())
 
 
+def check_state_dict_keys(_keys, layer_idx, rank=0):
+    try:
+        assert len(_keys.unexpected_keys) == 0
+        if rank == 0:
+            print_header(f'*** All expected keys matched successfully {layer_idx} ***')
+    except Exception as e:
+        if rank == 0:
+            print(e)
+            print_header('*** Error: unexpected keys in checkpoint ***')
+            print(f'Unexpected keys at {layer_idx}:')
+            for k in _keys.unexpected_keys:
+                print(k)
+
+
 def main():
     sys.path.append(LM_EVALUATION_HARNESS_PATH)
     from lm_eval import evaluator
@@ -344,7 +358,8 @@ def main():
                                            peft_gradient_checkpointing=not args.no_peft_grad_ckpt,
                                            train_attention=False)
         if True:  # rank == 0:
-            if distill_config.trainer.name is not None or args.attn_mlp_checkpoint_path is not None:
+            # if distill_config.trainer.name is not None or args.attn_mlp_checkpoint_path is not None:
+            if distill_config.trainer.name is not None and args.attn_mlp_checkpoint_path is not None:
                 # if args.replicate == 64:
                 #     distill_config.model_name = distill_config.model_name.replace(f'-se={args.seed}', '-se=0').replace(f'-s={args.seed}', '-s=0')
                 # else:
@@ -366,10 +381,15 @@ def main():
                                            merge_loras=False,
                                            peft_gradient_checkpointing=not args.no_peft_grad_ckpt)
         if True:  # rank == 0:
-            model = load_sharded_model_single_gpu(model, model_path=args.finetune_checkpoint_path,  # None,
-                                                  cfg=finetune_config, rank=rank)
+            if '.pt' in args.finetune_checkpoint_path:
+                with torch.no_grad():
+                    _keys = model.load_state_dict(torch.load(args.finetune_checkpoint_path), strict=False)
+                    check_state_dict_keys(_keys, 0)
+            else:
+                model = load_sharded_model_single_gpu(model, model_path=args.finetune_checkpoint_path,  # None,
+                                                      cfg=finetune_config, rank=rank)
 
-    if rank == 0:
+    if True:  # if rank == 0:
         print_header('** Sanity check model weights **')
         for n, p in model.named_parameters():
             # if ('layers.0.' in n and ('feature_map' in n or 'lora' in n)):
@@ -421,4 +441,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/lm_eval_harness/models.py b/lm_eval_harness/models.py
index 2b8fc68..f9a6d9c 100644
--- a/lm_eval_harness/models.py
+++ b/lm_eval_harness/models.py
@@ -9,6 +9,7 @@
 from src.model.modeling_mistral import LooooolcatsMistralForCausalLM as LOOOOOLCATS_MISTRAL_MODEL_CLASS
 
 from src.model.modeling_llama_sharded import ShardedLolcatsLlamaForCausalLM as SHARDED_LOLCATS_LLAMA_MODEL_CLASS
+from src.model.modeling_llama_sharded_roll import ShardedRollLolcatsLlamaForCausalLM as SHARDED_ROLL_LOLCATS_LLAMA_MODEL_CLASS
 
 
 class LolcatsLlamaForCausalLM(AutoCausalLM):
@@ -63,6 +64,24 @@ def add_special_tokens(self) -> bool:
             return self._add_special_tokens
         else:
             return False
+
+
+class ShardedRollLolcatsLlamaForCausalLM(AutoCausalLM):
+    """
+    Wrapper for Llama or Mistral-like autoregressive language model
+    """
+    AUTO_MODEL_CLASS = SHARDED_ROLL_LOLCATS_LLAMA_MODEL_CLASS
+    @property
+    def add_special_tokens(self) -> bool:
+        """Whether to include special tokens in encoded text. This should be
+        determined by whether or not the model was trained with special tokens.
+        TODO: Remove these conditionals once HuggingFace supports a way to
+        check whether or not an arbitrary model was trained with special tokens.
+        """
+        if self._add_special_tokens is not None:
+            return self._add_special_tokens
+        else:
+            return False
 
 
 class LooooolcatsLlamaForCausalLM(AutoCausalLM):
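
Note on the new loading branch: for a flat '.pt' checkpoint, the patch calls
model.load_state_dict(..., strict=False), which does not raise on mismatched
keys but instead returns a named tuple of missing_keys and unexpected_keys
that check_state_dict_keys() then inspects. A minimal, self-contained sketch
of that PyTorch pattern (TinyModel and the hand-built state dict below are
hypothetical stand-ins, not code from this patch):

    import torch
    import torch.nn as nn

    class TinyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(4, 4)

        def forward(self, x):
            return self.linear(x)

    model = TinyModel()
    # Deliberately omit 'linear.bias' so the load reports a missing key.
    state_dict = {'linear.weight': torch.zeros(4, 4)}
    result = model.load_state_dict(state_dict, strict=False)

    # With strict=False the mismatches are returned instead of raised, so the
    # caller decides how to handle them; here we just print them, mirroring
    # the unexpected-key check in check_state_dict_keys().
    print('missing:', result.missing_keys)        # ['linear.bias']
    print('unexpected:', result.unexpected_keys)  # []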