@@ -357,7 +357,7 @@ class T5UniGramTokenizer {
 
         BuildTrie(&pieces);
     }
-    ~T5UniGramTokenizer(){};
+    ~T5UniGramTokenizer() {};
 
     std::string Normalize(const std::string& input) const {
         // Ref: https://github.com/huggingface/tokenizers/blob/1ff56c0c70b045f0cd82da1af9ac08cd4c7a6f9f/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py#L29
@@ -701,22 +701,27 @@ struct T5Stack : public GGMLBlock {
         auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
 
         x = final_layer_norm->forward(ctx, x);
-
+
         return x;
     }
 };
 
 struct T5 : public GGMLBlock {
+    bool final_proj = false;
+
 public:
+    T5() {}
     T5(int64_t num_layers,
        int64_t model_dim,
        int64_t ff_dim,
        int64_t num_heads,
        int64_t vocab_size,
-       int64_t projection_dim) {
+       int64_t projection_dim) : final_proj(projection_dim > 0) {
         blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
         blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
-        blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        if (final_proj) {
+            blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        }
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
@@ -731,9 +736,10 @@ struct T5 : public GGMLBlock {
 
         auto x = shared->forward(ctx, input_ids);
         x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
-
-        auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
-        x = final_projection->forward(ctx, x);
+        if (final_proj) {
+            auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
+            x = final_projection->forward(ctx, x);
+        }
         return x;
     }
 };
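
The two hunks above make the final projection optional: `final_proj` is derived from `projection_dim` and guards both the registration of the `final_projection` block in the constructor and its application in `forward`. Below is a minimal standalone sketch of that gating pattern; `MiniT5` and `output_dim` are illustrative stand-ins, and only the `projection_dim > 0` convention is taken from the diff.

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the T5 block: the projection is only
// "registered" and only applied when projection_dim > 0.
struct MiniT5 {
    bool final_proj = false;
    int64_t model_dim;
    int64_t projection_dim;

    MiniT5(int64_t model_dim, int64_t projection_dim)
        : final_proj(projection_dim > 0), model_dim(model_dim), projection_dim(projection_dim) {}

    // Mirrors forward(): either pass the encoder output through the
    // projection or return it at its native width.
    int64_t output_dim() const {
        return final_proj ? projection_dim : model_dim;
    }
};

int main() {
    MiniT5 t5_base(768, 4096);  // checkpoint with a final_projection weight
    MiniT5 t5_xxl(4096, -1);    // T5-XXL: projection_dim <= 0, no projection
    std::cout << t5_base.output_dim() << "\n";  // 4096 (projected)
    std::cout << t5_xxl.output_dim() << "\n";   // 4096 (model_dim)
}
```

Note that the T5-XXL `model_dim` (4096) equals the smaller variant's `projection_dim`, so either path presumably hands downstream consumers an output of the same width.
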
@@ -745,13 +751,23 @@ struct T5Runner : public GGMLRunner {
     T5Runner(ggml_backend_t backend,
              std::map<std::string, enum ggml_type>& tensor_types,
              const std::string prefix,
-             int64_t num_layers = 12,
-             int64_t model_dim = 768,
-             int64_t ff_dim = 2048,
-             int64_t num_heads = 12,
-             int64_t vocab_size = 32128,
-             int64_t projection_dim = 4096)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim) {
+             int64_t num_layers = 24,
+             int64_t model_dim = 4096,
+             int64_t ff_dim = 10240,
+             int64_t num_heads = 64,
+             int64_t vocab_size = 32128,
+             int64_t projection_dim = -1)
+        : GGMLRunner(backend) {
+        if (tensor_types.find(prefix + ".final_projection.0.weight") != tensor_types.end()) {
+            num_layers = 12;
+            model_dim = 768;
+            ff_dim = 2048;
+            num_heads = 12;
+            vocab_size = 32128;
+            projection_dim = 4096;
+        }
+
+        model = T5(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim);
         model.init(params_ctx, tensor_types, prefix);
     }
 
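
With these defaults the runner assumes T5-XXL and only drops to the 12-layer, 768-dim configuration when the checkpoint ships a `final_projection` weight; the new no-arg `T5()` constructor exists so that `model` can be default-constructed and reassigned after this check instead of being built in the initializer list. A self-contained sketch of the detection logic, assuming a hypothetical `T5Config` holder and `detect_t5_config` helper (the tensor-key suffix and both parameter sets come from the diff; the `"text_encoders.t5"` prefix in `main` is made up):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Stand-in for ggml's tensor type enum; the real one lives in ggml.h.
enum ggml_type { GGML_TYPE_F32 };

// Illustrative config holder (not in the diff); defaults match T5-XXL.
struct T5Config {
    int64_t num_layers = 24;
    int64_t model_dim = 4096;
    int64_t ff_dim = 10240;
    int64_t num_heads = 64;
    int64_t vocab_size = 32128;
    int64_t projection_dim = -1;  // <= 0 means no final projection
};

// Mirrors the constructor's check: a final_projection weight in the
// checkpoint implies the 12-layer, 768-dim variant with a 4096-d projection.
T5Config detect_t5_config(const std::map<std::string, enum ggml_type>& tensor_types,
                          const std::string& prefix) {
    T5Config cfg;
    if (tensor_types.find(prefix + ".final_projection.0.weight") != tensor_types.end()) {
        cfg.num_layers = 12;
        cfg.model_dim = 768;
        cfg.ff_dim = 2048;
        cfg.num_heads = 12;
        cfg.projection_dim = 4096;
    }
    return cfg;
}

int main() {
    // Hypothetical prefix; the real one depends on the model file layout.
    std::map<std::string, enum ggml_type> tensor_types = {
        {"text_encoders.t5.final_projection.0.weight", GGML_TYPE_F32},
    };
    T5Config cfg = detect_t5_config(tensor_types, "text_encoders.t5");
    std::cout << "layers=" << cfg.num_layers
              << " model_dim=" << cfg.model_dim
              << " projection_dim=" << cfg.projection_dim << "\n";
    // -> layers=12 model_dim=768 projection_dim=4096
}
```
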