1 change: 1 addition & 0 deletions README.md
@@ -288,6 +288,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
### Models

1. **[ALBERT](https://huggingface.co/docs/transformers/model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://huggingface.co/papers/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
1. **[Arcee](https://huggingface.co/docs/transformers/model_doc/arcee)** (from Arcee AI) released with the blog post [Announcing Arcee Foundation Models](https://www.arcee.ai/blog/announcing-the-arcee-foundation-model-family) by Fernando Fernandes, Varun Singh, Charles Goddard, Lucas Atkins, Mark McQuade, Maziyar Panahi, Conner Stewart, Colin Kealty, Raghav Ravishankar, Lucas Krauss, Anneketh Vij, Pranav Veldurthi, Abhishek Thakur, Julien Simon, Scott Zembsch, Benjamin Langer, Aleksiej Cecocho, Maitri Patel.
1. **[Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer)** (from MIT) released with the paper [AST: Audio Spectrogram Transformer](https://huggingface.co/papers/2104.01778) by Yuan Gong, Yu-An Chung, James Glass.
1. **[BART](https://huggingface.co/docs/transformers/model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://huggingface.co/papers/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
1. **[BEiT](https://huggingface.co/docs/transformers/model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://huggingface.co/papers/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
1 change: 1 addition & 0 deletions docs/snippets/6_supported-models.snippet
@@ -2,6 +2,7 @@
### Models

1. **[ALBERT](https://huggingface.co/docs/transformers/model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://huggingface.co/papers/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
1. **[Arcee](https://huggingface.co/docs/transformers/model_doc/arcee)** (from Arcee AI) released with the blog post [Announcing Arcee Foundation Models](https://www.arcee.ai/blog/announcing-the-arcee-foundation-model-family) by Fernando Fernandes, Varun Singh, Charles Goddard, Lucas Atkins, Mark McQuade, Maziyar Panahi, Conner Stewart, Colin Kealty, Raghav Ravishankar, Lucas Krauss, Anneketh Vij, Pranav Veldurthi, Abhishek Thakur, Julien Simon, Scott Zembsch, Benjamin Langer, Aleksiej Cecocho, Maitri Patel.
1. **[Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer)** (from MIT) released with the paper [AST: Audio Spectrogram Transformer](https://huggingface.co/papers/2104.01778) by Yuan Gong, Yu-An Chung, James Glass.
1. **[BART](https://huggingface.co/docs/transformers/model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://huggingface.co/papers/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
1. **[BEiT](https://huggingface.co/docs/transformers/model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://huggingface.co/papers/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
1 change: 1 addition & 0 deletions src/configs.js
@@ -111,6 +111,7 @@ function getNormalizedConfig(config) {
            mapping['hidden_size'] = 'hidden_size';
            break;
        case 'llama':
        case 'arcee':
        case 'lfm2':
        case 'smollm3':
        case 'olmo':
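For context, `getNormalizedConfig` translates model-specific config keys into a shared set of normalized names, and the new `arcee` case simply falls through to the existing `llama` branch. A minimal sketch of the pattern — the mapped keys below are illustrative assumptions, not the verbatim source:

```js
// Simplified sketch of the fall-through in getNormalizedConfig (src/configs.js).
// The mapped keys are illustrative; the real branch may map more or different keys.
function getNormalizedConfig(config) {
    const mapping = {};
    switch (config.model_type) {
        case 'llama':
        case 'arcee': // Arcee exposes a Llama-style config, so it reuses this branch.
        case 'lfm2':
        case 'smollm3':
        case 'olmo':
            mapping['num_heads'] = 'num_key_value_heads';
            mapping['num_layers'] = 'num_hidden_layers';
            mapping['hidden_size'] = 'hidden_size';
            break;
    }
    // Copy each mapped key from the raw config onto the normalized result.
    const normalized = {};
    for (const [to, from] of Object.entries(mapping)) {
        normalized[to] = config[from];
    }
    return normalized;
}
```

Because Arcee checkpoints use a Llama-style config layout, adding the single `case` label is the entire config-side change.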
9 changes: 9 additions & 0 deletions src/models.js
@@ -4594,6 +4594,13 @@ export class LlamaModel extends LlamaPreTrainedModel { }
export class LlamaForCausalLM extends LlamaPreTrainedModel { }
//////////////////////////////////////////////////

//////////////////////////////////////////////////
// Arcee models
export class ArceePreTrainedModel extends PreTrainedModel { }
export class ArceeModel extends ArceePreTrainedModel { }
export class ArceeForCausalLM extends ArceePreTrainedModel { }
//////////////////////////////////////////////////

//////////////////////////////////////////////////
// LFM2 models
export class Lfm2PreTrainedModel extends PreTrainedModel { }
@@ -7820,6 +7827,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
    ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
    ['codegen', ['CodeGenModel', CodeGenModel]],
    ['llama', ['LlamaModel', LlamaModel]],
    ['arcee', ['ArceeModel', ArceeModel]],
    ['lfm2', ['Lfm2Model', Lfm2Model]],
    ['smollm3', ['SmolLM3Model', SmolLM3Model]],
    ['exaone', ['ExaoneModel', ExaoneModel]],
@@ -7927,6 +7935,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
    ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
    ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
    ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
    ['arcee', ['ArceeForCausalLM', ArceeForCausalLM]],
    ['lfm2', ['Lfm2ForCausalLM', Lfm2ForCausalLM]],
    ['smollm3', ['SmolLM3ForCausalLM', SmolLM3ForCausalLM]],
    ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
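With the classes exported and registered in both decoder-only mappings, Arcee checkpoints resolve through the usual auto-loading machinery. A hedged usage sketch, assuming the published package name `@huggingface/transformers` and reusing the tiny test checkpoint from the tests below; generation options are illustrative:

```js
import { pipeline } from '@huggingface/transformers';

// The text-generation pipeline resolves 'arcee' via MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.
const generator = await pipeline(
    'text-generation',
    'onnx-internal-testing/tiny-random-ArceeForCausalLM',
);

const output = await generator('hello', { max_new_tokens: 10 });
console.log(output);
```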
51 changes: 51 additions & 0 deletions tests/models/arcee/test_modeling_arcee.js
@@ -0,0 +1,51 @@
import { PreTrainedTokenizer, ArceeForCausalLM } from "../../../src/transformers.js";

import { MAX_MODEL_LOAD_TIME, MAX_TEST_EXECUTION_TIME, MAX_MODEL_DISPOSE_TIME, DEFAULT_MODEL_OPTIONS } from "../../init.js";

export default () => {
  describe("ArceeForCausalLM", () => {
    const model_id = "onnx-internal-testing/tiny-random-ArceeForCausalLM";
    /** @type {ArceeForCausalLM} */
    let model;
    /** @type {PreTrainedTokenizer} */
    let tokenizer;
    beforeAll(async () => {
      model = await ArceeForCausalLM.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS);
      tokenizer = await PreTrainedTokenizer.from_pretrained(model_id);
      tokenizer.padding_side = "left";
    }, MAX_MODEL_LOAD_TIME);

    it(
      "batch_size=1",
      async () => {
        const inputs = tokenizer("hello");
        const outputs = await model.generate({
          ...inputs,
          max_length: 10,
        });
        expect(outputs.tolist()).toEqual([[1n, 22172n, 1316n, 11038n, 25378n, 11619n, 7959n, 15231n, 15231n, 23659n]]);
      },
      MAX_TEST_EXECUTION_TIME,
    );

    it(
      "batch_size>1",
      async () => {
        const inputs = tokenizer(["hello", "hello world"], { padding: true });
        const outputs = await model.generate({
          ...inputs,
          max_length: 10,
        });
        expect(outputs.tolist()).toEqual([
          [2n, 1n, 22172n, 5706n, 3803n, 11619n, 28763n, 4015n, 18904n, 7959n],
          [1n, 22172n, 3186n, 1316n, 11038n, 22918n, 9469n, 25671n, 22918n, 2687n],
        ]);
      },
      MAX_TEST_EXECUTION_TIME,
    );

    afterAll(async () => {
      await model?.dispose();
    }, MAX_MODEL_DISPOSE_TIME);
  });
};
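The assertions above compare raw BigInt token ids. To inspect the corresponding text, the ids can be decoded with the same tokenizer; a small sketch, not part of the PR — the tiny random model produces meaningless text, so only the shape of the output is informative:

```js
// Decode generated ids back to strings (illustrative; not in the test above).
const inputs = tokenizer("hello");
const outputs = await model.generate({ ...inputs, max_length: 10 });
const decoded = tokenizer.batch_decode(outputs, { skip_special_tokens: true });
console.log(decoded); // one string per batch item
```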