Skip to content
Merged
  •  
  •  
  •  
7 changes: 7 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"homepage": "https://github.com/huggingface/transformers.js#readme",
"dependencies": {
"@huggingface/jinja": "^0.5.3",
"@huggingface/tokenizers": "^0.1.0",
"onnxruntime-node": "1.24.0-dev.20251104-75d35474d5",
"onnxruntime-web": "1.24.0-dev.20251104-75d35474d5",
"sharp": "^0.34.3"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { FEATURE_EXTRACTOR_NAME } from '../utils/constants.js';
import { Callable } from '../utils/generic.js';
import { getModelJSON } from '../utils/hub.js';
import { FEATURE_EXTRACTOR_NAME } from './utils/constants.js';
import { Callable } from './utils/generic.js';
import { getModelJSON } from './utils/hub.js';

/**
* Base class for feature extractors.
Expand All @@ -27,7 +27,7 @@ export class FeatureExtractor extends Callable {
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
* user or organization name, like `dbmdz/bert-base-german-cased`.
* - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
* @param {import('./utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
*
* @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
*/
Expand Down
15 changes: 12 additions & 3 deletions src/generation/streamers.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,18 @@
*/

import { mergeArrays } from '../utils/core.js';
import { is_chinese_char } from '../tokenizers.js';
import { apis } from '../env.js';

/**
 * Inclusive `[first, last]` code point bounds of the CJK ideograph blocks
 * that `is_chinese_char` accepts.
 */
const CJK_IDEOGRAPH_RANGES = [
  [0x4e00, 0x9fff], // CJK Unified Ideographs
  [0x3400, 0x4dbf], // CJK Unified Ideographs Extension A
  [0x20000, 0x2a6df], // CJK Unified Ideographs Extension B
  [0x2a700, 0x2b73f], // CJK Unified Ideographs Extension C
  [0x2b740, 0x2b81f], // CJK Unified Ideographs Extension D
  [0x2b820, 0x2ceaf], // CJK Unified Ideographs Extension E
  [0xf900, 0xfaff], // CJK Compatibility Ideographs
  [0x2f800, 0x2fa1f], // CJK Compatibility Ideographs Supplement
];

/**
 * Tests whether a code point lies inside one of the CJK ideograph ranges.
 *
 * @param {number} cp The code point to test.
 * @returns {boolean} `true` when `cp` is within any of the ranges (bounds inclusive), otherwise `false`.
 */
const is_chinese_char = (cp) => CJK_IDEOGRAPH_RANGES.some(([first, last]) => cp >= first && cp <= last);

export class BaseStreamer {
/**
* Function that is called by `.generate()` to push new tokens
Expand All @@ -31,7 +40,7 @@ const stdout_write = apis.IS_PROCESS_AVAILABLE ? (x) => process.stdout.write(x)
export class TextStreamer extends BaseStreamer {
/**
*
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
* @param {import('../tokenization_utils.js').PreTrainedTokenizer} tokenizer
* @param {Object} options
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
* @param {boolean} [options.skip_special_tokens=true] Whether to skip special tokens when decoding
Expand Down Expand Up @@ -147,7 +156,7 @@ export class TextStreamer extends BaseStreamer {
*/
export class WhisperTextStreamer extends TextStreamer {
/**
* @param {import('../tokenizers.js').WhisperTokenizer} tokenizer
* @param {import('../models/whisper/tokenization_whisper.js').WhisperTokenizer} tokenizer
* @param {Object} options
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { Callable } from '../utils/generic.js';
import { Tensor, interpolate, stack } from '../utils/tensor.js';
import { bankers_round, max, min, softmax } from '../utils/maths.js';
import { RawImage } from '../utils/image.js';
import { calculateReflectOffset } from '../utils/core.js';
import { getModelJSON } from '../utils/hub.js';
import { IMAGE_PROCESSOR_NAME } from '../utils/constants.js';
import { Callable } from './utils/generic.js';
import { Tensor, interpolate, stack } from './utils/tensor.js';
import { bankers_round, max, min, softmax } from './utils/maths.js';
import { RawImage } from './utils/image.js';
import { calculateReflectOffset } from './utils/core.js';
import { getModelJSON } from './utils/hub.js';
import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';

/**
* Named tuple to indicate the order we are using is (height x width),
Expand Down Expand Up @@ -1069,7 +1069,7 @@ export class ImageProcessor extends Callable {
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
* user or organization name, like `dbmdz/bert-base-german-cased`.
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
* @param {import('./utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
*
* @returns {Promise<ImageProcessor>} A new instance of the Processor class.
*/
Expand Down
1 change: 0 additions & 1 deletion src/models/albert/index.js

This file was deleted.

5 changes: 5 additions & 0 deletions src/models/albert/tokenization_albert.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for ALBERT models.
 * Differs from `PreTrainedTokenizer` only by the `return_token_type_ids` field set below.
 */
export class AlbertTokenizer extends PreTrainedTokenizer {
// NOTE(review): presumably makes encodings include `token_type_ids` — confirm against `PreTrainedTokenizer`.
return_token_type_ids = true;
}
1 change: 0 additions & 1 deletion src/models/apertus/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/arcee/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/ast/index.js

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
import { FeatureExtractor, validate_audio_inputs } from '../../feature_extraction_utils.js';
import { Tensor } from '../../utils/tensor.js';
import { mel_filter_bank, spectrogram, window_function } from '../../utils/audio.js';

Expand Down
1 change: 0 additions & 1 deletion src/models/audio_spectrogram_transformer/index.js

This file was deleted.

2 changes: 1 addition & 1 deletion src/models/auto/feature_extraction_auto.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { FEATURE_EXTRACTOR_NAME, GITHUB_ISSUE_URL } from '../../utils/constants.js';
import { getModelJSON } from '../../utils/hub.js';
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
import { FeatureExtractor } from '../../feature_extraction_utils.js';
import * as AllFeatureExtractors from '../feature_extractors.js';

export class AutoFeatureExtractor {
Expand Down
2 changes: 1 addition & 1 deletion src/models/auto/image_processing_auto.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { getModelJSON } from '../../utils/hub.js';
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';
import * as AllImageProcessors from '../image_processors.js';
import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';

Expand Down
12 changes: 5 additions & 7 deletions src/models.js → src/models/auto/modeling_auto.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@
* @module models
*/

import { AutoConfig } from './configs.js';
import { PreTrainedModel } from './models/modeling_utils.js';
import { AutoConfig } from '../../configs.js';
import { PreTrainedModel } from '../modeling_utils.js';

import { CUSTOM_ARCHITECTURES, MODEL_CLASS_TYPE_MAPPING, MODEL_MAPPINGS } from './models/registry.js';
import { CUSTOM_ARCHITECTURES, MODEL_CLASS_TYPE_MAPPING, MODEL_MAPPINGS } from '../registry.js';

import * as ALL_MODEL_FILES from './models/index.js';
import * as ALL_MODEL_FILES from '../models.js';

/**
* Base class of all AutoModels. Contains the `from_pretrained` function
* which is used to instantiate pretrained models.
*/
export class PretrainedMixin {
class PretrainedMixin {
/**
* Mapping from model type to model class.
* @type {Map<string, Object>[]}
Expand Down Expand Up @@ -368,5 +368,3 @@ export class AutoModelForImageTextToText extends PretrainedMixin {
/**
 * Auto-model class built on `PretrainedMixin`.
 * Its only own member is the static `MODEL_CLASS_MAPPINGS` list below.
 */
export class AutoModelForAudioTextToText extends PretrainedMixin {
// Single-entry list holding the audio-text-to-text mapping from `MODEL_MAPPINGS`.
static MODEL_CLASS_MAPPINGS = [MODEL_MAPPINGS.MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES];
}

export * from './models/index.js';
4 changes: 2 additions & 2 deletions src/models/auto/processing_auto.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
import { getModelJSON } from '../../utils/hub.js';
import { Processor } from '../../base/processing_utils.js';
import { Processor } from '../../processing_utils.js';

import * as AllProcessors from '../processors.js';
import * as AllImageProcessors from '../image_processors.js';
import * as AllFeatureExtractors from '../feature_extractors.js';

/**
* @typedef {import('../../base/processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions
* @typedef {import('../../processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions
*/

/**
Expand Down
62 changes: 62 additions & 0 deletions src/models/auto/tokenization_auto.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { PreTrainedTokenizer, loadTokenizer } from '../../tokenization_utils.js';
import * as AllTokenizers from '../tokenizers.js';

/**
* Helper class which is used to instantiate pretrained tokenizers with the `from_pretrained` function.
* The chosen tokenizer class is determined by the type specified in the tokenizer config.
*
* **Example:** Create an `AutoTokenizer` and use it to tokenize a sentence.
* This will automatically detect the tokenizer type based on the tokenizer class defined in `tokenizer_config.json`.
*
* ```javascript
* import { AutoTokenizer } from '@huggingface/transformers';
*
* const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
* const { input_ids } = await tokenizer('I love transformers!');
* // Tensor {
* // data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],
* // dims: [1, 6],
* // type: 'int64',
* // size: 6,
* // }
* ```
*/
export class AutoTokenizer {
  /**
   * Load the tokenizer files of a pretrained model and construct the matching tokenizer class.
   *
   * The class is chosen from the `tokenizer_class` entry of the loaded tokenizer config. A trailing
   * `Fast` is stripped from that name, and when no class with the resulting name is found among the
   * known tokenizers, a warning is logged and the base `PreTrainedTokenizer` is used instead.
   *
   * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
   * - A string, the *model id* of a pretrained tokenizer hosted inside a model repo on huggingface.co.
   *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
   *   user or organization name, like `dbmdz/bert-base-german-cased`.
   * - A path to a *directory* containing tokenizer files, e.g., `./my_model_directory/`.
   * @param {import('../../tokenization_utils.js').PretrainedTokenizerOptions} options Additional options for loading the tokenizer.
   *
   * @returns {Promise<PreTrainedTokenizer>} A new tokenizer instance built from the loaded files.
   */
  static async from_pretrained(
    pretrained_model_name_or_path,
    { progress_callback = null, config = null, cache_dir = null, local_files_only = false, revision = 'main' } = {},
  ) {
    // Forward only the recognised loading options.
    const loadOptions = { progress_callback, config, cache_dir, local_files_only, revision };
    const [tokenizerJSON, tokenizerConfig] = await loadTokenizer(pretrained_model_name_or_path, loadOptions);

    // Class names may be stored with a "Fast" suffix; drop it. Without any class name, use the base name.
    const tokenizerName = tokenizerConfig.tokenizer_class?.replace(/Fast$/, '') ?? 'PreTrainedTokenizer';

    const TokenizerClass = AllTokenizers[tokenizerName];
    if (TokenizerClass) {
      return new TokenizerClass(tokenizerJSON, tokenizerConfig);
    }

    console.warn(`Unknown tokenizer class "${tokenizerName}", attempting to construct from base class.`);
    return new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig);
  }
}
1 change: 0 additions & 1 deletion src/models/bart/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/bart/tokenization_bart.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for BART models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class BartTokenizer extends PreTrainedTokenizer {}
2 changes: 1 addition & 1 deletion src/models/beit/image_processing_beit.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';

/**
 * Image processor class for BEiT models, exposed under a `FeatureExtractor` name.
 * Adds no members of its own; everything is inherited from `ImageProcessor`.
 */
export class BeitFeatureExtractor extends ImageProcessor {}
2 changes: 0 additions & 2 deletions src/models/beit/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/bert/index.js

This file was deleted.

5 changes: 5 additions & 0 deletions src/models/bert/tokenization_bert.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for BERT models.
 * Differs from `PreTrainedTokenizer` only by the `return_token_type_ids` field set below.
 */
export class BertTokenizer extends PreTrainedTokenizer {
// NOTE(review): presumably makes encodings include `token_type_ids` — confirm against `PreTrainedTokenizer`.
return_token_type_ids = true;
}
2 changes: 1 addition & 1 deletion src/models/bit/image_processing_bit.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';

/**
 * Image processor class for BiT models.
 * Adds no members of its own; everything is inherited from `ImageProcessor`.
 */
export class BitImageProcessor extends ImageProcessor {}
1 change: 0 additions & 1 deletion src/models/bit/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/blenderbot/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/blenderbot/tokenization_blenderbot.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for Blenderbot models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class BlenderbotTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/blenderbot_small/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/blenderbot_small/tokenization_blenderbot_small.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for BlenderbotSmall models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class BlenderbotSmallTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/bloom/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/bloom/tokenization_bloom.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for BLOOM models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class BloomTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/camembert/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/camembert/tokenization_camembert.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for CamemBERT models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class CamembertTokenizer extends PreTrainedTokenizer {}
2 changes: 1 addition & 1 deletion src/models/chatterbox/feature_extraction_chatterbox.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
import { FeatureExtractor, validate_audio_inputs } from '../../feature_extraction_utils.js';
import { Tensor } from '../../utils/tensor.js';

export class ChatterboxFeatureExtractor extends FeatureExtractor {
Expand Down
3 changes: 0 additions & 3 deletions src/models/chatterbox/index.js

This file was deleted.

4 changes: 2 additions & 2 deletions src/models/chatterbox/processing_chatterbox.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { AutoFeatureExtractor } from '../auto/feature_extraction_auto.js';
import { AutoTokenizer } from '../../tokenizers.js';
import { Processor } from '../../base/processing_utils.js';
import { AutoTokenizer } from '../auto/tokenization_auto.js';
import { Processor } from '../../processing_utils.js';

/**
* Represents a ChatterboxProcessor that extracts features from an audio input.
Expand Down
2 changes: 1 addition & 1 deletion src/models/chinese_clip/image_processing_chinese_clip.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';

/**
 * Image processor class for Chinese-CLIP models, exposed under a `FeatureExtractor` name.
 * Adds no members of its own; everything is inherited from `ImageProcessor`.
 */
export class ChineseCLIPFeatureExtractor extends ImageProcessor {}
2 changes: 0 additions & 2 deletions src/models/chinese_clip/index.js

This file was deleted.

2 changes: 1 addition & 1 deletion src/models/clap/feature_extraction_clap.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
import { FeatureExtractor, validate_audio_inputs } from '../../feature_extraction_utils.js';
import { Tensor } from '../../utils/tensor.js';
import { mel_filter_bank, spectrogram, window_function } from '../../utils/audio.js';

Expand Down
2 changes: 0 additions & 2 deletions src/models/clap/index.js

This file was deleted.

2 changes: 1 addition & 1 deletion src/models/clip/image_processing_clip.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';

/**
 * Image processor class for CLIP models.
 * Adds no members of its own; everything is inherited from `ImageProcessor`.
 */
export class CLIPImageProcessor extends ImageProcessor {}
/**
 * Same processor under a `FeatureExtractor` name: an empty subclass of `CLIPImageProcessor`.
 */
export class CLIPFeatureExtractor extends CLIPImageProcessor {}
2 changes: 0 additions & 2 deletions src/models/clip/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/clip/tokenization_clip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for CLIP models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class CLIPTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/clipseg/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/code_llama/tokenization_code_llama.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for Code Llama models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class CodeLlamaTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/codegen/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/codegen/tokenization_codegen.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for CodeGen models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class CodeGenTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/cohere/index.js

This file was deleted.

3 changes: 3 additions & 0 deletions src/models/cohere/tokenization_cohere.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for Cohere models.
 * Adds no members of its own; everything is inherited from `PreTrainedTokenizer`.
 */
export class CohereTokenizer extends PreTrainedTokenizer {}
1 change: 0 additions & 1 deletion src/models/convbert/index.js

This file was deleted.

5 changes: 5 additions & 0 deletions src/models/convbert/tokenization_convbert.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { PreTrainedTokenizer } from '../../tokenization_utils.js';

/**
 * Tokenizer class for ConvBERT models.
 * Differs from `PreTrainedTokenizer` only by the `return_token_type_ids` field set below.
 */
export class ConvBertTokenizer extends PreTrainedTokenizer {
// NOTE(review): presumably makes encodings include `token_type_ids` — confirm against `PreTrainedTokenizer`.
return_token_type_ids = true;
}
2 changes: 1 addition & 1 deletion src/models/convnext/image_processing_convnext.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ImageProcessor } from '../../base/image_processors_utils.js';
import { ImageProcessor } from '../../image_processors_utils.js';

export class ConvNextImageProcessor extends ImageProcessor {
constructor(config) {
Expand Down
2 changes: 0 additions & 2 deletions src/models/convnext/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/convnextv2/index.js

This file was deleted.

1 change: 0 additions & 1 deletion src/models/d_fine/index.js

This file was deleted.

Loading
Loading