feat: Add AbortSignal support across library components #1193

Draft: wants to merge 2 commits into base: main
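This PR threads a single `abort_signal` option through config, model, tokenizer, processor, and pipeline loading, so callers can cancel in-flight downloads with a standard `AbortController`. As a rough sketch of the intended usage (the option name follows this diff and is not part of any released API yet):

```js
import { pipeline } from '@huggingface/transformers';

const controller = new AbortController();

// Give up on the model download after 5 seconds.
const timeout = setTimeout(() => controller.abort(), 5000);

try {
    // `abort_signal` is the option introduced by this PR; aborting the
    // controller should reject the pending fetches with an AbortError.
    const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224', {
        abort_signal: controller.signal,
    });
    clearTimeout(timeout);
    console.log(await classifier('https://example.com/cat.jpg'));
} catch (err) {
    if (err.name !== 'AbortError') throw err;
    console.log('Model loading was cancelled.');
}
```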
2 changes: 2 additions & 0 deletions src/configs.js
@@ -367,6 +367,7 @@ export class PretrainedConfig {
cache_dir = null,
local_files_only = false,
revision = 'main',
abort_signal = undefined,
} = {}) {
if (config && !(config instanceof PretrainedConfig)) {
config = new PretrainedConfig(config);
@@ -378,6 +379,7 @@ export class PretrainedConfig {
cache_dir,
local_files_only,
revision,
abort_signal,
})
return new this(data);
}
4 changes: 4 additions & 0 deletions src/models.js
@@ -980,6 +980,7 @@ export class PreTrainedModel extends Callable {
local_files_only = false,
revision = 'main',
model_file_name = null,
abort_signal = undefined,
subfolder = 'onnx',
device = null,
dtype = null,
@@ -999,6 +1000,7 @@ export class PreTrainedModel extends Callable {
dtype,
use_external_data_format,
session_options,
abort_signal,
}

const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
@@ -6999,6 +7001,7 @@ export class PretrainedMixin {
dtype = null,
use_external_data_format = null,
session_options = {},
abort_signal = undefined,
} = {}) {

const options = {
Expand All @@ -7013,6 +7016,7 @@ export class PretrainedMixin {
dtype,
use_external_data_format,
session_options,
abort_signal,
}
options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);

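The same option is accepted when loading models directly rather than through `pipeline()`. A minimal sketch, assuming the `PretrainedMixin` changes above land as written:

```js
import { AutoModel } from '@huggingface/transformers';

const controller = new AbortController();

// One signal covers both the config fetch and the ONNX weight downloads,
// since from_pretrained forwards `abort_signal` to AutoConfig as well.
const model = await AutoModel.from_pretrained('Xenova/all-MiniLM-L6-v2', {
    abort_signal: controller.signal,
});
```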
3 changes: 2 additions & 1 deletion src/models/janus/processing_janus.js
@@ -18,6 +18,7 @@ export class VLChatProcessor extends Processor {
this.image_start_tag = this.config.image_start_tag;
this.image_end_tag = this.config.image_end_tag;
this.num_image_tokens = this.config.num_image_tokens;
this.abort_signal = this.config.abort_signal;
}

/**
@@ -50,7 +51,7 @@ export class VLChatProcessor extends Processor {
conversation
.filter((msg) => msg.images)
.flatMap((msg) => msg.images)
.map((img) => RawImage.read(img))
.map((img) => RawImage.read(img, this.abort_signal))
);
} else if (!Array.isArray(images)) {
images = [images];
8 changes: 4 additions & 4 deletions src/models/mgp_str/processing_mgp_str.js
@@ -143,12 +143,12 @@ export class MgpstrProcessor extends Processor {
}
}
/** @type {typeof Processor.from_pretrained} */
static async from_pretrained(...args) {
const base = await super.from_pretrained(...args);
static async from_pretrained(pretrained_model_name_or_path, options) {
const base = await super.from_pretrained(pretrained_model_name_or_path, options);

// Load Transformers.js-compatible versions of the BPE and WordPiece tokenizers
const bpe_tokenizer = await AutoTokenizer.from_pretrained("Xenova/gpt2") // openai-community/gpt2
const wp_tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased") // google-bert/bert-base-uncased
const bpe_tokenizer = await AutoTokenizer.from_pretrained("Xenova/gpt2", { abort_signal: options?.abort_signal }) // openai-community/gpt2
const wp_tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased", { abort_signal: options?.abort_signal }) // google-bert/bert-base-uncased

// Update components
base.components = {
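The `MgpstrProcessor` change shows the pattern for composite loaders: take the options object explicitly instead of `...args`, and forward the signal to every nested `from_pretrained` so that one `abort()` cancels all sub-downloads together. A generic sketch of that shape (the class and its components here are hypothetical):

```js
import { AutoTokenizer, AutoProcessor } from '@huggingface/transformers';

// Hypothetical composite processor following the MgpstrProcessor pattern.
class CompositeProcessor {
    constructor(components) {
        Object.assign(this, components);
    }

    static async from_pretrained(model_id, options) {
        const abort_signal = options?.abort_signal;
        // Forward the caller's signal to each nested loader.
        const [tokenizer, processor] = await Promise.all([
            AutoTokenizer.from_pretrained(model_id, { abort_signal }),
            AutoProcessor.from_pretrained(model_id, { abort_signal }),
        ]);
        return new CompositeProcessor({ tokenizer, processor });
    }
}
```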
53 changes: 31 additions & 22 deletions src/pipelines.js
@@ -85,16 +85,17 @@ import { RawImage } from './utils/image.js';
/**
* Prepare images for further tasks.
* @param {ImagePipelineInputs} images images to prepare.
* @param {AbortSignal} [abort_signal] An optional AbortSignal used to cancel the underlying image fetches.
* @returns {Promise<RawImage[]>} returns processed images.
* @private
*/
async function prepareImages(images) {
async function prepareImages(images, abort_signal) {
if (!Array.isArray(images)) {
images = [images];
}

// Possibly convert any non-images to images
return await Promise.all(images.map(x => RawImage.read(x)));
return await Promise.all(images.map(x => RawImage.read(x, abort_signal)));
}

/**
@@ -106,17 +107,18 @@ async function prepareImages(images) {
* Prepare audios for further tasks.
* @param {AudioPipelineInputs} audios audios to prepare.
* @param {number} sampling_rate sampling rate of the audios.
* @param {AbortSignal} [abort_signal] An optional AbortSignal used to cancel the underlying audio fetches.
* @returns {Promise<Float32Array[]>} The preprocessed audio data.
* @private
*/
async function prepareAudios(audios, sampling_rate) {
async function prepareAudios(audios, sampling_rate, abort_signal) {
if (!Array.isArray(audios)) {
audios = [audios];
}

return await Promise.all(audios.map(x => {
if (typeof x === 'string' || x instanceof URL) {
return read_audio(x, sampling_rate);
return read_audio(x, sampling_rate, abort_signal);
} else if (x instanceof Float64Array) {
return new Float32Array(x);
}
@@ -169,13 +171,15 @@ export class Pipeline extends Callable {
* @param {PreTrainedModel} [options.model] The model used by the pipeline.
* @param {PreTrainedTokenizer} [options.tokenizer=null] The tokenizer used by the pipeline (if any).
* @param {Processor} [options.processor=null] The processor used by the pipeline (if any).
* @param {AbortSignal} [options.abort_signal=undefined] An optional AbortSignal used to cancel network requests made by the pipeline.
*/
constructor({ task, model, tokenizer = null, processor = null }) {
constructor({ task, model, tokenizer = null, processor = null, abort_signal = undefined }) {
super();
this.task = task;
this.model = model;
this.tokenizer = tokenizer;
this.processor = processor;
this.abort_signal = abort_signal;
}

/** @type {DisposeType} */
@@ -198,6 +202,7 @@
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
* @property {PreTrainedModel} model The model used by the pipeline.
* @property {Processor} processor The processor used by the pipeline.
* @property {AbortSignal} [abort_signal=undefined] An optional AbortSignal used to cancel network requests made by the pipeline.
*
* @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
* @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
@@ -210,6 +215,7 @@
* @property {PreTrainedModel} model The model used by the pipeline.
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
* @property {Processor} processor The processor used by the pipeline.
* @property {AbortSignal} [abort_signal=undefined] An optional AbortSignal used to cancel network requests made by the pipeline.
*
* @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
* @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
@@ -1401,7 +1407,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
pool = null,
} = {}) {

const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);
const { pixel_values } = await this.processor(preparedImages);
const outputs = await this.model({ pixel_values });

@@ -1491,7 +1497,7 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
} = {}) {

const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
const preparedAudios = await prepareAudios(audio, sampling_rate);
const preparedAudios = await prepareAudios(audio, sampling_rate, this.abort_signal);

// @ts-expect-error TS2339
const id2label = this.model.config.id2label;
@@ -1593,7 +1599,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
});

const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
const preparedAudios = await prepareAudios(audio, sampling_rate);
const preparedAudios = await prepareAudios(audio, sampling_rate, this.abort_signal);

const toReturn = [];
for (const aud of preparedAudios) {
@@ -1764,7 +1770,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
}

const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
const preparedAudios = await prepareAudios(audio, sampling_rate);
const preparedAudios = await prepareAudios(audio, sampling_rate, this.abort_signal);

const toReturn = [];
for (const aud of preparedAudios) {
@@ -1809,7 +1815,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
const hop_length = this.processor.feature_extractor.config.hop_length;

const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
const preparedAudios = await prepareAudios(audio, sampling_rate);
const preparedAudios = await prepareAudios(audio, sampling_rate, this.abort_signal);

const toReturn = [];
for (const aud of preparedAudios) {
@@ -1906,7 +1912,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
audio = [/** @type {AudioInput} */ (audio)];
}
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
const preparedAudios = await prepareAudios(audio, sampling_rate);
const preparedAudios = await prepareAudios(audio, sampling_rate, this.abort_signal);
const toReturn = [];
for (const aud of preparedAudios) {
const inputs = await this.processor(aud);
@@ -1971,7 +1977,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
async _call(images, generate_kwargs = {}) {

const isBatched = Array.isArray(images);
const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

const { pixel_values } = await this.processor(preparedImages);

@@ -2061,7 +2067,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
top_k = 5
} = {}) {

const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

const { pixel_values } = await this.processor(preparedImages);
const output = await this.model({ pixel_values });
@@ -2162,7 +2168,7 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
throw Error("Image segmentation pipeline currently only supports a batch size of 1.");
}

const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);
const imageSizes = preparedImages.map(x => [x.height, x.width]);

const { pixel_values, pixel_mask } = await this.processor(preparedImages);
@@ -2292,7 +2298,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
} = {}) {

const isBatched = Array.isArray(images);
const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

// Insert label into hypothesis template
const texts = candidate_labels.map(
@@ -2397,7 +2403,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
if (isBatched && images.length !== 1) {
throw Error("Object detection pipeline currently only supports a batch size of 1.");
}
const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

const imageSizes = percentage ? null : preparedImages.map(x => [x.height, x.width]);

@@ -2530,7 +2536,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
} = {}) {

const isBatched = Array.isArray(images);
const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

// Run tokenization
const text_inputs = this.tokenizer(candidate_labels, {
@@ -2636,7 +2642,7 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
// NOTE: For now, we only support a batch size of 1

// Preprocess image
const preparedImage = (await prepareImages(image))[0];
const preparedImage = (await prepareImages(image, this.abort_signal))[0];
const { pixel_values } = await this.processor(preparedImage);

// Run tokenization
@@ -2776,17 +2782,18 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi

async _call_text_to_spectrogram(text_inputs, { speaker_embeddings }) {

// Load vocoder, if not provided
if (!this.vocoder) {
console.log('No vocoder specified, using default HifiGan vocoder.');
this.vocoder = await AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID, { dtype: 'fp32' });
this.vocoder = await AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID, { dtype: 'fp32', abort_signal: this.abort_signal });
}

// Load speaker embeddings as Float32Array from path/URL
if (typeof speaker_embeddings === 'string' || speaker_embeddings instanceof URL) {
// Load from URL with fetch
speaker_embeddings = new Float32Array(
await (await fetch(speaker_embeddings)).arrayBuffer()
await (await fetch(speaker_embeddings, { signal: this.abort_signal })).arrayBuffer()
);
}

@@ -2854,7 +2861,7 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin
/** @type {ImageToImagePipelineCallback} */
async _call(images) {

const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);
const inputs = await this.processor(preparedImages);
const outputs = await this.model(inputs);

@@ -2917,7 +2924,7 @@ export class DepthEstimationPipeline extends (/** @type {new (options: ImagePipe
/** @type {DepthEstimationPipelineCallback} */
async _call(images) {

const preparedImages = await prepareImages(images);
const preparedImages = await prepareImages(images, this.abort_signal);

const inputs = await this.processor(preparedImages);
const { predicted_depth } = await this.model(inputs);
@@ -3301,6 +3308,7 @@ export async function pipeline(
dtype = null,
model_file_name = null,
session_options = {},
abort_signal = undefined,
} = {}
) {
// Helper method to construct pipeline
@@ -3331,6 +3339,7 @@
dtype,
model_file_name,
session_options,
abort_signal,
}

const classes = new Map([
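Because the `Pipeline` constructor stores the signal on the instance, the same signal also covers call-time input fetches (`prepareImages`, `prepareAudios`, and the speaker-embedding `fetch` above), not just model loading. A sketch, assuming `classifier` was constructed with `{ abort_signal: controller.signal }` as in the earlier example:

```js
// Start a call whose input is a remote image; prepareImages() passes
// this.abort_signal down to RawImage.read().
const pending = classifier('https://example.com/large-image.png');

// Aborting now should cancel the in-flight image download and reject
// the call with an AbortError.
controller.abort();

await pending.catch((err) => console.log(err.name)); // 'AbortError'
```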
4 changes: 4 additions & 0 deletions src/tokenizers.js
@@ -2682,6 +2682,7 @@ export class PreTrainedTokenizer extends Callable {
local_files_only = false,
revision = 'main',
legacy = null,
abort_signal = undefined,
} = {}) {

const info = await loadTokenizer(pretrained_model_name_or_path, {
@@ -2691,6 +2692,7 @@
local_files_only,
revision,
legacy,
abort_signal,
})

// @ts-ignore
@@ -4351,6 +4353,7 @@ export class AutoTokenizer {
local_files_only = false,
revision = 'main',
legacy = null,
abort_signal = undefined,
} = {}) {

const [tokenizerJSON, tokenizerConfig] = await loadTokenizer(pretrained_model_name_or_path, {
@@ -4360,6 +4363,7 @@
local_files_only,
revision,
legacy,
abort_signal,
})

// Some tokenizers are saved with the "Fast" suffix, so we remove that if present.
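Both `PreTrainedTokenizer.from_pretrained` and `AutoTokenizer.from_pretrained` route the signal into the same `loadTokenizer` call, so an already-aborted signal should fail fast without fetching anything. A sketch under that assumption:

```js
import { AutoTokenizer } from '@huggingface/transformers';

const controller = new AbortController();
controller.abort(); // Abort before the request even starts.

try {
    await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased', {
        abort_signal: controller.signal,
    });
} catch (err) {
    console.log(err.name); // Expected: 'AbortError'
}
```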
5 changes: 3 additions & 2 deletions src/utils/audio.js
@@ -23,9 +23,10 @@ import { Tensor, matmul } from './tensor.js';
* Helper function to read audio from a path/URL.
* @param {string|URL} url The path/URL to load the audio from.
* @param {number} sampling_rate The sampling rate to use when decoding the audio.
* @param {AbortSignal} [abort_signal] An optional AbortSignal used to cancel the audio fetch.
* @returns {Promise<Float32Array>} The decoded audio as a `Float32Array`.
*/
export async function read_audio(url, sampling_rate) {
export async function read_audio(url, sampling_rate, abort_signal) {
if (typeof AudioContext === 'undefined') {
// Running in node or an environment without AudioContext
throw Error(
@@ -35,7 +36,7 @@ export async function read_audio(url, sampling_rate) {
)
}

const response = await (await getFile(url)).arrayBuffer();
const response = await (await getFile(url, abort_signal)).arrayBuffer();
const audioCTX = new AudioContext({ sampleRate: sampling_rate });
if (typeof sampling_rate === 'undefined') {
console.warn(`No sampling rate provided, using default of ${audioCTX.sampleRate}Hz.`)
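The standalone `read_audio` helper gains the same optional third argument, which is forwarded to `getFile` to cancel the download. A browser-only sketch (the helper requires `AudioContext`; the sample URL is the one used in the transformers.js docs):

```js
import { read_audio } from '@huggingface/transformers';

const controller = new AbortController();

// Cancel the download if the file has not arrived within 10 seconds.
const timeout = setTimeout(() => controller.abort(), 10_000);

const audio = await read_audio(
    'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav',
    16000,
    controller.signal,
);
clearTimeout(timeout);
console.log(audio.length); // Number of decoded Float32Array samples.
```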