Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cannot instantiate parameters on finetuned model #1694

Open
Ashh-Z opened this issue Apr 21, 2024 · 1 comment
Open

Cannot instantiate parameters on finetuned model #1694

Ashh-Z opened this issue Apr 21, 2024 · 1 comment

Comments

@Ashh-Z
Copy link

Ashh-Z commented Apr 21, 2024

Tested versions

  • Reproducible in 3.1

System information

linux - pyannote 3.1

Issue description

Unable to instantiate parameters on trained pipeline

Code for fine-tuning:

# Expose only GPU index 3 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Prefer the GPU when CUDA reports one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "CUDA is available. Using GPU."
    if device.type == "cuda"
    else "CUDA is not available. Using CPU."
)


# Load the database configuration file and fetch the protocol used for
# fine-tuning; a FileFinder instance is supplied as the "audio" preprocessor.
registry.load_database('finetune.yml')
protocol = registry.get_protocol("dis.SpeakerDiarization.aa", preprocessors={"audio":
                                                        FileFinder()})

# Start from the pretrained segmentation model.
# NOTE(review): "token_here" looks like a placeholder for a real access token.
from pyannote.audio import Model
model = Model.from_pretrained("pyannote/segmentation-3.0", use_auth_token="token_here")

# Build the segmentation task on the protocol, reusing the pretrained model's
# chunk duration and its number of classes as the maximum number of speakers.
from pyannote.audio.tasks import Segmentation
task = Segmentation(
    protocol, 
    duration=model.specifications.duration, 
    max_num_speakers=len(model.specifications.classes), 
    batch_size=32,
    num_workers=0, 
    loss="bce", 
    vad_loss="bce")
# Attach the task to the model, then run its data-preparation and setup hooks.
model.task = task
model.prepare_data()
model.setup()

# Move the model to the device selected earlier in the script.
model = model.to(device)

# NOTE(review): save_dir is not referenced anywhere else in the visible script;
# presumably it was meant to be the checkpoint output directory — confirm.
save_dir = 'save_folder'
# this takes approximately 15min to run on Google Colab GPU
from types import MethodType
from torch.optim import Adam
from pytorch_lightning.callbacks import (
    # EarlyStopping,
    ModelCheckpoint,
    RichProgressBar,
)

def configure_optimizers(self):
    """Return the fine-tuning optimizer: Adam over all parameters, lr 1e-4."""
    learning_rate = 1e-4
    optimizer = Adam(self.parameters(), lr=learning_rate)
    return optimizer

# Bind the optimizer factory to this model instance so it is called as a method.
model.configure_optimizers = MethodType(configure_optimizers, model)

# Checkpoint on the task's validation monitor, and always keep the last epoch.
monitor, direction = task.val_monitor
checkpoint = ModelCheckpoint(
    monitor=monitor,
    mode=direction,
    # save_top_k=1,
    every_n_epochs=1,
    save_last=True,
    save_weights_only=False,
    filename="{epoch}",
    verbose=False,
)  # the original snippet never closed this call (SyntaxError)

callbacks = [RichProgressBar(), checkpoint]

from pytorch_lightning import Trainer

# Follow the device chosen earlier so the CPU fallback does not crash on a
# hard-coded GPU accelerator. Validation and sanity-check batches are disabled.
trainer = Trainer(
    accelerator="gpu" if device.type == "cuda" else "cpu",
    callbacks=callbacks,
    max_epochs=20,
    limit_val_batches=0,
    num_sanity_val_steps=0,
    logger=False,
)
trainer.fit(model)

print('done training')

Running the model :

# Path of the fine-tuned checkpoint to load.
base_fine = 'last.ckpt'

from pyannote.audio.pipelines import SpeakerDiarization as SpeakerDiarizationPipeline

STEP=3.0

# hyperparameters used for AMI
PIPELINE_PARAMS = {
    "clustering": {
        "method": "centroid",
        "min_cluster_size": 15,
        "threshold": 0.6285824248662424,
    },
    "segmentation": {
        "min_duration_off": 0.0,
    },
}
print('setting up model')
# Build the diarization pipeline around the fine-tuned segmentation model,
# move it to the selected device, then instantiate its hyperparameters.
adapted_model = Model.from_pretrained(base_fine)
pipeline_adapted = SpeakerDiarizationPipeline(adapted_model, embedding="speechbrain/spkrec-ecapa-voxceleb", clustering="AgglomerativeClustering").to(device)
pipeline_adapted.instantiate(PIPELINE_PARAMS)
print('done setting')

# Output directory for the RTTM files. The original joined on an undefined
# name `fine` and reused `fine_save` for both the directory and the file path.
fine = 'save_results_folder'
os.makedirs(fine, exist_ok=True)

for file in protocol.test():
    uri = file['uri']

    # One RTTM file per test recording, named after its URI.
    fine_save = os.path.join(fine, f'{uri}.rttm')

    print('getting output')
    out_adapted = pipeline_adapted(file)
    print('got it')
    print(f"writting fine tune rttm for {uri}")
    with open(fine_save, 'w') as fine_rttm:
        out_adapted.write_rttm(fine_rttm)
    print("done")

RuntimeError Traceback (most recent call last)
Cell In[34], line 37
34 # pre_save = os.path.join(pre,f'{ uri }.rttm')
36 print('getting output')
---> 37 out_adapted = pipeline_adapted(file)
38 print('got it')
39 print(f"writting fine tune rttm for {uri}")

File ~/anaconda3/envs/env2/lib/python3.10/site-packages/pyannote/audio/core/pipeline.py:304, in Pipeline.__call__(self, file, **kwargs)
302 default_parameters = self.default_parameters()
303 except NotImplementedError:
--> 304 raise RuntimeError(
305 "A pipeline must be instantiated with pipeline.instantiate(parameters) before it can be applied."
306 )
308 try:
309 self.instantiate(default_parameters)

RuntimeError: A pipeline must be instantiated with pipeline.instantiate(parameters) before it can be applied.

Minimal reproduction example (MRE)

https://colab.research.google.com/drive/1PECWh2shDzR0bGo_nbUz3o3kG2tie0_F?usp=sharing

@hbredin
Copy link
Member

hbredin commented Apr 21, 2024

Please update the MRE to also take care of installing dependencies.
It fails at the first cell.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants