Skip to content

Commit 5120cdd

Browse files
committed
Remove HF token from code, use .env instead as described in README.md
1 parent af0adbe commit 5120cdd

File tree

6 files changed

+11
-6
lines changed

6 files changed

+11
-6
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ execute the first pipline only. If your task is to generate text from a given au
3232
an audio file as input - use (1) and (3). If you are training a new model and features is of the highest importance -
3333
go for (1) and (2) and consider expanding the library when you are satisfied with the result (see example 3).
3434

35-
Configuration of all the components is made through the `pipline.yaml` configuration file.
35+
Configuration of all the components is made through the `pipeline.yaml` configuration file. If you use components that require a Hugging Face token,
36+
create a `.env` file in the root directory and add the token as `huggingface_ACCESS_TOKEN=<your_token>` (currently required for pyannote-embedding, pyannote-vad and pyannote-sd).
3637

3738
## Pre-processing components
3839
### Filelist-DataFrame Creator

src/notebook_examples/configs/example_pipeline.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ segment_name_separator: "_"
33
intermediate_payload_path: 'results/example'
44
# device: 'cpu' # 'cpu'/'cuda'
55
log_each_x_records: 100
6-
huggingface_ACCESS_TOKEN: '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'
76
sampling_rate: 16000
87

98
preprocessing:

src/notebook_examples/configs/pulp_fiction_pipeline.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ input_dir: '../cut_preprocessed' # 'speech_examples', 'speech_examples_small'
22
segment_name_separator: "_"
33
intermediate_payload_path: 'results/pulp_fiction'
44
# device: 'cpu' # 'cpu'/'cuda'
5-
huggingface_ACCESS_TOKEN: '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'
65
max_workers: 1 # set the number of workers for parallel threads
76
sampling_rate: 16000
87
latent_logger:
@@ -247,7 +246,7 @@ segment_classifier:
247246
gmm_clustering_diarization:
248247
classification_column_name: 'gmm_clustering_diarization'
249248
n_components: 6
250-
covariance_type: 'full' # ‘full’, ‘tied’, ‘diag’, ‘spherical’
249+
covariance_type: 'full' # 'full', 'tied', 'diag', 'spherical'
251250
features_list: # assumes multiple column features are labeled 'i_<component_name>' where i is between
252251
# start_index (inclusive) and stop_index (exclusive)
253252
- speechbrain_embedding:

src/pipeline.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ input_dir: 'speech_examples_small' # 'speech_examples', 'speech_examples_small'
22
segment_name_separator: "_"
33
intermediate_payload_path: 'results'
44
# device: 'cpu' # 'cpu'/'cuda'
5-
huggingface_ACCESS_TOKEN: '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'
65
max_workers: 4 # set the number of workers for parallel threads
76
sampling_rate: 16000
87
latent_logger:

src/vanpy/utils/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,20 @@ def yaml_placeholder_replacement(full, val=None, initial=True) -> yaml.YAMLObjec
9696
def load_config(config_yaml_path: str = 'pipeline.yaml') -> Dict:
9797
"""
9898
Load a YAML configuration file and replace any placeholders with their corresponding values.
99+
If there is a .env file, load it and add content to config
100+
99101
:param config_yaml_path: path of the configuration file
100102
:return: configuration as a dictionary
101103
"""
102104
with open(config_yaml_path, 'r') as f:
103105
config = yaml.load(f, Loader=yaml.FullLoader)
104106
config = yaml_placeholder_replacement(config)
107+
# if there is a .env file, load it and add content to config
108+
if os.path.exists('.env'):
109+
with open('.env', 'r') as f:
110+
for line in f:
111+
key, value = line.strip().split('=', 1)
112+
config[key] = value
105113
return config
106114

107115

tests/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
config_test_PyannoteVAD = {
22
'model_params': {},
3-
'huggingface_ACCESS_TOKEN': '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',
43
'performance_measurement': False,
54
'add_segment_metadata': False,
65
'output_dir': 'segmented_audio',

0 commit comments

Comments
 (0)