Search code, repositories, users, issues, pull requests...

BibTex entry

@inproceedings{zhao:2022:naacl,
  author    = {Y. Zhao and J. Hessel and Y. Yu and X. Lu and R. Zellers and Y. Choi},
  title     = {Connecting the Dots between Audio and Text without Parallel Data through Visual Knowledge Transfer},
  booktitle = {2022 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  month     = {Jul.},
  year      = {2022}
}

Joint Speech Recognition and Audio Captioning

Information

Reference

C. Narisetty, E. Tsunoo, X. Chang, Y. Kashiwagi, M. Hentschel, and S. Watanabe, "Joint Speech Recognition and Audio Captioning" in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) 2022, May 2022

Paper links

BibTex entry

@inproceedings{narisetty:2022:icassp,
  author    = {C. Narisetty and E. Tsunoo and X. Chang and Y. Kashiwagi and M. Hentschel and S. Watanabe},
  title     = {Joint Speech Recognition and Audio Captioning},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  month     = {May},
  year      = {2022}
}

Can Audio Captions Be Evaluated With Image Caption Metrics?

Information

Reference

Z. Zhou, Z. Zhang, X. Xu, Z. Xie, M. Wu, and K. Q. Zhu, "Can Audio Captions Be Evaluated With Image Caption Metrics?," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) 2022, May 2022

Paper links

BibTex entry

@inproceedings{zhou:2022:icassp,
  author    = {Z. Zhou and Z. Zhang and X. Xu and Z. Xie and M. Wu and K. Q. Zhu},
  title     = {Can Audio Captions Be Evaluated With Image Caption Metrics?},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  month     = {May},
  year      = {2022}
}

Diverse Audio Captioning via Adversarial Training

Information

Reference

X. Mei, X. Liu, J. Sun, M. D. Plumbley, and W. Wang, "Diverse Audio Captioning via Adversarial Training," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) 2022, May 2022

Paper links

BibTex entry

@inproceedings{mei:2022:icassp,
  author    = {X. Mei and X. Liu and J. Sun and M. D. Plumbley and W. Wang},
  title     = {Diverse Audio Captioning via Adversarial Training},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  month     = {May},
  year      = {2022}
}

Unsupervised Audio-Caption Aligning Learns Correspondences Between Individual Sound Events and Textual Phrases

Information

Reference

H. Xie, O. Räsänen, K. Drossos, and T. Virtanen, "Unsupervised Audio-Caption Aligning Learns Correspondences Between Individual Sound Events and Textual Phrases," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) 2022, May 2022

Paper links

BibTex entry

@inproceedings{xie:2022:icassp,
  author    = {H. Xie and O. R{\"a}s{\"a}nen and K. Drossos and T. Virtanen},
  title     = {Unsupervised Audio-Caption Aligning Learns Correspondences Between Individual Sound Events and Textual Phrases},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  month     = {May},
  year      = {2022}
}

Automated Audio Captioning Using Transfer Learning and Reconstruction Latent Space Similarity Regularization

Information

Reference

A. Koh, X. Fuzhao, and C. E. Siong, "Automated Audio Captioning Using Transfer Learning and Reconstruction Latent Space Similarity Regularization," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) 2022, May 2022

Paper links

BibTex entry

@inproceedings{koh:2022:icassp,
  author    = {A. Koh and X. Fuzhao and C. E. Siong},
  title     = {Automated Audio Captioning Using Transfer Learning and Reconstruction Latent Space Similarity Regularization},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  month     = {May},
  year      = {2022}
}

Year 2021

Improving the Performance of Automated Audio Captioning via Integrating the Acoustic and Semantic Information

Information

Reference

Z. Ye, H. Wang, D. Yang, and Y. Zou, "Improving the Performance of Automated Audio Captioning via Integrating the Acoustic and Semantic Information," in Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop, Nov. 2021

Paper links

BibTex entry

@inproceedings{ye:2021:dcase,
  author    = {Z. Ye and H. Wang and D. Yang and Y. Zou},
  title     = {Improving the Performance of Automated Audio Captioning via Integrating the Acoustic and Semantic Information},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop},
  month     = {Nov.},
  year      = {2021}
}

Continual Learning For Automated Audio Captioning Using The Learning Without Forgetting Approach

Information

Reference

J. Berg and K. Drossos, "Continual Learning For Automated Audio Captioning Using The Learning Without Forgetting Approach," in Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop, Nov. 2021

Paper links

Code

BibTex entry

@inproceedings{berg:2021:dcase,
  author    = {J. Berg and K. Drossos},
  title     = {Continual Learning For Automated Audio Captioning Using The Learning Without Forgetting Approach},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop},
  month     = {Nov.},
  year      = {2021}
}

Evaluating Off-the-Shelf Machine Listening and Natural Language Models for Automated Audio Captioning

Information

Reference

B. Weck, X. Favory, K. Drossos, and X. Serra, "Evaluating Off-the-Shelf Machine Listening and Natural Language Models for Automated Audio Captioning," in Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop, Nov. 2021

Paper links

BibTex entry

@inproceedings{weck:2021:dcase,
  author    = {B. Weck and X. Favory and K. Drossos and X. Serra},
  title     = {Evaluating Off-the-Shelf Machine Listening and Natural Language Models for Automated Audio Captioning},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop},
  month     = {Nov.},
  year      = {2021}
}

Audio Captioning Transformer

Information

Reference

X. Mei, X. Liu, Q. Huang, M. D. Plumbley, and W. Wang, "Audio Captioning Transformer," in Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop, Nov. 2021

Paper links

BibTex entry

@inproceedings{mei:2021:dcase,
  author    = {X. Mei and X. Liu and Q. Huang and M. D. Plumbley and W. Wang},
  title     = {Audio Captioning Transformer},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop},
  month     = {Nov.},
  year      = {2021}
}

CL4AC: A Contrastive Loss For Audio Captioning

Information

Reference

X. Liu et al., "CL4AC: A Contrastive Loss For Audio Captioning," in Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop, Nov. 2021

Paper links

Code

BibTex entry

@inproceedings{liu:2021:dcase,
  author    = {X. Liu and Q. Huang and X. Mei and T. Ko and H. L. Tang and M. D. Plumbley and W. Wang},
  title     = {{CL4AC}: A Contrastive Loss For Audio Captioning},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Workshop},
  month     = {Nov.},
  year      = {2021}
}

Visually Exploring Multi-Purpose Audio Data

Information

Reference

D. Heise and H. L. Bear, "Visually Exploring Multi-Purpose Audio Data," in IEEE 23rd International Workshop on Multimedia Signal Processing (MMSP), Oct. 2021

Paper links

BibTex entry

@inproceedings{heise:2021:mmsp,
  author    = {D. Heise and H. L. Bear},
  title     = {Visually Exploring Multi-Purpose Audio Data},
  booktitle = {2021 IEEE 23rd International Workshop on Multimedia Signal Processing (MMSP)},
  month     = {Oct.},
  year      = {2021}
}

An Automated Audio Captioning Approach Utilising a Resnet-Based Encoder

Information

Reference

A. Gebhard, A. Triantafyllopoulos, A. Baird, and B. Schuller, "An Automated Audio Captioning Approach Utilising a Resnet-Based Encoder," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{gebhard:2021:dcase:tech-report,
  author      = {A. Gebhard and A. Triantafyllopoulos and A. Baird and B. Schuller},
  title       = {An Automated Audio Captioning Approach Utilising a Resnet-Based Encoder},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

Improving The Performance Of Automated Audio Captioning Via Integrating The Acoustic And Textual Information

Information

Reference

Z. Ye, H. Wang, D. Yang, and Y. Zou, "Improving The Performance Of Automated Audio Captioning Via Integrating The Acoustic And Textual Information," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

Code

BibTex entry

@techreport{ye:2021:dcase:tech-report,
  author      = {Z. Ye and H. Wang and D. Yang and Y. Zou},
  title       = {Improving The Performance Of Automated Audio Captioning Via Integrating The Acoustic And Textual Information},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

Automated Audio Captioning With MLP-Mixer And Pre-Trained Encoder

Information

Reference

F. Xiao, J. Guan, and Q. Kong, "Automated Audio Captioning With MLP-Mixer And Pre-Trained Encoder," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{xiao:2021:dcase:tech-report,
  author      = {F. Xiao and J. Guan and Q. Kong},
  title       = {Automated Audio Captioning With {MLP}-Mixer And Pre-Trained Encoder},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

The SJTU System For DCASE2021 Challenge Task 6: Audio Captioning Based On Encoder Pre-Training And Reinforcement Learning

Information

Reference

X. Xu, Z. Xie, M. Wu, and Kai Yu, "The SJTU System For DCASE2021 Challenge Task 6: Audio Captioning Based On Encoder Pre-Training And Reinforcement Learning," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{xu:2021:dcase:tech-report,
  author      = {X. Xu and Z. Xie and M. Wu and Kai Yu},
  title       = {The {SJTU} System For {DCASE}2021 Challenge Task 6: Audio Captioning Based On Encoder Pre-Training And Reinforcement Learning},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

Audio Captioning Using Sound Event Detection

Information

Reference

A. Ö. Eren and M. Sert, "Audio Captioning Using Sound Event Detection," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{eren:2021:dcase:tech-report,
  author      = {A. \"{O}. Eren and M. Sert},
  title       = {Audio Captioning Using Sound Event Detection},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

An Encoder-Decoder Based Audio Captioning System With Transfer And Reinforcement Learning For DCASE Challenge 2021 Task 6

Information

Reference

X. Mei et al., "An Encoder-Decoder Based Audio Captioning System With Transfer And Reinforcement Learning For DCASE Challenge 2021 Task 6," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

Code

BibTex entry

@techreport{mei:2021:dcase:tech-report,
  author      = {X. Mei and Q. Huang and X. Liu and G. Chen and J. Wu and Y. Wu and J. Zhao and S. Li and T. Ko and H. L. Tang and X. Shao and M. D. Plumbley and W. Wang},
  title       = {An Encoder-Decoder Based Audio Captioning System With Transfer And Reinforcement Learning For {DCASE} Challenge 2021 Task 6},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

IRIT-UPS DCASE 2021 Audio Captioning System

Information

Reference

E. Labbé and T. Pellegrini, "IRIT-UPS DCASE 2021 Audio Captioning System," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

Code

BibTex entry

@techreport{labbe:2021:dcase:tech-report,
  author      = {E. Labb{\'e} and T. Pellegrini},
  title       = {{IRIT-UPS DCASE} 2021 Audio Captioning System},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

The DCASE 2021 Challenge Task 6 System: Automated Audio Captioning With Weakly Supervised Pre-Traing And Word Selection Methods

Information

Reference

W. Yuan, Q. Han, D. Liu, X. Li, and Z. Yang, "The DCASE 2021 Challenge Task 6 System: Automated Audio Captioning With Weakly Supervised Pre-Traing And Word Selection Methods," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{yuan:2021:dcase:tech-report,
  author      = {W. Yuan and Q. Han and D. Liu and X. Li and Z. Yang},
  title       = {The {DCASE} 2021 Challenge Task 6 System: Automated Audio Captioning With Weakly Supervised Pre-Traing And Word Selection Methods},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

Leveraging State-Of-The-Art ASR Techniques To Audio Captioning

Information

Reference

C. Narisetty, T. Hayashi, R. Ishizaki, S. Watanabe, and K. Takeda, "Leveraging State-Of-The-Art ASR Techniques To Audio Captioning," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{narisetty:2021:dcase:tech-report,
  author      = {C. Narisetty and T. Hayashi and R. Ishizaki and S. Watanabe and K. Takeda},
  title       = {Leveraging State-Of-The-Art {ASR} Techniques To Audio Captioning},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

CAU Submission To DCASE 2021 Task6: Transformer Followed By Transfer Learning For Audio Captioning

Information

Reference

H. Won, B. Kim, I.-Y. Kwak, and C. Lim, "CAU Submission To DCASE 2021 Task6: Transformer Followed By Transfer Learning For Audio Captioning," DCASE2021 Challenge, Tech. Rep., Jun. 2021

Paper links

BibTex entry

@techreport{won:2021:dcase:tech-report,
  author      = {H. Won and B. Kim and I.-Y. Kwak and C. Lim},
  title       = {{CAU} Submission To {DCASE} 2021 Task6: Transformer Followed By Transfer Learning For Audio Captioning},
  institution = {DCASE2021 Challenge},
  year        = {2021},
  month       = {Jun.}
}

Audio Retrieval with Natural Language Queries

Information

Reference

A.-M. Oncescu, A. S. Koepke, J. F. Henriques, Z. Akata, and S. Albanie, "Audio Retrieval with Natural Language Queries," in arXiv:2105.02192 [cs.IR], 2021, May 2021

Paper links

Online demo

Online demo

BibTex entry

@misc{oncescu:2021:arxiv,
  title         = {Audio Retrieval with Natural Language Queries},
  author        = {Andreea-Maria Oncescu and A. Sophia Koepke and Jo{\~a}o F. Henriques and Zeynep Akata and Samuel Albanie},
  year          = {2021},
  eprint        = {2105.02192},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR}
}

Audio Captioning with Composition of Acoustic and Semantic Information

Information

Reference

A. Ö. Eren and M. Sert, "Audio Captioning with Composition of Acoustic and Semantic Information," in arXiv:2105.06355[cs.SD], 2021, May 2021

Paper links

BibTex entry

@misc{eren:2021:arxiv,
  title         = {Audio Captioning with Composition of Acoustic and Semantic Information},
  author        = {Ay{\c{s}}eg{\"u}l {\"O}zkaya Eren and Mustafa Sert},
  year          = {2021},
  eprint        = {2105.06355},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD}
}

Text-to-Audio Grounding: Building Correspondence Between Captions and Sound Events

Information

Reference

X. Xu, H. Dinkel, M. Wu, and K. Yu, "Text-to-Audio Grounding: Building Correspondence Between Captions and Sound Events," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Jun. 2021, pp. 606-610

Paper links

BibTex entry

@inproceedings{xu:2021:ICASSP:01,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  booktitle = {ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Text-to-Audio Grounding: Building Correspondence Between Captions and Sound Events},
  year      = {2021},
  pages     = {606--610},
  doi       = {10.1109/ICASSP39728.2021.9414834}
}

Investigating Local and Global Information for Automated Audio Captioning with Transfer Learning

Information

Reference

X. Xu, H. Dinkel, M. Wu, Z. Xie, and K. Yu, "Investigating Local and Global Information for Automated Audio Captioning with Transfer Learning," in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Jun. 2021, pp. 905-909

Paper links

BibTex entry

@inproceedings{xu:2021:ICASSP:02,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Zeyu Xie and Kai Yu},
  booktitle = {ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Investigating Local and Global Information for Automated Audio Captioning with Transfer Learning},
  year      = {2021},
  pages     = {905--909},
  doi       = {10.1109/ICASSP39728.2021.9413982}
}

Year 2020

Audio Captioning Based on Combined Audio and Semantic Embeddings

Information

Reference

A. Ö. Eren and M. Sert, "Audio Captioning Based on Combined Audio and Semantic Embeddings," in proceedings of 2020 IEEE International Symposium on Multimedia (ISM), Dec. 2020

Paper links

BibTex entry

@INPROCEEDINGS{eren:2020:ism,
  author    = {A. \"{O}. {Eren} and M. {Sert}},
  booktitle = {2020 IEEE International Symposium on Multimedia (ISM)},
  title     = {Audio Captioning Based on Combined Audio and Semantic Embeddings},
  year      = {2020},
  pages     = {41--48},
  doi       = {10.1109/ISM.2020.00014}
}

Audio Captioning using Pre-Trained Large-Scale Language Model Guided by Audio-based Similar Caption Retrieval

Information

Reference

Y. Koizumi, Y. Ohishi, D. Niizumi, D. Takeuchi, and M. Yasuda, "Audio Captioning using Pre-Trained Large-Scale Language Model Guided by Audio-based Similar Caption Retrieval," in arXiv:2012.07331 [eess.AS], 2020, Dec. 2020

Paper links

BibTex entry

@misc{koizumi:2020:arxiv,
  author        = {Y. Koizumi and Y. Ohishi and D. Niizumi and D. Takeuchi and M. Yasuda},
  title         = {Audio Captioning using Pre-Trained Large-Scale Language Model Guided by Audio-based Similar Caption Retrieval},
  year          = {2020},
  eprint        = {2012.07331},
  archivePrefix = {arXiv},
  primaryClass  = {eess.AS}
}

A CRNN-GRU Based Reinforcement Learning Approach to Audio Captioning

Information

Reference

X. Xu, H. Dinkel, M. Wu, and K. Yu, "A CRNN-GRU Based Reinforcement Learning Approach to Audio Captioning," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2020, Nov. 2020

Paper links

Code

BibTex entry

@inproceedings{xu:2020:dcase,
    author    = {X. Xu and H. Dinkel and M. Wu and K. Yu},
    title     = {A {CRNN-GRU} Based Reinforcement Learning Approach to Audio Captioning},
    booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop ({DCASE2020})},
    address   = {Tokyo, Japan},
    month     = {Nov.},
    year      = {2020},
    pages     = {225--229},
}

Audio Captioning Based on Transformer and Pre-Trained CNN

Information

Reference

K. Chen, Y. Wu, Z. Wang, X. Zhang, F. Nian, S. Li, and X. Shao, "Audio Captioning Based on Transformer and Pre-Trained CNN," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2020, Nov. 2020

Paper links

Code

BibTex entry

@inproceedings{chen:2020:dcase,
    author    = {K. Chen and Y. Wu and Z. Wang and X. Zhang and F. Nian and S. Li and X. Shao},
    title     = {Audio Captioning Based on Transformer and Pre-Trained {CNN}},
    booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop ({DCASE2020})},
    address   = {Tokyo, Japan},
    month     = {Nov.},
    year      = {2020},
    pages     = {21--25},
}

WaveTransformer: A Novel Architecture for Audio Captioning Based on Learning Temporal and Time-Frequency Information

Information

Reference

A. Tran, K. Drossos, and T. Virtanen, "WaveTransformer: A Novel Architecture for Audio Captioning Based on Learning Temporal and Time-Frequency Information," in arXiv:2010.11098 [cs.SD], 2020

Paper links

Code

Online demo

Online demo

BibTex entry

@misc{tran:2020:wavetransformer,
  title         = {{WaveTransformer}: A Novel Architecture for Audio Captioning Based on Learning Temporal and Time-Frequency Information},
  author        = {A. Tran and K. Drossos and T. Virtanen},
  year          = {2020},
  eprint        = {2010.11098},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD}
}

Effects of Word-frequency based Pre- and Post- Processings for Audio Captioning

Information

Reference

D. Takeuchi, Y. Koizumi, Y. Ohishi, N. Harada, and K. Kashino, "Effects of Word-frequency based Pre- and Post- Processings for Audio Captioning," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2020, Nov. 2020

Paper links

BibTex entry

@inproceedings{takeuchi:2020:dcase,
  title     = {Effects of Word-frequency based Pre- and Post- Processings for Audio Captioning},
  author    = {D. Takeuchi and Y. Koizumi and Y. Ohishi and N. Harada and K. Kashino},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop ({DCASE2020})},
  address   = {Tokyo, Japan},
  month     = {Nov.},
  year      = {2020},
  pages     = {190--194},
}

A Transformer-based Audio Captioning Model with Keyword Estimation

Information

Reference

Y. Koizumi, R. Masumura, K. Nishida, M. Yasuda, and S. Saito, "A Transformer-based Audio Captioning Model with Keyword Estimation," in INTERSPEECH, 2020

Paper links

INTERSPEECH 2020

BibTex entry

@inproceedings{koizumi:2020:interspeech,
  title     = {A Transformer-based Audio Captioning Model with Keyword Estimation},
  author    = {Y. Koizumi and R. Masumura and K. Nishida and M. Yasuda and S. Saito},
  year      = {2020},
  booktitle = {INTERSPEECH 2020},
  month     = {Oct.},
}

Multi-task Regularization Based on Infrequent Classes for Audio Captioning

Information

Reference

E. Çakır, K. Drossos, and T. Virtanen, "Multi-task Regularization Based on Infrequent Classes for Audio Captioning," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2020, Nov. 2020

Paper links

BibTex entry

@inproceedings{cakir:2020:arxiv-a,
  title     = {Multi-task Regularization Based on Infrequent Classes for Audio Captioning},
  author    = {E. \c{C}ak{\i}r and K. Drossos and T. Virtanen},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop ({DCASE2020})},
  address   = {Tokyo, Japan},
  month     = {Nov.},
  year      = {2020},
  pages     = {6--10},
}

Temporal Sub-sampling of Audio Feature Sequences for Automated Audio Captioning

Information

Reference

K. Nguyen, K. Drossos, and T. Virtanen, "Temporal Sub-sampling of Audio Feature Sequences for Automated Audio Captioning," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2020, Nov. 2020

Paper links

BibTex entry

@inproceedings{nguyen:2020:dcase,
  title     = {Temporal Sub-sampling of Audio Feature Sequences for Automated Audio Captioning},
  author    = {K. Nguyen and K. Drossos and T. Virtanen},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop ({DCASE2020})},
  address   = {Tokyo, Japan},
  month     = {Nov.},
  year      = {2020},
  pages     = {110--114},
}

The SJTU Submission for DCASE2020 Task 6: A CRNN-GRU Based Reinforcement Learning Approach to Audiocaption

Information

Reference

X. Xu, H. Dinkel, M. Wu, and K. Yu, "The SJTU Submission for DCASE2020 Task 6: A CRNN-GRU Based Reinforcement Learning Approach to Audiocaption," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

@techreport{xu:2020:dcase:tech-report,
  author      = {X. Xu and H. Dinkel and M. Wu and K. Yu},
  title       = {The {SJTU} Submission for {DCASE2020} Task 6: A {CRNN-GRU} Based Reinforcement Learning Approach to Audiocaption},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Audio Captioning Based on Transformer and Pre-Training for 2020 DCASE Audio Captioning Challenge

Information

Reference

Y. Wu, K. Chen, Z. Wang, X. Zhang, F. Nian, S. Li, and X. Shao, "Audio Captioning Based on Transformer and Pre-Training for 2020 DCASE Audio Captioning Challenge," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

Code

Data

BibTex entry

@techreport{wu-y:2020:dcase:tech-report,
  author      = {Y. Wu and K. Chen and Z. Wang and X. Zhang and F. Nian and S. Li and X. Shao},
  title       = {Audio Captioning Based on Transformer and Pre-Training for 2020 {DCASE} Audio Captioning Challenge},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Automatic Audio Captioning System Based on Convolutional Neural Network

Information

Reference

Q. Wu, S. Tao, and X. Yang, "Automatic Audio Captioning System Based on Convolutional Neural Network," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

Code

Data

Zenodo

BibTex entry

@techreport{wu-q:2020:dcase:tech-report,
  author      = {Q. Wu and S. Tao and X. Yang},
  title       = {Automatic Audio Captioning System Based on Convolutional Neural Network},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Automated Audio Captioning With Temporal Attention

Information

Reference

H. Wang, B. Yang, Y. Zou, and D. Chong, "Automated Audio Captioning With Temporal Attention," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

@techreport{wang:2020:dcase:tech-report,
  author      = {H. Wang and B. Yang and  Y. Zou and D. Chong},
  title       = {Automated Audio Captioning With Temporal Attention},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Audio Captioning With the Transformer

Information

Reference

Anna Shi, "Audio Captioning With the Transformer," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

@techreport{shi:2020:dcase:tech-report,
  author      = {A. Shi},
  title       = {Audio Captioning With the Transformer},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Automated Audio Captioning

Information

Reference

A. Sampathkumar and D. Kowerko, "Automated Audio Captioning," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

@techreport{sampathkumar:2020:dcase:tech-report,
  author      = {A. Sampathkumar and D. Kowerko},
  title       = {Automated Audio Captioning},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

IRIT-UPS DCASE 2020 audio captioning system

Information

Reference

Thomas Pellegrini, "IRIT-UPS DCASE 2020 audio captioning system," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

Code

Data

Zenodo

BibTex entry

@techreport{pellegrini:2020:dcase:tech-report,
  author      = {T. Pellegrini},
  title       = {{IRIT-UPS DCASE} 2020 audio captioning system},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Task 6 DCASE 2020: Listen Carefully and Tell: An Audio Captioning System Based on Residual Learning and Gammatone Audio Representation

Information

Reference

J. Naranjo-Alcazar, S. Perez-Castanos, P. Zuccarello, and M. Cobos, "Task 6 DCASE 2020: Listen Carefully and Tell: An Audio Captioning System Based on Residual Learning and Gammatone Audio Representation," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

@techreport{naranjo-alcazar:2020:dcase:tech-report,
  author      = {J. Naranjo-Alcazar and S. Perez-Castanos and P. Zuccarello and M. Cobos},
  title       = {Task 6 {DCASE} 2020: Listen Carefully and Tell: An Audio Captioning System Based on Residual Learning and Gammatone Audio Representation},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

Automated Audio Captioning

Information

Reference

N. Kuzmin and A. Dyakonov, "Automated Audio Captioning," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

Code

Data

Zenodo

BibTex entry

@techreport{kuzmin:2020:dcase:tech-report,
  author      = {N. Kuzmin and A. Dyakonov},
  title       = {Automated Audio Captioning},
  institution = {DCASE2020 Challenge},
  year        = {2020},
  month       = {Jun.}
}

The NTT DCASE2020 Challenge Task 6 System: Automated Audio Captioning With Keywords and Sentence Length Estimation

Information

Reference

Y. Koizumi, D. Takeuchi, Y. Ohishi, N. Harada, and K. Kashino, "The NTT DCASE2020 Challenge Task 6 System: Automated Audio Captioning With Keywords and Sentence Length Estimation," DCASE2020 Challenge, Tech. Rep., Jun. 2020

Paper links

BibTex entry

 @techreport{koizumi:2020:dcase:tech-report,
   author      = {Y. Koizumi and D. Takeuchi and Y. Ohishi and N. Harada and K. Kashino},
   title       = {The {NTT} {DCASE2020} Challenge Task 6 System: Automated Audio Captioning With Keywords and Sentence Length Estimation},
   institution = {DCASE2020 Challenge},
   year        = {2020},
   month       = {Jun.}
 }

Audio Captioning using Gated Recurrent Units

Information

Reference

A. Ö. Eren and M. Sert, "Audio Captioning using Gated Recurrent Units," in arXiv:2006.03391 [cs.SD], 2020

Paper links

BibTex entry

@misc{eren:2020:arxiv,
  title         = {Audio Captioning using Gated Recurrent Units},
  author        = {A. \"{O}. Eren and M. Sert},
  year          = {2020},
  eprint        = {2006.03391},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD}
}

Clotho: An Audio Captioning Dataset

Information

Reference

K. Drossos, S. Lipping, and T. Virtanen, "Clotho: An audio captioning dataset," in ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2020, pp. 736–740

Paper links

BibTex entry

@inproceedings{drossos:2020:icassp,
  title     = {Clotho: An Audio Captioning Dataset},
  author    = {Drossos, K. and Lipping, S. and Virtanen, T.},
  booktitle = {ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {736--740},
  year      = {2020}
}

Year 2019

Audio Caption in a Car Setting with a Sentence-Level Loss

Information

Reference

X. Xu, H. Dinkel, M. Wu, and K. Yu, "Audio Caption in a Car Setting with a Sentence-Level Loss," in 12th International Symposium on Chinese Spoken Language Processing (ISCSLP), 2021, pp. 1-5

Paper links

BibTex entry

@inproceedings{xu:2021:ISCSLP,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  booktitle = {12th International Symposium on Chinese Spoken Language Processing (ISCSLP)},
  title     = {Audio Caption in a Car Setting with a Sentence-Level Loss},
  year      = {2021},
  pages     = {1--5},
  doi       = {10.1109/ISCSLP49672.2021.9362117}
}

Crowdsourcing a Dataset of Audio Captions

Information

Reference

S. Lipping, K. Drossos, and T. Virtanen, "Crowdsourcing a dataset of audio captions," in Detection and Classification of Acoustic Scenes and Events (DCASE) 2019, Oct. 2019

Paper links

BibTex entry

@inproceedings{lipping:2019:dcase,
  author    = {S. Lipping and K. Drossos and T. Virtanen},
  title     = {Crowdsourcing a Dataset of Audio Captions},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2019 Workshop (DCASE2019)},
  address   = {New York University, NY, USA},
  month     = {Oct.},
  year      = {2019},
  pages     = {139--143},
  ISSN      = {2379-190X}
}

Neural Audio Captioning Based On Conditional Sequence-to-Sequence Model

Information

Reference

Shota Ikawa and Kunio Kashino, "Neural Audio Captioning Based On Conditional Sequence-to-Sequence Model," in Workshop of Detection and Classification of Acoustic Scenes and Events (DCASE), Oct. 2019.

Paper links

BibTex entry

@inproceedings{ikawa:2019:dcase,
  author    = {S. Ikawa and K. Kashino},
  title     = {Neural Audio Captioning Based On Conditional Sequence-to-Sequence Model},
  booktitle = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2019 Workshop ({DCASE2019})},
  address   = {New York University, NY, USA},
  month     = {Oct.},
  year      = {2019},
  pages     = {99--103},
  ISSN      = {2379-190X}
}

AudioCaps: Generating captions for audios in the wild

Information

Reference

C. D. Kim, B. Kim, H. Lee, and G. Kim, "AudioCaps: Generating captions for audios in the wild," in Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, Jun. 2019, pp. 119–132, Association for Computational Linguistics

Paper links

ACLweb

Code

Data

BibTex entry

@inproceedings{kim:2019:nacacl,
  title     = {{A}udio{C}aps: Generating Captions for Audios in The Wild},
  author    = {C. D. Kim and B. Kim and H. Lee and G. Kim},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = {Jun.},
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  doi       = {10.18653/v1/N19-1011},
  pages     = {119--132}
}

Audio caption: Listen and tell

Information

Reference

M. Wu, H. Dinkel, and K. Yu, "Audio caption: Listen and tell," in 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), May 2019, pp. 830–834

Paper links

BibTex entry

@inproceedings{wu:2019:icassp,
  author    = {M. {Wu} and H. {Dinkel} and K. {Yu}},
  booktitle = {2019 IEEE International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  title     = {Audio Caption: Listen and Tell},
  year      = {2019},
  pages     = {830--834},
  doi       = {10.1109/ICASSP.2019.8682377},
  ISSN      = {2379-190X},
  month     = {May}
}

Year 2017

Automated Audio Captioning with Recurrent Neural Networks

Information

Reference

K. Drossos, S. Adavanne, and T. Virtanen, "Automated audio captioning with recurrent neural networks," in 2017 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), Oct. 2017, pp. 374–378

Paper links