@@ -389,7 +389,8 @@ <h2 id="zero-shot-vs-one-shot-demonstrations">Zero-Shot vs. One-Shot Demonstrati
389
389
<div class="article-block">
390
390
<h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual and Cross-Lingual Capabilities
391
391
Demonstrations</h2>
392
- <p>Speech-02-HD maintain high naturalness in less common languages while demonstrating significant advantages in
392
+ <p>Speech-02-HD maintains high naturalness in less common languages while demonstrating significant advantages
393
+ in
393
394
Standard
394
395
Chinese pronunciation accuracy.</p>
395
396
<div class="scroll-wrapper" style="margin-top: 2rem;">
@@ -514,7 +515,8 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
514
515
</tbody>
515
516
</table>
516
517
</div>
517
- <div class="scroll-wrapper" style="margin-top: 3rem;">
518
+ <p style="margin-top: 4rem;">Speech-02-HD has superior performance in zero-shot cross-lingual scenarios.</p>
519
+ <div class="scroll-wrapper" style="margin-top: 2rem;">
518
520
<table style="width: 100%;">
519
521
<tbody>
520
522
<tr class="border-bottom-thin">
@@ -528,7 +530,7 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
528
530
</tr>
529
531
<tr class="border-bottom-thin">
530
532
<th>English</th>
531
- <td>Mandarin</td>
533
+ <th>Mandarin</th>
532
534
<td>
533
535
<audio class="audio-sm" src="assets/audios/Wong_Sourse.mp3" controls></audio>
534
536
</td>
@@ -551,7 +553,7 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
551
553
</tr>
552
554
<tr class="border-bottom-thin">
553
555
<th>Mandarin</th>
554
- <td>Cantonese</td>
556
+ <th>Cantonese</th>
555
557
<td>
556
558
<audio class="audio-sm" src="assets/audios/ShiBanYu_Sourse.mp3" controls></audio>
557
559
</td>
@@ -572,7 +574,7 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
572
574
</tr>
573
575
<tr class="border-bottom-thin">
574
576
<th>Mandarin</th>
575
- <td>English</td>
577
+ <th>English</th>
576
578
<td>
577
579
<audio class="audio-sm" src="assets/audios/ShuanQ_Sourse.mp3" controls></audio>
578
580
</td>
@@ -593,7 +595,7 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
593
595
</tr>
594
596
<tr class="border-bottom-thin">
595
597
<th>English</th>
596
- <td>Spanish</td>
598
+ <th>Spanish</th>
597
599
<td>
598
600
<audio class="audio-sm" src="assets/audios/CoCo_Sourse.mp3" controls></audio>
599
601
</td>
@@ -614,7 +616,7 @@ <h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual
614
616
</tr>
615
617
<tr class="border-bottom-thin">
616
618
<th>Japanese</th>
617
- <td>Korean</td>
619
+ <th>Korean</th>
618
620
<td>
619
621
<audio class="audio-sm" src="assets/audios/Powerful_Girl_Sourse.mp3" controls></audio>
620
622
</td>
@@ -773,7 +775,7 @@ <h3>DEMO</h3>
773
775
<th scope="col">Neutral</th>
774
776
<th scope="col" style="min-width: 120px;">Emotion</th>
775
777
<th scope="col">Text</th>
776
- <th scope="col">Emotion-Infused Audio</th>
778
+ <th scope="col">Emotion Control Audio</th>
777
779
</tr>
778
780
<tr class="border-bottom-thin">
779
781
<td>
0 commit comments