1
+ const request = require ( 'request' ) ;
2
+ const escapeStringRegexp = require ( 'escape-string-regexp' ) ;
3
+ const async = require ( 'async' ) ;
4
+ const fs = require ( 'fs' ) ;
5
+ const MultiStream = require ( 'multistream' ) ;
6
+ const fakeUa = require ( 'fake-useragent' ) ;
7
+
8
/**
 * Base URL of the Google Translate text-to-speech endpoint. The query string
 * is appended to it for every request. HTTPS is used so the spoken text is
 * not sent over the network in clear text.
 */
const GOOGLE_TTS_URL = 'https://translate.google.com/translate_tts';

/** Character budget used when packing words into a single request chunk. */
const MAX_CHARS = 100;

/**
 * Supported language codes (lower-case) mapped to their display names.
 * Frozen so the lookup table cannot be modified at runtime by accident.
 */
const LANGUAGES = Object.freeze({
  'af': 'Afrikaans',
  'sq': 'Albanian',
  'ar': 'Arabic',
  'hy': 'Armenian',
  'ca': 'Catalan',
  'zh': 'Chinese',
  'zh-cn': 'Chinese (Mandarin/China)',
  'zh-tw': 'Chinese (Mandarin/Taiwan)',
  'zh-yue': 'Chinese (Cantonese)',
  'hr': 'Croatian',
  'cs': 'Czech',
  'da': 'Danish',
  'nl': 'Dutch',
  'en': 'English',
  'en-au': 'English (Australia)',
  'en-uk': 'English (United Kingdom)',
  'en-us': 'English (United States)',
  'eo': 'Esperanto',
  'fi': 'Finnish',
  'fr': 'French',
  'de': 'German',
  'el': 'Greek',
  'ht': 'Haitian Creole',
  'hi': 'Hindi',
  'hu': 'Hungarian',
  'is': 'Icelandic',
  'id': 'Indonesian',
  'it': 'Italian',
  'ja': 'Japanese',
  'ko': 'Korean',
  'la': 'Latin',
  'lv': 'Latvian',
  'mk': 'Macedonian',
  'no': 'Norwegian',
  'pl': 'Polish',
  'pt': 'Portuguese',
  'pt-br': 'Portuguese (Brazil)',
  'ro': 'Romanian',
  'ru': 'Russian',
  'sr': 'Serbian',
  'sk': 'Slovak',
  'es': 'Spanish',
  'es-es': 'Spanish (Spain)',
  'es-us': 'Spanish (United States)',
  'sw': 'Swahili',
  'sv': 'Swedish',
  'ta': 'Tamil',
  'th': 'Thai',
  'tr': 'Turkish',
  'vi': 'Vietnamese',
  'cy': 'Welsh',
});
63
+
64
/**
 * Creates a text-to-speech helper bound to one language.
 *
 * @param {string} [_lang='en'] - Language code; matched case-insensitively
 *   against the keys of LANGUAGES.
 * @param {boolean} [_debug] - Accepted for backward compatibility; it is not
 *   used by this function.
 * @returns {{tokenize: Function, createServer: Function, stream: Function, save: Function}}
 *   `tokenize(text)`, `createServer(port)`, `stream(text)` and
 *   `save(filepath, text, callback)`, the last three pre-bound to the language.
 * @throws {Error} When the language code is not a key of LANGUAGES.
 */
function Text2Speech(_lang, _debug) {
  // String() keeps a non-string argument from failing on toLowerCase();
  // it falls through to the "not supported" error below instead.
  const lang = String(_lang || 'en').toLowerCase();

  // Own-property check: a plain `LANGUAGES[lang]` lookup is truthy for
  // inherited keys such as "constructor" and would accept them as languages.
  if (!Object.prototype.hasOwnProperty.call(LANGUAGES, lang)) {
    throw new Error('Language not supported: ' + lang);
  }

  const getArgs = getArgsFactory(lang);

  return {
    tokenize: tokenize,
    createServer: (port) => createServer(getArgs, port),
    stream: (text) => stream(getArgs, text),
    save: (filepath, text, callback) => save(getArgs, filepath, text, callback),
  };
}
81
+
82
/**
 * Downloads the speech audio for `text` and writes it to `filepath`.
 *
 * The text is split with tokenize(); the chunks are fetched one after another
 * and appended to the same file (the first chunk truncates/creates it).
 *
 * @param {Function} getArgs - `(part, index, total) => queryString`.
 * @param {string} filepath - Destination file.
 * @param {string} text - Text to speak.
 * @param {Function} callback - Called with an error if any chunk fails,
 *   otherwise without one once every chunk has been written.
 * @throws {Error} Synchronously, whatever tokenize() throws for `text`.
 */
function save(getArgs, filepath, text, callback) {
  const parts = tokenize(text);
  const total = parts.length;

  // eachSeries runs the iteratee strictly one chunk at a time, so a counter
  // yields the true position. Looking the chunk up by value would return the
  // first occurrence: repeated chunks would get the wrong idx, and a repeat
  // of the first chunk would reopen the file with 'w' and truncate it.
  let nextIndex = 0;

  async.eachSeries(parts, function (part, cb) {
    const index = nextIndex++;

    // Several events can report the outcome; only the first one may reach cb.
    let settled = false;
    const done = function (err) {
      if (settled) {
        return;
      }
      settled = true;
      cb(err);
    };

    const writeStream = fs.createWriteStream(filepath, {
      flags: index > 0 ? 'a' : 'w',
    });
    writeStream.on('finish', function () {
      done();
    });
    writeStream.on('error', done);

    request({
      uri: GOOGLE_TTS_URL + getArgs(part, index, total),
      headers: getHeader(),
      method: 'GET',
    })
      // pipe() does not forward source errors; without this handler a failed
      // request would leave the write stream open and never call back.
      .on('error', function (err) {
        writeStream.destroy();
        done(err);
      })
      .pipe(writeStream);
  }, callback);
}
104
+
105
/**
 * Returns one readable stream that yields the speech audio for `text`.
 *
 * The text is split with tokenize() and each chunk is fetched with its own
 * request; the responses are concatenated in chunk order.
 *
 * @param {Function} getArgs - `(part, index, total) => queryString`.
 * @param {string} text - Text to speak.
 * @returns {MultiStream} The concatenated audio stream.
 * @throws {Error} Synchronously, whatever tokenize() throws for `text`.
 */
function stream(getArgs, text) {
  const parts = tokenize(text);
  const total = parts.length;

  // Hand MultiStream factories rather than live requests, so each request is
  // created only when its turn comes instead of all of them up front.
  const sources = parts.map(function (part, index) {
    return function () {
      return request({
        uri: GOOGLE_TTS_URL + getArgs(part, index, total),
        headers: getHeader(),
        method: 'GET',
      });
    };
  });

  // `new` works with both function-style and class-style constructors.
  return new MultiStream(sources);
}
121
+
122
/**
 * Builds the HTTP headers sent with each text-to-speech request.
 *
 * @returns {Object} Headers containing a single `User-Agent` entry whose
 *   value is obtained from a fresh `fakeUa()` call.
 */
function getHeader() {
  const userAgent = fakeUa();
  return { 'User-Agent': userAgent };
}
129
+
130
/**
 * Creates a query-string builder bound to a language.
 *
 * @param {string} [lang='en'] - Language code sent as the `tl` parameter.
 * @returns {Function} `(text, index, total) => string`: the query string
 *   (starting with `?`) for one chunk. `text` is URI-encoded into `q`,
 *   `index` and `total` become `idx` and `total`, and `textlen` is the length
 *   of the unencoded text.
 */
function getArgsFactory(lang) {
  const language = encodeURIComponent(lang || 'en');

  return function (text, index, total) {
    const textlen = text.length;
    const encodedText = encodeURIComponent(text);
    // No whitespace between parameters: a literal space would end up in the
    // URL and corrupt the preceding value.
    return `?ie=UTF-8&tl=${language}&q=${encodedText}&total=${total}&idx=${index}&client=tw-ob&textlen=${textlen}`;
  };
}
138
+
139
/**
 * Splits text into chunks small enough to be requested one at a time.
 *
 * The text is cut at every separator character (punctuation, newline, space);
 * the resulting words are then re-joined with single spaces into chunks. A
 * word is appended to the current chunk while the chunk length plus the word
 * length stays below MAX_CHARS; otherwise it starts a new chunk. A single
 * word of MAX_CHARS or more characters is kept whole as its own chunk.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty chunks, in order.
 * @throws {Error} 'No text to speak' when `text` is empty or contains nothing
 *   but separator characters.
 */
function tokenize(text) {
  if (!text) {
    throw new Error('No text to speak');
  }

  const punc = '¡!()[]¶;|°•—«»≤≥«»‹›\n ';
  const pattern = punc
    .split('')
    .map(function (char) {
      return escapeStringRegexp(char);
    })
    .join('|');

  const words = String(text)
    .split(new RegExp(pattern))
    .filter((word) => word.length > 0);

  // Separator-only input leaves nothing to speak; report it the same way as
  // empty input rather than failing later on a missing first chunk.
  if (words.length === 0) {
    throw new Error('No text to speak');
  }

  const chunks = [];
  for (const word of words) {
    const last = chunks.length - 1;
    if (last < 0) {
      // The first word always opens the first chunk, even when oversized,
      // so the result never starts with an empty chunk.
      chunks.push(word);
      continue;
    }

    // The first chunk is packed one character tighter than the others; this
    // keeps chunk boundaries identical to earlier releases.
    const used = chunks[last].length + (last === 0 ? 1 : 0);
    if (used + word.length < MAX_CHARS) {
      chunks[last] += ' ' + word;
    } else {
      chunks.push(word);
    }
  }
  return chunks;
}
169
+
170
/**
 * Starts an HTTP server that answers `?text=...` requests with speech audio.
 *
 * Query parameters:
 *   - `text`: text to speak (required).
 *   - `lang`: optional language code; when it is a key of LANGUAGES it
 *     overrides the language bound to `getArgs` for that request.
 *
 * @param {Function} getArgs - Default `(part, index, total) => queryString`.
 * @param {number} port - Port to listen on.
 * @returns {http.Server} The listening server, so callers can close it.
 */
function createServer(getArgs, port) {
  const http = require('http');
  const url = require('url');

  // A repeated query parameter is parsed into an array; use its first value.
  const firstValue = function (value) {
    return Array.isArray(value) ? value[0] : value;
  };

  const sendJson = function (res, status, message) {
    res.writeHead(status, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      code: -1,
      message: message,
    }));
  };

  const server = http.createServer(function (req, res) {
    const queryData = url.parse(req.url, true).query || {};
    const text = firstValue(queryData.text);
    const lang = String(firstValue(queryData.lang) || '').toLowerCase();

    // Own-property check so inherited keys such as "constructor" are not
    // mistaken for language codes.
    let argsCallback = getArgs;
    if (lang && Object.prototype.hasOwnProperty.call(LANGUAGES, lang)) {
      argsCallback = getArgsFactory(lang);
    }

    if (!text) {
      // Status 200 is kept here so existing clients see the same response.
      sendJson(res, 200, `Missing text. Please try: ${req.headers.host}?text=your+text`);
      return;
    }

    // Build the audio stream before sending headers: if the text cannot be
    // processed the client gets an error response instead of the exception
    // escaping the request handler and terminating the process.
    let audio;
    try {
      audio = stream(argsCallback, text);
    } catch (err) {
      sendJson(res, 400, err.message);
      return;
    }

    res.writeHead(200, { 'Content-Type': 'audio/mpeg' });
    // Headers are already sent, so an upstream failure can only be signalled
    // by tearing the response down.
    audio.on('error', function (err) {
      console.error('Text-to-Speech stream failed: ' + err.message);
      res.destroy(err);
    });
    audio.pipe(res);
  });

  server.listen(port);
  console.log("Text-to-Speech Server running on " + port);
  return server;
}
196
+
197
+ module . exports = Text2Speech ;
0 commit comments