1
- const fetch = require ( 'node-fetch' ) ,
2
- slugify = require ( 'slugify' ) ,
3
- cheerio = require ( 'cheerio' ) ,
4
- iconv = require ( 'iconv-lite' ) ;
5
-
6
- const baseUrl = 'https://www.pensador.com/' ;
7
-
8
- module . exports = async ( options ) => {
9
- if ( typeof options == 'undefined' ) {
10
- var options = { max : 10 } ;
11
- }
12
- // Default value for search term, Term is not mandatory anymore
13
- var searchTerm = 'frases_curtas' ;
14
- // if term is set then is changed to a new value
15
- if ( options . term != 'undefined' ) {
16
- searchTerm = slugify ( `frases de ${ options . term } ` , {
17
- replacement : '_' ,
18
- remove : / [ * + ~ . ( ) ' " ! : @ ] / g,
19
- lower : true ,
20
- } ) ;
21
- }
22
-
23
- let keepGoing = true ;
24
- let current = 1 ;
25
-
26
- let phrases = [ ] ;
27
-
28
- while ( keepGoing ) {
29
- let contentPage = await fetchPage ( searchTerm , current ) ;
30
- let result = await extract ( contentPage ) ;
31
-
32
- phrases . push ( ...result . phrases ) ;
33
-
34
- if ( options . max !== undefined && phrases . length > options . max ) {
35
- phrases = phrases . slice ( 0 , options . max ) ;
36
-
37
- keepGoing = false ;
38
- }
39
-
40
- if ( result . next === false ) {
41
- keepGoing = false ;
42
- }
43
-
44
- current = current + 1 ;
45
- }
46
-
47
- return { total : phrases . length , searchTerm, phrases } ;
48
-
49
- async function fetchPage ( searchTerm , current = 1 ) {
50
- return new Promise ( ( resolve , reject ) => {
51
- fetch ( `${ baseUrl } /${ searchTerm } /${ current } ` )
52
- . then ( ( res ) => res . arrayBuffer ( ) )
53
- . then ( ( arrayBuffer ) =>
54
- iconv . decode ( Buffer . from ( arrayBuffer ) , 'utf-8' ) . toString ( ) ,
55
- )
56
- . then ( ( body ) => resolve ( body ) )
57
- . catch ( ( err ) => reject ( err ) ) ;
58
- } ) ;
59
- }
60
-
61
- async function extract ( htmlContent ) {
62
- return new Promise ( ( resolve , reject ) => {
63
- try {
64
- const phrases = [ ] ;
65
- const $ = cheerio . load ( htmlContent ) ;
66
- $ ( '.thought-card' ) . each ( function ( i , e ) {
67
- phrases . push ( {
68
- author : $ ( this ) . find ( 'a' ) . first ( ) . text ( ) ,
69
- text : $ ( this )
70
- . find ( 'p' )
71
- . first ( )
72
- . text ( )
73
- . replace ( / \n / g, '' ) ,
74
- } ) ;
75
- } ) ;
76
-
77
- let next = false ;
78
- $ ( '#paginacao' ) . each ( function ( i , e ) {
79
- if ( $ ( this ) . find ( '.nav' ) . last ( ) . text ( ) . includes ( 'xima' ) ) {
80
- next = true ;
81
- }
82
- } ) ;
83
-
84
- resolve ( { phrases, next } ) ;
85
- } catch ( err ) {
86
- reject ( err ) ;
87
- }
88
- } ) ;
89
- }
1
+ const fetch = require ( "node-fetch" ) ,
2
+ slugify = require ( "slugify" ) ,
3
+ cheerio = require ( "cheerio" ) ,
4
+ iconv = require ( "iconv-lite" ) ;
5
+
6
+ const baseUrl = 'https://www.pensador.com/' ;
7
+
8
+ module . exports = async ( options ) => {
9
+ if ( typeof options == 'undefined' ) {
10
+ var options = { max : 10 } ;
11
+ }
12
+ // Default value for search term, Term is not mandatory anymore
13
+ var searchTerm = 'frases_curtas' ;
14
+ // if term is set then is changed to a new value
15
+ if ( options . term != 'undefined' ) {
16
+ searchTerm = slugify ( `frases de ${ options . term } ` , {
17
+ replacement : '_' ,
18
+ remove : / [ * + ~ . ( ) ' " ! : @ ] / g,
19
+ lower : true ,
20
+ } ) ;
21
+ }
22
+
23
+ let keepGoing = true ;
24
+ let current = 1 ;
25
+
26
+ let phrases = [ ] ;
27
+
28
+ while ( keepGoing ) {
29
+ let contentPage = await fetchPage ( searchTerm , current ) ;
30
+ let result = await extract ( contentPage ) ;
31
+
32
+ phrases . push ( ...result . phrases ) ;
33
+
34
+ if ( options . max !== undefined && phrases . length > options . max ) {
35
+ phrases = phrases . slice ( 0 , options . max ) ;
36
+
37
+ keepGoing = false ;
38
+ }
39
+
40
+ if ( result . next === false ) {
41
+ keepGoing = false ;
42
+ }
43
+
44
+ current = current + 1 ;
45
+ }
46
+
47
+ return { total : phrases . length , searchTerm, phrases } ;
48
+
49
+ async function fetchPage ( searchTerm , current = 1 ) {
50
+ return new Promise ( ( resolve , reject ) => {
51
+ fetch ( `${ baseUrl } /${ searchTerm } /${ current } ` )
52
+ . then ( res => res . arrayBuffer ( ) )
53
+ . then ( arrayBuffer =>
54
+ iconv . decode ( Buffer . from ( arrayBuffer ) , "utf-8" ) . toString ( )
55
+ )
56
+ . then ( body => resolve ( body ) )
57
+ . catch ( err => reject ( err ) ) ;
58
+ } ) ;
59
+ }
60
+
61
+ async function extract ( htmlContent ) {
62
+ return new Promise ( ( resolve , reject ) => {
63
+ try {
64
+ const phrases = [ ] ;
65
+ const $ = cheerio . load ( htmlContent ) ;
66
+ $ ( ".thought-card" ) . each ( function ( i , e ) {
67
+ phrases . push ( {
68
+ author : $ ( this )
69
+ . find ( "a" )
70
+ . first ( )
71
+ . text ( ) ,
72
+ text : $ ( this )
73
+ . find ( "p" )
74
+ . first ( )
75
+ . text ( )
76
+ . replace ( / \n / g, "" )
77
+ } ) ;
78
+ } ) ;
79
+
80
+ let next = false ;
81
+ $ ( "#paginacao" ) . each ( function ( i , e ) {
82
+ if (
83
+ $ ( this )
84
+ . find ( ".nav" )
85
+ . last ( )
86
+ . text ( )
87
+ . includes ( "xima" )
88
+ ) {
89
+ next = true ;
90
+ }
91
+ } ) ;
92
+
93
+ resolve ( { phrases, next } ) ;
94
+ } catch ( err ) {
95
+ reject ( err ) ;
96
+ }
97
+ } ) ;
98
+ }
90
99
} ;
91
100
92
101
/**
 * Throws the given value unchanged.
 *
 * No caller is visible in this part of the file; presumably it exists so a
 * throw can be used where only an expression is allowed — confirm before
 * removing.
 *
 * @param {*} m - Value to throw; it is neither wrapped nor converted.
 * @throws {*} Always throws `m` itself.
 */
function _throw(m) {
  throw m;
}
0 commit comments