@@ -7,6 +7,10 @@ import path from 'node:path';
77import { config } from '@/config' ;
88import { getCurrentPath } from '@/utils/helpers' ;
99
// Directory of this route module; joined with 'templates/description.art' when rendering descriptions.
const __dirname = getCurrentPath(import.meta.url);
// Base URL that feed paths and relative links in app pages are resolved against.
const appUrl = 'https://app.theinitium.com/';
// User-Agent header value sent by getUA.
const userAgent = 'PugpigBolt v4.1.8 (iPhone, iOS 18.2.1) on phone (model iPhone15,2)';
13+
1014export const route : Route = {
1115 path : '/app/:category?' ,
1216 categories : [ 'new-media' , 'popular' ] ,
@@ -52,51 +56,96 @@ Category 栏目:
5256| 播客 | article_audio_sc | article_audio_tc |` ,
5357} ;
5458
/**
 * Rewrite one attribute of an element so that it holds an absolute URL.
 *
 * Adapted from @/middleware/parameter.ts.
 *
 * @param $ - Cheerio instance that owns `elem`.
 * @param elem - Element whose attribute is rewritten in place.
 * @param attr - Attribute name to rewrite (callers in this file pass 'href', 'src' or 'poster').
 * @param appUrl - Base URL used to resolve the attribute value; when omitted or empty nothing is changed.
 */
const resolveRelativeLink = ($: CheerioAPI, elem: Element, attr: string, appUrl?: string) => {
    const $elem = $(elem);

    if (!appUrl) {
        return;
    }

    try {
        const oldAttr = $elem.attr(attr);
        // Elements without the attribute are left alone,
        // e.g. <video><source src="https://example.com"></video> should leave <video> unchanged.
        if (oldAttr) {
            $elem.attr(attr, new URL(oldAttr, appUrl).href);
        }
    } catch {
        // Deliberately best-effort: a value (or base) that `new URL` cannot parse
        // keeps its original attribute instead of failing the whole page.
    }
};
7175
72- async function handler ( ctx ) {
73- const category = ctx . req . param ( 'category' ) ?? 'latest_sc' ;
74- const __dirname = getCurrentPath ( import . meta. url ) ;
75- const baseUrl = 'https://app.theinitium.com/' ;
76- const userAgent = 'PugpigBolt v4.1.8 (iPhone, iOS 18.2.1) on phone (model iPhone15,2)' ;
77-
78- async function getUA ( url : string ) {
79- return await got ( {
80- method : 'get' ,
81- url,
82- headers : {
83- 'User-Agent' : userAgent ,
84- } ,
85- } ) ;
86- }
/**
 * Issue a GET request for `url` with the module's `userAgent` as the User-Agent header.
 *
 * @param url - Absolute URL to request.
 * @returns Whatever `got` resolves to for the request.
 */
async function getUA(url: string) {
    const headers = { 'User-Agent': userAgent };
    const response = await got({ method: 'get', url, headers });
    return response;
}
8785
88- const feeds = await cache . tryGet ( new URL ( 'timelines.json' , baseUrl ) . href , async ( ) => await getUA ( new URL ( 'timelines.json' , baseUrl ) . href ) , config . cache . routeExpire , false ) ;
/**
 * Fetch an article page through `getUA` and render it with the description template.
 *
 * Link and media attributes are made absolute against `appUrl`, `srcset` is dropped,
 * and the `.pp-article__body` markup is tidied before rendering.
 *
 * @param url - URL of the article page.
 * @returns The rendered output of 'templates/description.art'.
 */
async function fetchAppPage(url: URL) {
    const { data: html } = await getUA(url.href);
    const $ = load(html);

    // Resolve relative links with app.theinitium.com
    // (approach taken from @/middleware/parameter.ts).
    for (const link of $('a, area').toArray()) {
        resolveRelativeLink($, link, 'href', appUrl);
        // rel="noreferrer" is not set here; currently there is no need for it
    }
    // Elements that take a `src` attribute: https://www.w3schools.com/tags/att_src.asp
    for (const media of $('img, video, audio, source, iframe, embed, track').toArray()) {
        resolveRelativeLink($, media, 'src', appUrl);
        $(media).removeAttr('srcset');
    }
    for (const video of $('video[poster]').toArray()) {
        resolveRelativeLink($, video, 'poster', appUrl);
    }

    // Tidy the article body: drop related-article blocks, unwrap pull quotes,
    // and wrap notes / small print in semantic tags.
    const body = $('.pp-article__body');
    body.find('.block-related-articles').remove();
    body.find('figure.wp-block-pullquote').children().unwrap();
    body.find('div.block-explanation-note').wrapInner('<blockquote></blockquote>');
    body.find('div.wp-block-tcc-author-note').wrapInner('<em></em>').after('<hr>');
    body.find('p.has-small-font-size').wrapInner('<small></small>');

    const templatePath = path.join(__dirname, 'templates/description.art');
    const templateData = {
        standfirst: $('.pp-header-group__standfirst').html(),
        coverImage: $('.pp-media__image').attr('src'),
        coverCaption: $('.pp-media__caption').html(),
        article: body.html(),
        copyright: $('.copyright').html(),
    };
    return art(templatePath, templateData);
}
89117
90- const metadata = feeds . data . timelines . find ( ( timeline ) => timeline . id === category ) ;
/**
 * Fetch an article page with a plain `got` request and render it with the description template.
 *
 * The `.wkwp-post-content` markup is tidied before rendering; no URL attributes are rewritten here.
 *
 * @param url - URL of the article page.
 * @returns The rendered output of 'templates/description.art'.
 */
async function fetchWebPage(url: URL) {
    const { data: html } = await got(url.href);
    const $ = load(html);

    // Tidy the article body: drop related-article blocks, unwrap pull quotes,
    // and wrap notes / small print in semantic tags.
    const body = $('.wkwp-post-content');
    body.find('.block-related-articles').remove();
    body.find('figure.wp-block-pullquote').children().unwrap();
    body.find('div.block-explanation-note').wrapInner('<blockquote></blockquote>');
    body.find('div.wp-block-tcc-author-note').wrapInner('<em></em>').after('<hr>');
    body.find('p.has-small-font-size').wrapInner('<small></small>');

    const templatePath = path.join(__dirname, 'templates/description.art');
    const templateData = {
        standfirst: $('span.caption1').html(),
        coverImage: $('.wp-post-image').attr('src'),
        coverCaption: $('.image-caption').html(),
        article: body.html(),
        copyright: $('.entry-copyright').html(),
    };
    return art(templatePath, templateData);
}
91135
92- const response = await getUA ( new URL ( metadata . feed , baseUrl ) . href ) ;
136+ async function handler ( ctx ) {
137+ const category = ctx . req . param ( 'category' ) ?? 'latest_sc' ;
93138
139+ const feeds = await cache . tryGet ( new URL ( 'timelines.json' , appUrl ) . href , async ( ) => await getUA ( new URL ( 'timelines.json' , appUrl ) . href ) , config . cache . routeExpire , false ) ;
140+ const metadata = feeds . data . timelines . find ( ( timeline ) => timeline . id === category ) ;
141+ const response = await getUA ( new URL ( metadata . feed , appUrl ) . href ) ;
94142 const feed = response . data . stories . filter ( ( item ) => item . type === 'article' ) ;
95143
96144 const items = await Promise . all (
97145 feed . map ( ( item ) =>
98- cache . tryGet ( new URL ( item . url , baseUrl ) . href , async ( ) => {
99- item . link = item . shareurl ?? new URL ( item . url , baseUrl ) . href ;
146+ cache . tryGet ( item . shareurl , async ( ) => {
147+ const url = new URL ( item . shareurl ) ;
148+ item . link = url . href ;
100149 item . description = item . summary ;
101150 item . pubDate = item . published ;
102151 item . category = [ ] ;
@@ -112,35 +161,16 @@ async function handler(ctx) {
112161 }
113162 }
114163 item . category = [ ...new Set ( item . category ) ] ;
115- const response = await getUA ( new URL ( item . url , baseUrl ) . href ) ;
116- const $ = load ( response . data ) ;
117- // resolve relative links with app.theinitium.com
118- // code from @/middleware/paratmeter.ts
119- $ ( 'a, area' ) . each ( ( _ , elem ) => {
120- resolveRelativeLink ( $ , elem , 'href' , baseUrl ) ;
121- // $(elem).attr('rel', 'noreferrer'); // currently no such a need
122- } ) ;
123- // https://www.w3schools.com/tags/att_src.asp
124- $ ( 'img, video, audio, source, iframe, embed, track' ) . each ( ( _ , elem ) => {
125- resolveRelativeLink ( $ , elem , 'src' , baseUrl ) ;
126- $ ( elem ) . removeAttr ( 'srcset' ) ;
127- } ) ;
128- $ ( 'video[poster]' ) . each ( ( _ , elem ) => {
129- resolveRelativeLink ( $ , elem , 'poster' , baseUrl ) ;
130- } ) ;
131- const article = $ ( '.pp-article__body' ) ;
132- article . find ( '.block-related-articles' ) . remove ( ) ;
133- article . find ( 'figure.wp-block-pullquote' ) . children ( ) . unwrap ( ) ;
134- article . find ( 'div.block-explanation-note' ) . wrapInner ( '<blockquote></blockquote>' ) ;
135- article . find ( 'div.wp-block-tcc-author-note' ) . wrapInner ( '<em></em>' ) . after ( '<hr>' ) ;
136- article . find ( 'p.has-small-font-size' ) . wrapInner ( '<small></small>' ) ;
137- item . description = art ( path . join ( __dirname , 'templates/description.art' ) , {
138- standfirst : $ ( '.pp-header-group__standfirst' ) . html ( ) ,
139- coverImage : $ ( '.pp-media__image' ) . attr ( 'src' ) ,
140- coverCaption : $ ( '.pp-media__caption' ) . html ( ) ,
141- article : article . html ( ) ,
142- copyright : $ ( '.copyright' ) . html ( ) ,
143- } ) ;
164+ switch ( url . hostname ) {
165+ case 'app.theinitium.com' :
166+ item . description = await fetchAppPage ( url ) ;
167+ break ;
168+ case 'theinitium.com' :
169+ item . description = await fetchWebPage ( url ) ;
170+ break ;
171+ default :
172+ break ;
173+ }
144174 return item ;
145175 } )
146176 )
0 commit comments