Skip to content

Commit 88bbe3e

Browse files
authored
Merge pull request #15 from DIYgod/master
[pull] master from diygod:master
2 parents 4dba764 + bca47a1 commit 88bbe3e

File tree

3 files changed

+202
-3
lines changed

3 files changed

+202
-3
lines changed

lib/routes/huggingface/blog.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { Route, type DataItem } from '@/types';
2+
import got from '@/utils/got';
3+
import { load } from 'cheerio';
4+
import { parseDate } from '@/utils/parse-date';
5+
6+
/**
 * Route registration for the Hugging Face blog feed.
 *
 * The route takes no parameters; `handler` produces the feed from the
 * public blog listing page.
 */
export const route: Route = {
    // Identity and documentation metadata.
    name: '英文博客',
    path: '/blog',
    example: '/huggingface/blog',
    url: 'huggingface.co/blog',
    categories: ['programming'],
    maintainers: ['cesaryuan', 'zcf0508'],
    parameters: {},

    // Capability flags: every optional feature is switched off for this route.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },

    // Pages on which this route should be suggested.
    radar: [
        {
            source: ['huggingface.co/blog', 'huggingface.co/'],
        },
    ],

    handler,
};
29+
30+
/**
 * One author entry of a blog post.
 *
 * Only `user` is read by the handler (it becomes the feed author name);
 * the remaining fields are declared but not consumed in this file.
 */
interface Author {
    user: string;
    guest: boolean;
    org?: string;
}

/**
 * Metadata of a single blog post.
 *
 * The handler reads `title`, `publishedAt`, `authors`, `upvotes` and
 * `thumbnail`; the other fields are declared but not consumed in this file.
 */
interface Blog {
    authors: Author[];
    canonical: boolean;
    isUpvotedByUser: boolean;
    /** Publication timestamp as a string; handed to `parseDate`. */
    publishedAt: string;
    slug: string;
    title: string;
    upvotes: number;
    /** Thumbnail location; resolved against `https://huggingface.co`. */
    thumbnail: string;
    guest: boolean;
}

/**
 * Value the handler reads from the `data-props` attribute of a
 * `div[data-target="BlogThumbnail"]` element.
 */
interface BlogData {
    blog: Blog;
    blogUrl: string;
    lang: string;
    loggedInUser: string;
}
54+
55+
/**
 * Builds the Hugging Face blog feed from the blog listing page.
 *
 * Every `div[data-target="BlogThumbnail"]` element is expected to carry its
 * post metadata in a `data-props` attribute (read as {@link BlogData}) and to
 * contain an anchor whose `href` points at the post. Elements missing either
 * piece are skipped instead of producing a broken item or failing the feed.
 *
 * @returns The feed title, the listing URL and one item per usable thumbnail.
 */
async function handler() {
    const rootUrl = 'https://huggingface.co';
    const listUrl = `${rootUrl}/blog`;

    const { body: response } = await got(listUrl);
    const $ = load(response);

    const items: DataItem[] = $('div[data-target="BlogThumbnail"]')
        .toArray()
        .flatMap((element): DataItem[] => {
            const card = $(element);
            // `data()` may hand back a plain string when the attribute is not
            // parseable JSON, so the shape is checked before it is trusted.
            const props = card.data('props') as Partial<BlogData> | string | undefined;
            const blog = props && typeof props === 'object' ? props.blog : undefined;
            const href = card.find('a').attr('href');

            if (!blog || !href) {
                return [];
            }

            return [
                {
                    title: blog.title,
                    // Resolve like the thumbnail so relative and absolute hrefs both work.
                    link: new URL(href, rootUrl).href,
                    pubDate: parseDate(blog.publishedAt),
                    author: (blog.authors ?? []).map((author) => ({
                        name: author.user,
                    })),
                    upvotes: blog.upvotes,
                    image: blog.thumbnail ? new URL(blog.thumbnail, rootUrl).href : undefined,
                },
            ];
        });

    return {
        title: 'Huggingface 英文博客',
        link: listUrl,
        item: items,
    };
}

lib/routes/papers/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ export const handler = async (ctx) => {
1717

1818
const rootUrl = 'https://papers.cool';
1919
const currentUrl = new URL(category, rootUrl).href;
20-
const feedUrl = new URL(`${category}/feed`, rootUrl).href;
20+
const feedUrl = new URL(`arxiv/${category}/feed`, rootUrl).href;
2121

2222
const site = category.split(/\//)[0];
2323
const apiKimiUrl = new URL(`${site}/kimi?paper=`, rootUrl).href;
@@ -76,15 +76,15 @@ export const handler = async (ctx) => {
7676
};
7777

7878
export const route: Route = {
79-
path: '/:category{.+}?',
79+
path: '/arxiv/:category{.+}?',
8080
name: 'Topic',
8181
url: 'papers.cool',
8282
maintainers: ['nczitzk', 'Muyun99'],
8383
handler,
8484
example: '/papers/arxiv/cs.AI',
8585
parameters: { category: 'Category, arXiv Artificial Intelligence (cs.AI) by default' },
8686
description: `:::tip
87-
If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI)where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
87+
If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI), where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
8888
:::
8989
9090
| Category | id |

lib/routes/papers/query.ts

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { Route } from '@/types';
2+
import { getCurrentPath } from '@/utils/helpers';
3+
const __dirname = getCurrentPath(import.meta.url);
4+
5+
import { parseDate } from '@/utils/parse-date';
6+
import { art } from '@/utils/render';
7+
import path from 'node:path';
8+
import parser from '@/utils/rss-parser';
9+
10+
/** PDF link builders, keyed by the papers.cool site segment. */
const pdfUrlGenerators = {
    arxiv: (id: string) => `https://arxiv.org/pdf/${id}.pdf`,
};

/**
 * Builds a feed of papers.cool arXiv search results for a keyword.
 *
 * Route parameter `keyword` is the search term (default `Detection`); the
 * optional `limit` query parameter caps the number of items (default 150).
 *
 * @param ctx Request context; only `ctx.req.param()` and `ctx.req.query()` are used.
 * @returns Feed metadata taken from the upstream search feed plus the mapped items.
 */
export const handler = async (ctx) => {
    // The keyword is the bare search term: the `/query` prefix belongs to the
    // route path and must not leak into the upstream query.
    const { keyword = 'Detection' } = ctx.req.param();

    const defaultLimit = 150;
    const limitParam = ctx.req.query('limit');
    const parsedLimit = limitParam ? Number.parseInt(limitParam, 10) : Number.NaN;
    // A non-numeric or negative limit falls back to the default instead of
    // silently emptying or truncating the feed.
    const limit = Number.isNaN(parsedLimit) || parsedLimit < 0 ? defaultLimit : parsedLimit;

    const rootUrl = 'https://papers.cool';
    // The search endpoints used below live under the arXiv section, so the
    // site is fixed rather than derived from the keyword.
    const site: keyof typeof pdfUrlGenerators = 'arxiv';

    // `searchParams` percent-encodes the keyword (spaces, `&`, `/`, ...).
    const currentUrl = new URL(`${site}/search`, rootUrl);
    currentUrl.searchParams.set('highlight', '1');
    currentUrl.searchParams.set('query', keyword);
    currentUrl.searchParams.set('sort', '0');

    const feedUrl = new URL(`${site}/search/feed`, rootUrl);
    feedUrl.searchParams.set('query', keyword);

    const feed = await parser.parseURL(feedUrl.href);

    const language = 'en';

    const items = feed.items.slice(0, limit).map((item) => {
        const title = item.title;
        const guid = item.guid;

        // The paper id is the last path segment of the item link.
        const id = item.link?.split(/\//).pop() ?? '';

        // Set the id as a query value: resolving it as a relative URL against
        // `.../kimi?paper=` would replace `kimi` and drop the query entirely.
        const kimiLink = new URL(`${site}/kimi`, rootUrl);
        kimiLink.searchParams.set('paper', id);
        const kimiUrl = kimiLink.href;

        const pdfUrl = id ? pdfUrlGenerators[site](id) : undefined;

        const authorString = item.author;
        const description = art(path.join(__dirname, 'templates/description.art'), {
            pdfUrl,
            siteUrl: item.link,
            kimiUrl,
            authorString,
            summary: item.summary,
        });

        return {
            title,
            description,
            pubDate: item.pubDate ? parseDate(item.pubDate) : undefined,
            link: item.link,
            category: item.categories,
            author: authorString,
            doi: `${site}${id}`,
            guid,
            id: guid,
            content: {
                html: description,
                text: item.content,
            },
            language,
            // Only advertise an enclosure when there is a PDF to point at.
            enclosure_url: pdfUrl,
            enclosure_type: pdfUrl ? 'application/pdf' : undefined,
            enclosure_title: pdfUrl ? title : undefined,
        };
    });

    return {
        title: feed.title,
        description: feed.description,
        link: currentUrl.href,
        item: items,
        allowEmpty: true,
        image: feed.image?.url,
        language: feed.language,
    };
};
76+
77+
/**
 * Route registration for keyword search on papers.cool (arXiv section).
 *
 * The `keyword` parameter is the search term itself, i.e. the value of the
 * `query` parameter of the papers.cool search URL.
 */
export const route: Route = {
    path: '/query/:keyword{.+}?',
    // Distinct from the category route in this namespace, which is named 'Topic'.
    name: 'Query',
    url: 'papers.cool',
    maintainers: ['Muyun99'],
    handler,
    example: '/papers/query/Detection',
    parameters: { keyword: 'Keyword to search for papers, e.g., Detection, Segmentation, etc.' },
    description: `:::tip
If you subscribe to [arXiv papers queried by Detection](https://papers.cool/arxiv/search?highlight=1&query=Detection), where the URL is \`https://papers.cool/arxiv/search?highlight=1&query=Detection\`, take the value of the \`query\` parameter and use it as the keyword to fill in. Therefore, the route will be [\`/papers/query/Detection\`](https://rsshub.app/papers/query/Detection).
:::

| Query                                                 | keyword             |
| ----------------------------------------------------- | ------------------- |
| arXiv papers queried by Detection                     | Detection           |
| arXiv papers queried by Segmentation                  | Segmentation        |
`,
    categories: ['journal'],

    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportRadar: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: true,
    },
    radar: [
        {
            title: 'arXiv Paper queried by Keyword',
            // NOTE(review): this source relies on matching a query string and on
            // `:keyword` being filled from `query=*` — confirm the radar matcher
            // supports that; it may need a function target instead.
            source: ['papers.cool/arxiv/search?highlight=1&query=*&sort=0'],
            // NOTE(review): confirm whether targets are expected with or without
            // the `/papers` namespace prefix in this codebase.
            target: '/papers/query/:keyword',
        },
    ],
};

0 commit comments

Comments
 (0)