Merge pull request #15 from DIYgod/master

pull[bot] · web-flow · commit 88bbe3e9e942 · 2024-12-26T03:21:26.000Z
[pull] master from diygod:master
diff --git a/lib/routes/huggingface/blog.ts b/lib/routes/huggingface/blog.ts
@@ -0,0 +1,87 @@
+import { Route, type DataItem } from '@/types';
+import got from '@/utils/got';
+import { load } from 'cheerio';
+import { parseDate } from '@/utils/parse-date';
+
+export const route: Route = { // Route metadata for the Hugging Face blog feed (example path: /huggingface/blog).
+    path: '/blog',
+    categories: ['programming'],
+    example: '/huggingface/blog',
+    parameters: {},
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['huggingface.co/blog', 'huggingface.co/'],
+        },
+    ],
+    name: '英文博客', // Chinese for "English blog"
+    maintainers: ['cesaryuan', 'zcf0508'],
+    handler,
+    url: 'huggingface.co/blog',
+};
+
+interface Author { // One entry of a post's `authors` list.
+    user: string; // Used by the handler as the feed author's name.
+    guest: boolean;
+    org?: string;
+}
+
+interface Blog { // Per-post metadata found under `blog` in a post card's props.
+    authors: Author[];
+    canonical: boolean;
+    isUpvotedByUser: boolean;
+    publishedAt: string; // Passed to parseDate() by the handler to produce the item's pubDate.
+    slug: string;
+    title: string;
+    upvotes: number;
+    thumbnail: string; // Resolved against https://huggingface.co by the handler; presumably may be a relative path.
+    guest: boolean;
+}
+
+interface BlogData { // Shape the handler assumes for the value of a post card's `data-props` attribute.
+    blog: Blog;
+    blogUrl: string;
+    lang: string;
+    loggedInUser: string;
+}
+
+/**
+ * Scrape the Hugging Face blog index and turn every post card into a feed item.
+ * Post metadata is read from the `data-props` attribute of each card.
+ */
+async function handler() {
+    const baseUrl = 'https://huggingface.co';
+    const { body: response } = await got(`${baseUrl}/blog`);
+    const $ = load(response);
+
+    const posts = $('div[data-target="BlogThumbnail"]')
+        .toArray()
+        .map((item) => {
+            const props = $(item).data('props') as BlogData;
+            // Without an anchor the link would become ".../undefined"; assumes posts live at /blog/<slug> - confirm.
+            const link = $(item).find('a').attr('href') ?? `/blog/${props.blog.slug}`;
+            return { ...props, link };
+        });
+
+    const items: DataItem[] = posts.map((item) => ({
+        title: item.blog.title,
+        link: new URL(item.link, baseUrl).href,
+        pubDate: parseDate(item.blog.publishedAt),
+        author: item.blog.authors.map((author) => ({ name: author.user })),
+        upvotes: item.blog.upvotes,
+        image: new URL(item.blog.thumbnail, baseUrl).href,
+    }));
+
+    return {
+        title: 'Huggingface 英文博客',
+        link: `${baseUrl}/blog`,
+        item: items,
+    };
+}
diff --git a/lib/routes/papers/index.ts b/lib/routes/papers/index.ts
@@ -17,7 +17,7 @@ export const handler = async (ctx) => {
 
     const rootUrl = 'https://papers.cool';
     const currentUrl = new URL(category, rootUrl).href;
-    const feedUrl = new URL(`${category}/feed`, rootUrl).href;
+    const feedUrl = new URL(`arxiv/${category}/feed`, rootUrl).href;
 
     const site = category.split(/\//)[0];
     const apiKimiUrl = new URL(`${site}/kimi?paper=`, rootUrl).href;
@@ -76,15 +76,15 @@ export const handler = async (ctx) => {
 };
 
 export const route: Route = {
-    path: '/:category{.+}?',
+    path: '/arxiv/:category{.+}?',
     name: 'Topic',
     url: 'papers.cool',
     maintainers: ['nczitzk', 'Muyun99'],
     handler,
     example: '/papers/arxiv/cs.AI',
     parameters: { category: 'Category, arXiv Artificial Intelligence (cs.AI) by default' },
     description: `:::tip
-  If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI)，where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
+  If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI), where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
   :::
 
   | Category                                              | id          |
diff --git a/lib/routes/papers/query.ts b/lib/routes/papers/query.ts
@@ -0,0 +1,112 @@
+import { Route } from '@/types';
+import { getCurrentPath } from '@/utils/helpers';
+const __dirname = getCurrentPath(import.meta.url);
+
+import { parseDate } from '@/utils/parse-date';
+import { art } from '@/utils/render';
+import path from 'node:path';
+import parser from '@/utils/rss-parser';
+
+const pdfUrlGenerators = { // Per-site builders that turn a paper id into a PDF URL.
+    arxiv: (id: string) => `https://arxiv.org/pdf/${id}.pdf`,
+};
+
+/**
+ * Build a feed of papers.cool arXiv search results for a keyword.
+ * Reads the `keyword` route parameter (default `Detection`) and the optional `limit` query (default 150).
+ */
+export const handler = async (ctx) => {
+    const { keyword = 'Detection' } = ctx.req.param();
+    const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 150;
+
+    // Every URL built here is under /arxiv, so the site is fixed instead of being derived from the keyword.
+    const site = 'arxiv';
+    const rootUrl = 'https://papers.cool';
+    const query = encodeURIComponent(keyword); // keywords may contain `&`, `#`, `+`, ...
+    const currentUrl = new URL(`${site}/search?highlight=1&query=${query}&sort=0`, rootUrl).href;
+    const feedUrl = new URL(`${site}/search/feed?query=${query}`, rootUrl).href;
+    const apiKimiUrl = new URL(`${site}/kimi?paper=`, rootUrl).href;
+    const feed = await parser.parseURL(feedUrl);
+
+    const items = feed.items.slice(0, limit).map((item) => {
+        const title = item.title;
+        const guid = item.guid;
+
+        const id = item.link?.split(/\//).pop() ?? '';
+        // Append the id to the query string; `new URL(id, apiKimiUrl)` would replace the last path segment and drop `?paper=`.
+        const kimiUrl = `${apiKimiUrl}${encodeURIComponent(id)}`;
+        const pdfUrl = pdfUrlGenerators[site](id);
+        const authorString = item.author;
+        const description = art(path.join(__dirname, 'templates/description.art'), {
+            pdfUrl,
+            siteUrl: item.link,
+            kimiUrl,
+            authorString,
+            summary: item.summary,
+        });
+
+        return {
+            title,
+            description,
+            pubDate: parseDate(item.pubDate ?? ''),
+            link: item.link,
+            category: item.categories,
+            author: authorString,
+            doi: `${site}${id}`,
+            guid,
+            id: guid,
+            content: { html: description, text: item.content },
+            language: 'en',
+            enclosure_url: pdfUrl,
+            enclosure_type: 'application/pdf',
+            enclosure_title: title,
+        };
+    });
+
+    return {
+        title: feed.title,
+        description: feed.description,
+        link: currentUrl,
+        item: items,
+        allowEmpty: true,
+        image: feed.image?.url,
+        language: feed.language,
+    };
+};
+
+export const route: Route = { // Route metadata for the keyword search feed (example path: /papers/query/Detection).
+    path: '/query/:keyword{.+}?',
+    name: 'Query',
+    url: 'papers.cool',
+    maintainers: ['Muyun99'],
+    handler,
+    example: '/papers/query/Detection',
+    parameters: { keyword: 'Keyword to search for papers, e.g., Detection, Segmentation, etc.' },
+    description: `:::tip
+  If you subscribe to [arXiv papers queried by Detection](https://papers.cool/arxiv/search?highlight=1&query=Detection), where the URL is \`https://papers.cool/arxiv/search?highlight=1&query=Detection\`, take the value of the \`query\` parameter and use it as the parameter to fill in. Therefore, the route will be [\`/papers/query/Detection\`](https://rsshub.app/papers/query/Detection).
+  :::
+
+  | Category                                              | keyword             |
+  | ----------------------------------------------------- | ------------------- |
+  | arXiv papers queried by Detection                     | Detection           |
+  | arXiv papers queried by Segmentation                  | Segmentation        |
+  `,
+    categories: ['journal'],
+
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportRadar: true,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: true,
+    },
+    radar: [
+        {
+            title: 'arXiv papers queried by keyword',
+            source: ['papers.cool/arxiv/search?highlight=1&query=*&sort=0'],
+            target: '/papers/query/:keyword',
+        },
+    ],
+};