Skip to content

Commit

Permalink
Add RSS import/export (#750)
Browse files Browse the repository at this point in the history
* Add RSS export/import

* Update README, man and help

* Add tests for RSS import/export
  • Loading branch information
vagos authored Jul 8, 2024
1 parent 704018a commit 6394fb2
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 6 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ To get started right away, jump to the [Quickstart](#quickstart) section. `buku`
- Powerful search options (regex, substring...)
- Continuous search with on the fly mode switch
- Portable, merge-able database to sync between systems
- Import/export bookmarks from/to HTML, XBEL, Markdown or Orgfile
- Import/export bookmarks from/to HTML, XBEL, Markdown, RSS or Orgfile
- Smart tag management using redirection (>>, >, <<)
- Multi-threaded full DB refresh
- Manual encryption support
Expand Down Expand Up @@ -221,10 +221,11 @@ POWER TOYS:
format: [title](url) <!-- TAGS -->
export Orgfile, if file ends with '.org'
format: *[[url][title]] :tags:
export rss feed if file ends with '.rss'
export buku DB, if file ends with '.db'
combines with search results, if opted
-i, --import file import bookmarks from file
supports .html .xbel .json .md .org .db
supports .html .xbel .json .md .org .rss .db
-p, --print [...] show record details by indices, ranges
print all bookmarks, if no arguments
-n shows the last n results (like tail)
Expand Down
63 changes: 61 additions & 2 deletions buku
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ from subprocess import DEVNULL, PIPE, Popen
from typing import Any, Dict, List, Optional, Tuple, NamedTuple
from collections.abc import Sequence, Set, Callable
from warnings import warn
import xml.etree.ElementTree as ET

import urllib3
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -2529,6 +2530,10 @@ class BukuDb:
res = convert_bookmark_set(resultset, 'xbel', old)
count += res['count']
outfp.write(res['data'])
elif filepath.endswith('.rss'):
res = convert_bookmark_set(resultset, 'rss', old)
count += res['count']
outfp.write(res['data'])
else:
res = convert_bookmark_set(resultset, 'html', old)
count += res['count']
Expand Down Expand Up @@ -2881,6 +2886,8 @@ class BukuDb:
items = import_md(filepath=filepath, newtag=newtag)
elif filepath.endswith('org'):
items = import_org(filepath=filepath, newtag=newtag)
elif filepath.endswith('rss'):
items = import_rss(filepath=filepath, newtag=newtag)
elif filepath.endswith('json'):
if not tacit:
resp = input('Add parent folder names as tags? (y/n): ')
Expand Down Expand Up @@ -3330,7 +3337,7 @@ def convert_bookmark_set(
converted data and count of converted bookmark set
"""
import html
assert export_type in ['markdown', 'html', 'org', 'xbel']
assert export_type in ['markdown', 'html', 'org', 'xbel', 'rss']
# compatibility
resultset = bookmark_vars(bookmark_set)
old = old or {}
Expand Down Expand Up @@ -3376,6 +3383,29 @@ def convert_bookmark_set(
count += 1

out += '</xbel>'
elif export_type == 'rss':
out = (
'<feed xmlns="http://www.w3.org/2005/Atom">\n'
' <title>Bookmarks</title>\n'
' <generator uri="https://github.com/jarun/buku">buku</generator>\n'
)

for row in resultset:
out += ' <entry>\n'
out += ' <title>' + title(row) + '</title>\n'
_url = html.escape(row.url).encode('ascii', 'xmlcharrefreplace').decode('utf-8')
out += ' <link href="%s" rel="alternate" type="text/html"/>\n' % _url
out += ' <id>%s</id>\n' % row.id
for tag in (t for t in row.tags.split(',') if t):
_tag = html.escape(tag).encode('ascii', 'xmlcharrefreplace').decode('utf-8')
out += ' <category term="%s"/>\n' % _tag
if row.desc:
_desc = html.escape(row.desc).encode('ascii', 'xmlcharrefreplace').decode('utf-8')
out += ' <content type="html"> <![CDATA[ <p>%s</p> ]]> </content>\n' % _desc
out += ' </entry>\n'
count += 1

out += '</feed>'
elif export_type == 'html':
timestamp = str(int(time.time()))
out = (
Expand Down Expand Up @@ -3519,6 +3549,34 @@ def import_md(filepath: str, newtag: Optional[str]):

yield (url, title, delim_wrap(tags), None, 0, True, False)

def import_rss(filepath: str, newtag: Optional[str]):
    """Parse bookmark RSS file.

    The file is parsed as an Atom feed: every <entry> element directly
    under the root is treated as one bookmark. Entries that carry no
    <link> element with a non-empty 'href' attribute are skipped, since
    they cannot be turned into a bookmark.

    Parameters
    ----------
    filepath : str
        Path to RSS file.
    newtag : str, optional
        New tag for bookmarks in RSS file.

    Yields
    ------
    tuple
        Parsed result: (url, title, tags, desc, 0, True, False).
        title is None if the entry has no (or an empty) <title>;
        desc is None if the entry has no <content>.
    """

    ns = {'atom': 'http://www.w3.org/2005/Atom'}

    # Parse the whole document up front so the file handle is released
    # before the first item is yielded (a suspended generator would
    # otherwise keep the file open).
    with open(filepath, mode='r', encoding='utf-8') as infp:
        root = ET.fromstring(infp.read())

    for entry in root.findall('atom:entry', ns):
        link = entry.find('atom:link', ns)
        url = link.get('href') if link is not None else None
        if not url:
            # Atom allows entries without a link (e.g. content-only
            # entries); skip them instead of aborting the whole import.
            continue

        title_elem = entry.find('atom:title', ns)
        title = title_elem.text if title_elem is not None else None

        # Ignore categories without a usable 'term' attribute.
        terms = (cat.get('term') for cat in entry.findall('atom:category', ns))
        tags = ','.join(term for term in terms if term)
        if newtag is not None:
            tags = newtag + ',' + tags

        desc_elem = entry.find('atom:content', ns)
        desc = desc_elem.text if desc_elem is not None else None

        yield (url, title, delim_wrap(tags), desc, 0, True, False)

def import_org(filepath: str, newtag: Optional[str]):
"""Parse bookmark org file.
Expand Down Expand Up @@ -5750,10 +5808,11 @@ POSITIONAL ARGUMENTS:
format: [title](url) <!-- TAGS -->
export Orgfile, if file ends with '.org'
format: *[[url][title]] :tags:
export rss feed if file ends with '.rss'
export buku DB, if file ends with '.db'
combines with search results, if opted
-i, --import file import bookmarks from file
supports .html .xbel .json .md .org .db
supports .html .xbel .json .md .org .rss .db
-p, --print [...] show record details by indices, ranges
print all bookmarks, if no arguments
-n shows the last n results (like tail)
Expand Down
8 changes: 6 additions & 2 deletions buku.1
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ is a command-line utility to store, tag, search and organize bookmarks.
* Powerful search options (regex, substring...)
* Continuous search with on the fly mode switch
* Portable, merge-able database to sync between systems
* Import/export bookmarks from/to HTML, XBEL, Markdown or Orgfile
* Import/export bookmarks from/to HTML, XBEL, Markdown, RSS or Orgfile
* Smart tag management using redirection (>>, >, <<)
* Multithreaded full DB refresh
* Manual encryption support
Expand Down Expand Up @@ -211,14 +211,18 @@ Orgfile is used if
.I file
has extension '.org' Orgfile format: * [[url][title]], 1 entry per line.
.br
RSS is used if
.I file
has extension '.rss'. RSS format: Atom feed with one <entry> per bookmark, containing <title>, <link>, <category> and <content> elements.
.br
A buku database is generated if
.I file
has extension '.db'.
.TP
.BI \-i " " \--import " file"
Import bookmarks from Firefox bookmarks formatted HTML.
.I file
is considered Firefox-exported JSON if it has '.json' extension, XBEL if it is '.xbel', Markdown (compliant with --export format) if it is '.md', Orgfile if the extension is '.org' or another buku database if the extension is '.db'.
is considered Firefox-exported JSON if it has '.json' extension, XBEL if it is '.xbel', Markdown (compliant with --export format) if it is '.md', Orgfile if the extension is '.org', RSS if the extension is '.rss' or another buku database if the extension is '.db'.
.TP
.BI \-p " " \--print " [...]"
Show details (DB index, URL, title, tags and comment) of bookmark record by DB index. If no arguments, all records with actual index from DB are shown. Accepts hyphenated ranges and space-separated indices. A negative value (introduced for convenience) behaves like the tail utility, e.g., -n shows the details of the last n bookmarks.
Expand Down
44 changes: 44 additions & 0 deletions tests/test_buku.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,28 @@ def test_import_md(tmpdir, newtag, exp_res):
res = list(import_md(p.strpath, newtag))
assert res[0] == exp_res

@pytest.mark.parametrize(
    "newtag, exp_res",
    [
        (None, ("http://example.com", "text1", ",", None, 0, True, False)),
        ("tag1", ("http://example.com", "text1", ",tag1,", None, 0, True, False)),
    ],
)
def test_import_rss(tmpdir, newtag, exp_res):
    """Import a single-entry feed and compare the first parsed record."""
    from buku import import_rss

    # Minimal feed: one entry with only a title and a link.
    feed_xml = (
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        ' <title>Bookmarks</title>\n'
        ' <generator uri="https://github.com/jarun/buku">buku</generator>\n'
        ' <entry>\n'
        ' <title>text1</title>\n'
        ' <link href="http://example.com"/>\n'
        ' </entry>\n'
        '</feed>\n'
    )

    import_dir = tmpdir.mkdir("importrss")
    feed_file = import_dir.join("test.rss")
    feed_file.write(feed_xml)

    parsed = list(import_rss(feed_file.strpath, newtag))
    first_record = parsed[0]
    assert first_record == exp_res

@pytest.mark.parametrize(
"newtag, exp_res",
Expand Down Expand Up @@ -862,6 +884,28 @@ def test_copy_to_clipboard(platform, params):
"markdown",
"- [Untitled](http://example.com)\n- [Untitled](http://example.org)\n- [Google](http://google.com)\n",
],
[
"rss",
'<feed xmlns="http://www.w3.org/2005/Atom">\n'
' <title>Bookmarks</title>\n'
' <generator uri="https://github.com/jarun/buku">buku</generator>\n'
' <entry>\n'
' <title></title>\n'
' <link href="http://example.com" rel="alternate" type="text/html"/>\n'
' <id>1</id>\n'
' </entry>\n'
' <entry>\n'
' <title></title>\n'
' <link href="http://example.org" rel="alternate" type="text/html"/>\n'
' <id>1</id>\n'
' </entry>\n'
' <entry>\n'
' <title>Google</title>\n'
' <link href="http://google.com" rel="alternate" type="text/html"/>\n'
' <id>2</id>\n'
' </entry>\n'
'</feed>',
],
["random", None],
[
"xbel",
Expand Down

0 comments on commit 6394fb2

Please sign in to comment.