|
1 | 1 | import os |
2 | 2 | import xml.etree.ElementTree as ET |
3 | 3 | from datetime import datetime |
| 4 | +from email.utils import format_datetime, parsedate_to_datetime |
4 | 5 |
|
5 | 6 | # Path to your docs folder |
6 | 7 | docs_folder = 'docs' |
7 | | -rss_file = 'docs/rss.xml' |
| 8 | +rss_file = os.path.join(docs_folder, 'rss.xml') |
8 | 9 |
|
9 | | -# Get a list of all PDF files in docs and its subdirectories |
| 10 | +# Get a list of all PDF files in docs and subdirectories |
10 | 11 | pdf_files = [] |
11 | | -for root, dirs, files in os.walk(docs_folder): |
| 12 | +for root_dir, dirs, files in os.walk(docs_folder): |
12 | 13 | for file in files: |
13 | 14 | if file.endswith(".pdf"): |
14 | | - pdf_files.append(os.path.join(root, file)) |
| 15 | + pdf_files.append(os.path.join(root_dir, file)) |
15 | 16 |
|
16 | 17 | print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.") |
17 | 18 |
|
18 | 19 | # Create or load RSS XML |
19 | 20 | if os.path.exists(rss_file): |
20 | 21 | tree = ET.parse(rss_file) |
21 | 22 | root = tree.getroot() |
| 23 | + channel = root.find("channel") |
22 | 24 | else: |
23 | 25 | root = ET.Element("rss", version="2.0") |
24 | 26 | channel = ET.SubElement(root, "channel") |
25 | 27 | ET.SubElement(channel, "title").text = "Modern C++ Tutorials - Docs Updates" |
26 | 28 | ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials" |
27 | 29 | ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder" |
28 | 30 |
|
29 | | -# Get current date for publishing |
30 | | -current_date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S GMT") |
| 31 | +# Create a map of current items to remove duplicates |
| 32 | +existing_items = {item.find("guid").text: item for item in channel.findall("item") if item.find("guid") is not None} |
| 33 | + |
| 34 | +# Get current date in RFC 2822 format (for RSS) |
| 35 | +current_date = format_datetime(datetime.utcnow()) |
31 | 36 |
|
32 | | -# Iterate through each PDF file found |
| 37 | +# Add or update RSS items |
33 | 38 | for pdf in pdf_files: |
34 | | - # Convert the full file path to a relative URL for GitHub |
35 | 39 | relative_path = os.path.relpath(pdf, docs_folder) |
36 | 40 | commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}" |
37 | 41 |
|
38 | | - # Create a new RSS item for each PDF |
| 42 | + # Remove old item if it exists |
| 43 | + if commit_url in existing_items: |
| 44 | + channel.remove(existing_items[commit_url]) |
| 45 | + |
| 46 | + # Create and add the new item |
39 | 47 | item = ET.Element("item") |
40 | | - ET.SubElement(item, "title").text = relative_path # Use relative path as title |
| 48 | + ET.SubElement(item, "title").text = relative_path |
41 | 49 | ET.SubElement(item, "link").text = commit_url |
42 | 50 | ET.SubElement(item, "guid").text = commit_url |
43 | 51 | ET.SubElement(item, "pubDate").text = current_date |
44 | | - |
45 | | - # Append the item to the channel |
46 | | - channel = root.find("channel") |
47 | 52 | channel.append(item) |
48 | 53 |
|
49 | | -# Save the updated RSS feed |
| 54 | +# Sort items by pubDate descending |
| 55 | +items = channel.findall("item") |
| 56 | + |
| 57 | +# Parse pubDate strings to datetime objects for sorting |
| 58 | +def get_pub_date(item): |
| 59 | + pub_date = item.find("pubDate").text |
| 60 | + return parsedate_to_datetime(pub_date) |
| 61 | + |
| 62 | +items.sort(key=get_pub_date, reverse=True) |
| 63 | + |
| 64 | +# Optional: Keep only the latest N entries (e.g., 20) |
| 65 | +#MAX_ENTRIES = 20 |
| 66 | +#for item in channel.findall("item"): |
| 67 | +# channel.remove(item) |
| 68 | +#for item in items[:MAX_ENTRIES]: |
| 69 | +# channel.append(item) |
| 70 | + |
| 71 | +# Save updated RSS feed |
50 | 72 | tree = ET.ElementTree(root) |
51 | 73 | tree.write(rss_file, encoding="UTF-8", xml_declaration=True) |
52 | 74 |
|
53 | | -print(f"Generated RSS feed with {len(pdf_files)} articles.") |
| 75 | +print(f"Generated RSS feed with {min(len(items), MAX_ENTRIES)} articles.") |
0 commit comments