Skip to content

Commit 426e283

Browse files
author
dalj8690
committed
Updated the script for generating RSS feeds
1 parent b8a11e4 commit 426e283

File tree

1 file changed

+37
-15
lines changed

1 file changed

+37
-15
lines changed

generate_rss.py

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,75 @@
11
import os
22
import xml.etree.ElementTree as ET
33
from datetime import datetime
4+
from email.utils import format_datetime, parsedate_to_datetime
45

56
# Path to your docs folder
67
docs_folder = 'docs'
7-
rss_file = 'docs/rss.xml'
8+
rss_file = os.path.join(docs_folder, 'rss.xml')
89

9-
# Get a list of all PDF files in docs and its subdirectories
10+
# Get a list of all PDF files in docs and subdirectories
1011
pdf_files = []
11-
for root, dirs, files in os.walk(docs_folder):
12+
for root_dir, dirs, files in os.walk(docs_folder):
1213
for file in files:
1314
if file.endswith(".pdf"):
14-
pdf_files.append(os.path.join(root, file))
15+
pdf_files.append(os.path.join(root_dir, file))
1516

1617
print(f"Found {len(pdf_files)} PDFs in the 'docs' folder.")
1718

1819
# Create or load RSS XML
1920
if os.path.exists(rss_file):
2021
tree = ET.parse(rss_file)
2122
root = tree.getroot()
23+
channel = root.find("channel")
2224
else:
2325
root = ET.Element("rss", version="2.0")
2426
channel = ET.SubElement(root, "channel")
2527
ET.SubElement(channel, "title").text = "Modern C++ Tutorials - Docs Updates"
2628
ET.SubElement(channel, "link").text = "https://github.com/damirlj/modern_cpp_tutorials"
2729
ET.SubElement(channel, "description").text = "New articles and updates in the docs/ folder"
2830

29-
# Get current date for publishing
30-
current_date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S GMT")
31+
# Create a map of current items to remove duplicates
32+
existing_items = {item.find("guid").text: item for item in channel.findall("item") if item.find("guid") is not None}
33+
34+
# Get current date in RFC 2822 format (for RSS)
35+
current_date = format_datetime(datetime.utcnow())
3136

32-
# Iterate through each PDF file found
37+
# Add or update RSS items
3338
for pdf in pdf_files:
34-
# Convert the full file path to a relative URL for GitHub
3539
relative_path = os.path.relpath(pdf, docs_folder)
3640
commit_url = f"https://github.com/damirlj/modern_cpp_tutorials/blob/main/{relative_path}"
3741

38-
# Create a new RSS item for each PDF
42+
# Remove old item if it exists
43+
if commit_url in existing_items:
44+
channel.remove(existing_items[commit_url])
45+
46+
# Create and add the new item
3947
item = ET.Element("item")
40-
ET.SubElement(item, "title").text = relative_path # Use relative path as title
48+
ET.SubElement(item, "title").text = relative_path
4149
ET.SubElement(item, "link").text = commit_url
4250
ET.SubElement(item, "guid").text = commit_url
4351
ET.SubElement(item, "pubDate").text = current_date
44-
45-
# Append the item to the channel
46-
channel = root.find("channel")
4752
channel.append(item)
4853

49-
# Save the updated RSS feed
54+
# Sort items by pubDate descending
55+
items = channel.findall("item")
56+
57+
# Parse pubDate strings to datetime objects for sorting
58+
def get_pub_date(item):
    """Return the item's <pubDate> as a timezone-aware UTC datetime for sorting.

    Items whose <pubDate> is missing, empty, or unparsable get the earliest
    representable datetime, so they sort last when ordering newest-first
    instead of aborting the whole feed generation.
    """
    # Local import: the file only imports the `datetime` class at module level.
    from datetime import datetime, timezone

    oldest = datetime.min.replace(tzinfo=timezone.utc)

    pub_date_node = item.find("pubDate")
    if pub_date_node is None or not pub_date_node.text:
        return oldest

    try:
        parsed = parsedate_to_datetime(pub_date_node.text)
    except (TypeError, ValueError):
        # Older Python versions raise TypeError, newer ones ValueError.
        return oldest

    # parsedate_to_datetime() yields a *naive* datetime for a "-0000" offset
    # (what format_datetime() emits for a naive UTC datetime) and an *aware*
    # one for "GMT"/"+0000". Mixing both in one sort raises TypeError, so
    # treat naive values as UTC.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
61+
62+
items.sort(key=get_pub_date, reverse=True)
# findall() returns a separate list, so sorting it does not reorder the
# channel's children. Detach the items and re-append them in sorted order
# so the written feed is actually newest-first.
for item in items:
    channel.remove(item)
channel.extend(items)
63+
64+
# Optional: Keep only the latest N entries (e.g., 20)
65+
#MAX_ENTRIES = 20
66+
#for item in channel.findall("item"):
67+
# channel.remove(item)
68+
#for item in items[:MAX_ENTRIES]:
69+
# channel.append(item)
70+
71+
# Save updated RSS feed
5072
tree = ET.ElementTree(root)
5173
tree.write(rss_file, encoding="UTF-8", xml_declaration=True)
5274

53-
print(f"Generated RSS feed with {len(pdf_files)} articles.")
75+
# MAX_ENTRIES is only defined in the commented-out truncation code, so
# referencing it here raised NameError; report the actual item count.
print(f"Generated RSS feed with {len(items)} articles.")

0 commit comments

Comments
 (0)