-
Notifications
You must be signed in to change notification settings - Fork 1
/
news.py
79 lines (62 loc) · 2.28 KB
/
news.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import yaml
import feedparser
import os
import glob
from datetime import datetime, timedelta
from dateutil import parser as dateparser
from youtube import is_youtube_channel
from common import parse_yml_files, convert_rss_data_to_md
def get_rss_data(data):
    """Collect the entries of one feed that were published today.

    Args:
        data: Mapping describing a feed source. ``data['feed']`` is handed
            to ``feedparser.parse`` and ``data['website']`` is copied onto
            every returned item.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``date`` (ISO-8601
        string), ``summary`` and ``website``. Entries keep the order of the
        feed and are de-duplicated by title (first occurrence wins).

    Entries that lack a title, link or publication date, or whose date
    cannot be parsed, are skipped individually so that one malformed entry
    does not discard the rest of the feed. A missing summary becomes ``''``.
    """
    feed = feedparser.parse(data['feed'])

    # To backfill older posts, shift this back, e.g. ``- timedelta(7)``.
    # NOTE(review): the entry date below is taken in the feed's own
    # timezone while ``today`` is the local date - confirm that is intended.
    today = datetime.now().date()

    news_items = []
    seen_titles = set()
    for entry in feed.entries:
        published = entry.get('published')
        title = entry.get('title')
        link = entry.get('link')
        if published is None or title is None or link is None:
            continue

        try:
            post_date = dateparser.parse(published)
        except (ValueError, OverflowError):
            # dateutil signals unparseable input with ParserError (a
            # ValueError subclass) or OverflowError; skip just this entry.
            continue

        if post_date.date() != today:
            continue
        if title in seen_titles:
            continue
        seen_titles.add(title)

        news_items.append({
            'title': title,
            'url': link,
            'date': post_date.isoformat(),
            'summary': entry.get('summary', ''),
            'website': data['website'],
        })
    return news_items
def _published_timestamp(item):
    """Return the POSIX timestamp encoded in ``item['date']``."""
    # Comparing the ISO strings directly misorders items whose feeds use
    # different UTC offsets; comparing instants does not. Naive values are
    # interpreted as local time by ``datetime.timestamp``.
    return dateparser.parse(item['date']).timestamp()


def fetch_rss_feeds(rss_data):
    """Gather today's items from every non-YouTube feed, newest first.

    Args:
        rss_data: Iterable of feed descriptions; each must provide a
            ``'feed'`` entry and is passed unchanged to ``get_rss_data``.

    Returns:
        A flat list of the item dicts produced by ``get_rss_data``, sorted
        by publication instant in descending order. Feeds for which
        ``is_youtube_channel`` is true are skipped. A feed that raises is
        reported on stdout and skipped so the other feeds still load.
    """
    news_items = []
    for data in rss_data:
        try:
            if is_youtube_channel(data['feed']):
                continue
            news_items.extend(get_rss_data(data))
        except Exception as e:
            # Best-effort boundary: one broken feed must not stop the run.
            print(f"Failed to parse feed {data}: {e}")

    news_items.sort(key=_published_timestamp, reverse=True)
    return news_items
def generate_hugo_content(news_items):
    """Write one Markdown post per news item into today's content folder.

    Files are written to ``site/dotnetramblings/content/post/<dd_mm_YYYY>/``
    and named ``<dd_mm_YYYY>_<index>.md``, where the index is the item's
    position in ``news_items``. A file that already exists under the same
    name is overwritten.

    Args:
        news_items: List of items accepted by ``convert_rss_data_to_md``.
    """
    if not news_items:
        # Nothing to publish: do not leave an empty dated directory behind.
        return

    # Resolve the date once so a run that crosses midnight still puts every
    # post in the same directory. To regenerate an older day, shift the
    # date back with ``timedelta`` before formatting.
    today = datetime.today().strftime("%d_%m_%Y")
    directory = f"site/dotnetramblings/content/post/{today}"

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    for count, item in enumerate(news_items):
        md = convert_rss_data_to_md(item)
        with open(f"{directory}/{today}_{count}.md", "w", encoding='utf-8') as file:
            file.write(md)
def main():
    """Run the pipeline: read feed definitions, fetch items, write posts.

    Every ``*.yml`` file found directly under ``./data/`` is parsed with
    ``parse_yml_files``; the resulting feed descriptions are fetched with
    ``fetch_rss_feeds`` and rendered by ``generate_hugo_content``.
    """
    yml_files = glob.glob(os.path.join('./data/', '*.yml'))
    generate_hugo_content(fetch_rss_feeds(parse_yml_files(yml_files)))


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()