1
1
import xml .etree .ElementTree as ET
2
2
import queue
3
3
from pathlib import Path
4
- from sphinx_sitemap import setup as base_setup , get_locales , hreflang_formatter , add_html_link , record_builder_type
4
+ from sphinx_sitemap import setup as base_setup , add_html_link , record_builder_type
5
5
from sphinx .util .logging import getLogger
6
6
7
7
logger = getLogger (__name__ )
8
8
9
9
def setup(app):
    """Register this sitemap extension and its config values with Sphinx.

    Adds the ``ov_sitemap_urlset`` and ``ov_sitemap_meta`` config values,
    connects the page-collection handlers, and connects a ``build-finished``
    handler that calls :func:`create_sitemap` for the ``google`` and
    ``coveo`` searchers. ``base_setup`` (the ``sphinx_sitemap`` setup) is then
    run, and every ``build-finished`` listener whose handler is named
    ``create_sitemap`` is disconnected.

    Args:
        app: The Sphinx application object.

    Returns:
        Whatever ``base_setup(app)`` returned (the extension metadata).
    """
    app.add_config_value('ov_sitemap_urlset', default=None, rebuild='')
    app.add_config_value('ov_sitemap_meta', default=None, rebuild='')
    app.connect("builder-inited", record_builder_type)
    app.connect("html-page-context", add_html_link)
    # Must stay a lambda: its __name__ is '<lambda>', so the name-based
    # disconnect loop below does not remove our own handler.
    app.connect('build-finished', lambda app, exc: create_sitemap(app, exc, ['google', 'coveo']))

    # Named ``metadata`` rather than ``setup`` to avoid shadowing this function.
    metadata = base_setup(app)

    # Presumably base_setup registers its own ``create_sitemap`` handler;
    # drop it so only the handler connected above writes sitemap files.
    # Iterate over a snapshot because disconnecting mutates the listener list,
    # and iterating the live list while removing from it can skip entries.
    for listener in list(app.events.listeners['build-finished']):
        # getattr: handlers such as functools.partial objects have no __name__.
        if getattr(listener.handler, '__name__', None) == 'create_sitemap':
            app.disconnect(listener.id)

    app.parallel_safe = True
    app.parallel_read_safe = True
    app.parallel_write_safe = True
    return metadata
34
26
35
-
36
def create_sitemap(app, exception, searchers):
    """Write one ``sitemap_<searcher>.xml`` file per entry in *searchers*.

    Drains the page links queued on ``app.sitemap_links``, de-duplicates them
    and emits a ``<urlset>`` document for each searcher into ``app.outdir``.
    Every ``<url>`` gets a ``<loc>``; the ``"google"`` searcher additionally
    gets ``lastmod``/``changefreq``/``priority``, and the ``"coveo"`` searcher
    gets the metadata produced by ``process_coveo_meta``. Any other searcher
    name yields entries with ``<loc>`` only.

    Args:
        app: The Sphinx application object.
        exception: Exception passed by the ``build-finished`` event; unused.
        searchers: Iterable of searcher names (e.g. ``['google', 'coveo']``).

    Returns:
        None. Returns early, without writing files, when ``site_url`` is not
        configured or when no page links were collected.
    """
    from datetime import datetime

    config = app.builder.config
    meta = config.ov_sitemap_meta
    site_url = config.site_url

    if not site_url:
        logger.warning(
            "sphinx-sitemap: html_baseurl is required in conf.py. Sitemap not built.",
            type="sitemap",
            subtype="configuration",
        )
        return
    # The normalised value must be assigned back; the expression result was
    # previously discarded, so a base URL without a trailing slash was glued
    # directly onto the page path.
    site_url = site_url.rstrip("/") + "/"

    if not app.sitemap_links:
        print("sphinx-sitemap warning: No pages generated.")
        return

    # Drain the queue; a set drops links that were queued more than once.
    unique_links = set()
    while True:
        try:
            unique_links.add(app.sitemap_links.get_nowait())
        except queue.Empty:
            break

    # Truthiness of a queue object does not reflect whether it holds items,
    # so the "no pages" condition is re-checked on the drained links.
    if not unique_links:
        print("sphinx-sitemap warning: No pages generated.")
        return

    # Sort so the generated files are reproducible between builds; key=str
    # keeps the ordering well-defined even if entries are not plain strings.
    ordered_links = sorted(unique_links, key=str)

    # Values that do not depend on the searcher or the link are computed once.
    ET.register_namespace('xhtml', "http://www.w3.org/1999/xhtml")
    version = config.version + '/' if config.version else ""
    lang = config.language + "/" if config.language else ""
    scheme = app.config.sitemap_url_scheme
    today_date = datetime.now().strftime('%Y-%m-%d')

    for searcher in searchers:
        namespaces = {"xmlns": "http://www.sitemaps.org/schemas/sitemap/0.9"}
        if searcher == "coveo":
            namespaces["xmlns:coveo"] = "https://www.coveo.com/en/company/about-us"

        root = ET.Element("urlset", namespaces)

        for link in ordered_links:
            url = ET.SubElement(root, "url")
            ET.SubElement(url, "loc").text = site_url + scheme.format(
                lang=lang, version=version, link=link
            )

            if searcher == "coveo":
                process_coveo_meta(meta, url, link)
            elif searcher == "google":
                ET.SubElement(url, "lastmod").text = today_date
                ET.SubElement(url, "changefreq").text = "monthly"
                ET.SubElement(url, "priority").text = "0.5"

        filename = Path(app.outdir) / f"sitemap_{searcher}.xml"
        ET.ElementTree(root).write(filename, xml_declaration=True, encoding='utf-8', method="xml")
        print(f"sitemap_{searcher}.xml was generated for URL {site_url} in {filename}")
116
79
117
80
def process_coveo_meta (meta , url , link ):
118
81
if not meta :
119
82
return
120
-
83
+
121
84
for namespace , values in meta :
122
85
namespace_element = ET .SubElement (url , namespace )
123
86
loc_element = url .find ("loc" )
124
-
87
+
125
88
for tag_name , tag_value in values .items ():
126
89
if tag_name == 'ovdoctype' :
127
90
ET .SubElement (namespace_element , tag_name ).text = process_link (link )
@@ -140,24 +103,18 @@ def extract_categories(link):
140
103
segments = path .split ('/' )[1 :]
141
104
if segments and segments [- 1 ].endswith ('.html' ):
142
105
segments = segments [:- 1 ]
143
-
144
106
if segments :
145
107
segments = segments [1 :]
146
-
147
108
if segments and '.' in segments [0 ]:
148
109
year , * rest = segments [0 ].split ('.' )
149
110
if year .isdigit () and len (year ) == 4 :
150
111
segments [0 ] = year
151
-
152
112
segments = [format_segment (segment ) for segment in segments ]
153
-
154
113
if segments :
155
114
hierarchy = ['|' .join (segments [:i ]) for i in range (1 , len (segments ) + 1 )]
156
115
return ';' .join (hierarchy )
157
-
158
116
return "No category"
159
117
160
118
def format_segment(segment):
    """Turn a URL path segment into a human-readable, title-cased label.

    Hyphens and underscores are treated as word separators; each resulting
    word is passed through ``str.capitalize`` and the words are joined with
    single spaces. The segment ``'c_cpp_api'`` is special-cased.

    Args:
        segment: A single path segment, e.g. ``'get-started'``.

    Returns:
        The formatted label, e.g. ``'Get Started'``; ``''`` for an empty
        segment.
    """
    if segment == 'c_cpp_api':
        # str.capitalize() lowercases everything after the first character,
        # so sending 'C/C++_api' through the generic path below produced
        # 'C/c++ Api'. Return the intended label directly instead.
        return 'C/C++ Api'

    words = segment.replace('-', ' ').replace('_', ' ').split()
    return ' '.join(word.capitalize() for word in words)
0 commit comments