|
| 1 | +# This is a vendored version of code that should eventually be moved into DocumenterTools.jl |
| 2 | +# once the generic interface has crystallized, and then DocumenterTools should be added |
| 3 | +# as a dependency here. |
| 4 | +# |
| 5 | +# WIP upstream PR: https://github.com/JuliaDocs/DocumenterTools.jl/pull/78 |
| 6 | +# |
| 7 | +# Note: these functions are not part of MultiDocumenter public API. |
| 8 | + |
"""
    update_canonical_links_for_version(
        docs_directory::AbstractString;
        canonical::AbstractString,
    )

Update the `<link rel="canonical" ...>` element of each file that `walkdocs` visits under
`docs_directory` (filtered with `isdochtml`), so that its `href` is built from `canonical`
and the file's path relative to `docs_directory`. For files named `index.html` the file
name is dropped and the URL ends with a `/`.

If a file has no canonical link element, one is appended to the first `<head>` element
found. A file is only written back to disk when its DOM was actually modified. If a file
contains more than one canonical link element, all of them are updated and an error is
logged.

- **`canonical`**: corresponds to the `canonical` attribute of `Documenter.HTML`,
  specifying the root of the canonical URL.
"""
function update_canonical_links_for_version(
    docs_directory::AbstractString;
    canonical::AbstractString,
)
    # Kept in a separate local (instead of reassigning `canonical`) so that the closure
    # below does not capture a reassigned variable.
    canonical_root = rstrip(canonical, '/')

    walkdocs(docs_directory, isdochtml) do fileinfo
        @debug "update_canonical_links: checking $(fileinfo.relpath)"
        # Determine the canonical URL of this page from its path relative to the root of
        # the docs directory. index.html pages are addressed by their directory.
        filepath = splitpath(fileinfo.relpath)
        new_canonical_href = if filepath[end] == "index.html"
            joinurl(canonical_root, filepath[1:end-1]...) * '/'
        else
            joinurl(canonical_root, filepath...)
        end

        html = Gumbo.parsehtml(read(fileinfo.fullpath, String))
        n_canonical_tags::Int = 0
        dom_updated::Bool = false
        # First pass: fix up every canonical link element that already exists.
        for e in AbstractTrees.PreOrderDFS(html.root)
            is_canonical_element(e) || continue
            n_canonical_tags += 1
            canonical_href = Gumbo.getattr(e, "href", nothing)
            if canonical_href != new_canonical_href
                Gumbo.setattr!(e, "href", new_canonical_href)
                @debug "update_canonical_links_for_version: canonical_href updated" canonical_href new_canonical_href fileinfo.relpath
                dom_updated = true
            end
        end
        # Second pass, only if there was nothing to fix up: add a new canonical link
        # element to the first <head> element we come across.
        if n_canonical_tags == 0
            for e in AbstractTrees.PreOrderDFS(html.root)
                e isa Gumbo.HTMLElement || continue
                Gumbo.tag(e) == :head || continue
                canonical_href_element = Gumbo.HTMLElement{:link}(
                    [],
                    e,
                    Dict("rel" => "canonical", "href" => new_canonical_href),
                )
                push!(e.children, canonical_href_element)
                @debug "update_canonical_links_for_version: added new canonical_href" new_canonical_href fileinfo.relpath
                dom_updated = true
                break
            end
        end
        # Avoid touching files whose DOM did not change.
        if dom_updated
            open(io -> print(io, html), fileinfo.fullpath, "w")
        end
        if n_canonical_tags > 1
            @error "Multiple canonical tags!" file = fileinfo.relpath
        end
    end
end
| 70 | + |
# Return `true` if `e` is a `<link>` HTML element whose `rel` attribute is `"canonical"`.
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
# Join the given URL components into a single string, separated by forward slashes.
function joinurl(ps::AbstractString...)
    return join(ps, "/")
end
| 76 | + |
"""
    update_canonical_links(docs_directory::AbstractString; canonical::AbstractString)

Takes the multi-versioned Documenter site in `docs_directory` and updates the HTML canonical
URLs to point to `canonical`.

The canonical version is taken from the redirect in the top-level `index.html`, if that
file exists and a redirect URL can be extracted from it. Otherwise it is determined from
`versions.js`. Every non-symlink subdirectory of `docs_directory` (and every such
subdirectory of `previews/`) is then passed to `update_canonical_links_for_version`.

Throws an `ArgumentError` if `docs_directory` is not a directory.
"""
function update_canonical_links(docs_directory::AbstractString; canonical::AbstractString)
    canonical = rstrip(canonical, '/')
    docs_directory = abspath(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("No such directory: $(docs_directory)"))

    # get_meta_redirect_url returns `nothing` when it cannot find a usable redirect tag, in
    # which case we fall back to versions.js, just as if index.html did not exist at all.
    redirect_index_html_path = joinpath(docs_directory, "index.html")
    redirect_url = if isfile(redirect_index_html_path)
        get_meta_redirect_url(redirect_index_html_path)
    else
        nothing
    end
    canonical_path = if isnothing(redirect_url)
        # canonical_version_from_versions_js returns a single string. Since canonical_path
        # gets splatted below, it is wrapped in a tuple (splatting a string would yield its
        # individual characters).
        (canonical_version_from_versions_js(docs_directory),)
    else
        splitpath(normpath(redirect_url))
    end
    canonical_full_root = joinurl(canonical, canonical_path...)
    # If we have determined which version should be the canonical version, we can actually
    # go and run update_canonical_links_for_version on each directory. First, we'll gather
    # up the list of Documenter (or other) directories we actually want to run over.
    docs_subdirectory_queue = readdir(docs_directory)
    docs_subdirectories = String[]
    while !isempty(docs_subdirectory_queue)
        docs_subdirectory = popfirst!(docs_subdirectory_queue)
        path = joinpath(docs_directory, docs_subdirectory)
        # We'll skip all files. This includes files such as index.html, which in this
        # directory will likely be the redirect. Also, links should be pointing to other
        # versions, so we'll skip them too.
        if !isdir(path) || islink(path)
            continue
        end
        # The previews directory should contain other Documenter directories, so we just
        # add its subdirectories into the queue and ignore the parent directory itself.
        if docs_subdirectory == "previews"
            append!(docs_subdirectory_queue, joinpath.(docs_subdirectory, readdir(path)))
            continue
        end
        # For other directories, we check for the presence of siteinfo.js, and warn if that
        # is missing (but we still try to go and update the canonical URLs).
        if !isfile(joinpath(path, "siteinfo.js"))
            @warn "update_canonical_links: missing siteinfo.js file" path
        end
        push!(docs_subdirectories, path)
    end
    # Finally, we can run update_canonical_links_for_version on each directory.
    for path in docs_subdirectories
        @debug "Updating canonical URLs for a version" path canonical_full_root
        update_canonical_links_for_version(path; canonical = canonical_full_root)
    end
end
| 126 | + |
"""
    canonical_directory_from_redirect_index_html(docs_directory::AbstractString)

Determine the canonical directory from the redirect in `index.html` of `docs_directory`.

Returns the redirect URL split into its path components, or `nothing` if there is no
`index.html` file or if no redirect URL could be extracted from it.
"""
function canonical_directory_from_redirect_index_html(docs_directory::AbstractString)
    redirect_index_html_path = joinpath(docs_directory, "index.html")
    isfile(redirect_index_html_path) || return nothing
    redirect_url = get_meta_redirect_url(redirect_index_html_path)
    # get_meta_redirect_url returns `nothing` if it finds no usable redirect tag, and
    # normpath has no method for that.
    isnothing(redirect_url) && return nothing
    return splitpath(normpath(redirect_url))
end
| 133 | + |
"""
    get_meta_redirect_url(indexhtml_path::AbstractString)

Parse the HTML file at `indexhtml_path` and look for a redirect tag of the form
`<meta http-equiv="refresh" ...>`. Returns the `url=...` part of the `content` attribute of
the first such tag that can be parsed, or `nothing` if there is none. Tags that lack a
`content` attribute, or whose `content` can not be parsed, are skipped with a warning.
"""
function get_meta_redirect_url(indexhtml_path::AbstractString)
    document = Gumbo.parsehtml(read(indexhtml_path, String))
    for node in AbstractTrees.PreOrderDFS(document.root)
        is_refresh_meta =
            node isa Gumbo.HTMLElement &&
            Gumbo.tag(node) == :meta &&
            Gumbo.getattr(node, "http-equiv", nothing) == "refresh"
        if is_refresh_meta
            content = Gumbo.getattr(node, "content", nothing)
            if isnothing(content)
                @warn "<meta http-equiv=\"refresh\" ...> with no content attribute" path =
                    indexhtml_path
            else
                # The content value is expected to look like "<seconds>; url=<target>".
                m = match(r"[0-9]+;\s*url=(.*)", content)
                if isnothing(m)
                    @warn "Unable to parse content value of <meta http-equiv=\"refresh\" ...>" content path =
                        indexhtml_path
                else
                    return m.captures[1]
                end
            end
        end
    end
    return nothing
end
| 160 | + |
"""
    canonical_version_from_versions_js(docs_directory)

Determine the name of the canonical version directory of the multi-versioned site in
`docs_directory`, based on the list of versions in its `versions.js` file. Entries whose
path exists neither as a directory nor as a symlink are discarded with a warning.

The canonical version is the single entry that is a symlink and does not parse as a
version number (e.g. `stable`). If there is no such entry, the entry matching `v[0-9]+`
with the highest number is used instead.

Throws an `ArgumentError` if `docs_directory` is not a directory or has no `versions.js`
file, and an `ErrorException` if the canonical version can not be determined.
"""
function canonical_version_from_versions_js(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("Not a directory: $(docs_directory)"))
    # Try to extract the list of versions from versions.js
    versions_js = joinpath(docs_directory, "versions.js")
    isfile(versions_js) ||
        throw(ArgumentError("versions.js is missing in $(docs_directory)"))
    versions = map(extract_versions_list(versions_js)) do version_str
        isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
            true, VersionNumber(version_str)
        else
            false, nothing
        end
        fullpath = joinpath(docs_directory, version_str)
        return (;
            path = version_str,
            path_exists = isdir(fullpath) || islink(fullpath),
            symlink = islink(fullpath),
            isversion,
            version_number,
            fullpath,
        )
    end
    # We'll filter out a couple of potential bad cases and issue warnings. Note that this
    # must be the mutating filter!, as otherwise the filtered list would just be discarded.
    filter!(versions) do vi
        if !vi.path_exists
            @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
            return false
        end
        return true
    end
    # We need to determine the canonical path. This would usually be something like the
    # stable/ directory, but it can have a different name, including being a version
    # number. So first we try to find a non-version directory _that is a symlink_ (so that
    # it wouldn't get confused with previews/ or dev builds). If that fails, we try to find
    # the directory matching `v[0-9]+` with the highest version number. This does not cover
    # all possible cases, but should be good enough for now.
    if isempty(versions)
        error("Unable to determine the canonical path. Found no version directories")
    end

    non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
    canonical_version = if isempty(non_version_symlinks)
        # We didn't find any non-version symlinks, so we'll try to find the vN directory
        # now as a fallback.
        major_versions = map(versions) do vi
            m = match(r"^v([0-9]+)$", vi.path)
            isnothing(m) && return nothing
            parse(Int, m[1]) => vi
        end
        filter!(!isnothing, major_versions)
        if isempty(major_versions)
            error("Unable to determine the canonical path. Found no version directories")
        end
        # Note: findmax(first, major_versions) would be nicer, but is not supported
        # on Julia 1.6
        _, idx = findmax(first.(major_versions))
        major_versions[idx][2]
    elseif length(non_version_symlinks) > 1
        error(
            "Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)",
        )
    else
        only(non_version_symlinks)
    end

    return canonical_version.path
end
| 228 | + |
"""
    extract_versions_list(versions_js::AbstractString)

Read the JavaScript file at `versions_js` and return the entries of its
`var DOC_VERSIONS = [...]` array as a vector of strings, with the surrounding quotes and
whitespace stripped.

Throws an `ArgumentError` if the file does not exist, if no `DOC_VERSIONS` array can be
found in it, or if the array has no non-empty entries.
"""
function extract_versions_list(versions_js::AbstractString)
    versions_js = abspath(versions_js)
    isfile(versions_js) || throw(ArgumentError("No such file: $(versions_js)"))
    versions_js_content = read(versions_js, String)
    # The array contents are matched with `*` rather than `+`, so that an empty array
    # (`DOC_VERSIONS = []`) is reported as empty below, not as missing.
    m = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]*)\]", versions_js_content)
    if isnothing(m)
        throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(versions_js):
        $(versions_js_content)"""))
    end
    versions = strip.(c -> isspace(c) || (c == '"'), split(m[1], ","))
    # A trailing comma in the array leaves an empty entry behind; drop those.
    filter!(!isempty, versions)
    if isempty(versions)
        throw(ArgumentError("""
        DOC_VERSIONS empty in $(versions_js):
        $(versions_js_content)"""))
    end
    return versions
end
0 commit comments