Skip to content
forked from neovim/neovim

Commit

Permalink
feat(gen_help_html.lua): adapt to new parser
Browse files Browse the repository at this point in the history
  • Loading branch information
justinmk committed Sep 26, 2022
1 parent 7b2f439 commit c90cc7f
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 74 deletions.
135 changes: 77 additions & 58 deletions scripts/gen_help_html.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors).
--
-- NOTE: :helptags checks for duplicate tags, whereas this script checks _links_ (to tags).
--
-- USAGE (GENERATE HTML):
-- 1. Run `make helptags` first; this script depends on vim.fn.taglist().
-- 2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')"
Expand Down Expand Up @@ -29,22 +31,6 @@
-- >-
-- >--
-- etc.
--
-- tab:xyz The 'z' is always used, then 'x' is prepended, and
-- then 'y' is used as many times as will fit. Thus
-- "tab:<->" displays:
-- >
-- <>
-- <->
-- <-->
-- etc.
-- * Should NOT be a "headline". Perhaps a "table" (or just "line").
-- expr5 and expr6 *expr5* *expr6*
-- ---------------
-- expr6 + expr6 Number addition, |List| or |Blob| concatenation *expr-+*
-- expr6 - expr6 Number subtraction *expr--*
-- expr6 . expr6 String concatenation *expr-.*
-- expr6 .. expr6 String concatenation *expr-..*

local tagmap = nil
local helpfiles = nil
Expand All @@ -71,6 +57,26 @@ local exclude = {
['usr_24.txt'] = true,
}

-- TODO: Every |link| listed here is known to be invalid; the relevant docs need to be updated.
--
-- Keys are the invalid tagnames. Values are help file names (presumably the file in which the
-- bad link occurs -- confirm). ignore_invalid() only checks that an entry is truthy.
local exclude_invalid = {
  -- builtin.txt
  ["v:_null_blob"] = "builtin.txt",
  ["v:_null_dict"] = "builtin.txt",
  ["v:_null_list"] = "builtin.txt",
  ["v:_null_string"] = "builtin.txt",
  -- eval.txt
  ["'string'"] = "eval.txt",
  -- help.txt
  ["matchit.txt"] = "help.txt",
  -- lsp.txt
  ["lsp-request"] = "lsp.txt",
  ["vim.lsp.buf_request()"] = "lsp.txt",
  ["vim.lsp.util.get_progress_messages()"] = "lsp.txt",
  -- quickref.txt
  ["'previewpopup'"] = "quickref.txt",
  ["'pvp'"] = "quickref.txt",
  -- treesitter.txt
  ["Query"] = "treesitter.txt",
  ["eq?"] = "treesitter.txt",
  ["set!"] = "treesitter.txt",
  ["vim.treesitter.start()"] = "treesitter.txt",
  -- vim_diff.txt
  ["matchit"] = "vim_diff.txt",
}

local function tofile(fname, text)
local f = io.open(fname, 'w')
if not f then
Expand Down Expand Up @@ -248,15 +254,43 @@ local function getws(node, bufnr)
end

local function get_tagname(node, bufnr, link)
local node_name = (node.named and node:named()) and node:type() or nil
local node_text = vim.treesitter.get_node_text(node, bufnr)
local tag = ((node_name == 'option' and node_text)
or (link and node_text:gsub('^|', ''):gsub('|$', '') or node_text:gsub('^%*', ''):gsub('%*$', '')))
local helpfile = tag and vim.fs.basename(tagmap[tag]) or nil -- "api.txt"
local helppage = get_helppage(helpfile) -- "api.html"
local tag = ((node:type() == 'option' or node:parent():type() == 'option')
and ("'%s'"):format(node_text)
or (link and node_text:gsub('^[^|]*|', ''):gsub('|[^|]*$', '') or node_text:gsub('^%*', ''):gsub('%*$', '')))
local helpfile = vim.fs.basename(tagmap[tag]) or nil -- "api.txt"
local helppage = get_helppage(helpfile) -- "api.html"
return helppage, tag
end

-- Tells whether an invalid tagname `s` is a false positive that should not be reported.
-- Always returns a boolean.
local function ignore_invalid(s)
  -- The parser mistakes table borders such as |~/====| (seen in various docs) for links.
  if s:find('===') then
    return true
  end
  -- Otherwise the tagname is ignored only if it is listed in `exclude_invalid`.
  if exclude_invalid[s] then
    return true
  end
  return false
end

-- Walks up the parents of `node` looking for one whose type is `ancestor_name`.
--
-- The walk starts at the parent (the node itself is never considered) and stops at the first
-- 'help_file' node or when there is no parent left. A 'help_file' node is never reported as
-- a match.
--
-- Returns true if such an ancestor was found, false otherwise.
local function has_ancestor(node, ancestor_name)
  local cur = node:parent()
  while cur and cur:type() ~= 'help_file' do
    if cur:type() == ancestor_name then
      return true
    end
    cur = cur:parent()
  end
  return false
end

-- Resolves the tag that a link node points to and records it in `invalid_tags` if it is bad.
--
-- The tagname is taken from `node:child(1)`. The link is recorded as invalid (keyed by
-- tagname, value = basename of `fname`) unless one of these holds:
--   * the node has a 'column_heading' ancestor,
--   * the node has a parse error,
--   * the tagname exists in `tagmap`,
--   * ignore_invalid() accepts the tagname as a false positive.
--
-- Returns the help page and the tagname, as given by get_tagname().
local function validate_link(node, bufnr, fname)
  local helppage, tagname = get_tagname(node:child(1), bufnr, true)
  -- Checked in this order, stopping at the first condition that holds.
  local skip = has_ancestor(node, 'column_heading')
    or node:has_error()
    or tagmap[tagname]
    or ignore_invalid(tagname)
  if not skip then
    invalid_tags[tagname] = vim.fs.basename(fname)
  end
  return helppage, tagname
end

-- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
local function visit_validate(root, level, lang_tree, opt, stats)
level = level or 0
Expand All @@ -276,10 +310,7 @@ local function visit_validate(root, level, lang_tree, opt, stats)
local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]'
table.insert(stats.parse_errors, sample_text)
elseif node_name == 'hotlink' or node_name == 'option' then
local _, tagname = get_tagname(root, opt.buf, true)
if not root:has_error() and not tagmap[tagname] then
invalid_tags[tagname] = vim.fs.basename(opt.fname)
end
local _, _ = validate_link(root, opt.buf, opt.fname)
end
end

Expand All @@ -296,11 +327,11 @@ local function visit_node(root, level, lang_tree, headings, opt, stats)
local parent = root:parent() and root:parent():type() or nil
local text = ''
local toplevel = level < 1
local function node_text()
return vim.treesitter.get_node_text(root, opt.buf)
local function node_text(node)
return vim.treesitter.get_node_text(node or root, opt.buf)
end

if root:child_count() == 0 then
if root:child_count() == 0 or node_name == 'ERROR' then
text = node_text()
else
-- Process children and join them with whitespace.
Expand All @@ -316,6 +347,8 @@ local function visit_node(root, level, lang_tree, headings, opt, stats)

if node_name == 'help_file' then -- root node
return text
elseif node_name == 'url' then
return ('<a href="%s">%s</a>\n'):format(trimmed, trimmed)
elseif node_name == 'word' or node_name == 'uppercase_name' then
if parent == 'headline' then
-- Start a new heading item, or update the current one.
Expand All @@ -328,41 +361,30 @@ local function visit_node(root, level, lang_tree, headings, opt, stats)
return ('<a name="%s"></a><h2 class="help-heading">%s</h2>\n'):format(to_heading_tag(headings[#headings]), text)
elseif node_name == 'column_heading' or node_name == 'column_name' then
return ('<h4>%s</h4>\n'):format(trimmed)
elseif node_name == 'block' then
if opt.old then
-- XXX: Treat old docs as preformatted. Until those docs are "fixed" or we get better info
-- from tree-sitter-vimdoc, this avoids broken layout for legacy docs.
return ('<div class="old-help-para">\n%s</div>\n'):format(text)
end
return string.format('<div class="help-para">\n%s\n</div>\n', text)
elseif node_name == 'line' then
-- TODO: remove these "sibling inspection" hacks once the parser provides structured info
-- about paragraphs and listitems: https://github.com/vigoux/tree-sitter-vimdoc/issues/12
local next_text = root:next_sibling() and vim.treesitter.get_node_text(root:next_sibling(), opt.buf) or ''
local li = startswith_bullet(text) -- Listitem?
local next_li = startswith_bullet(next_text) -- Next is listitem?
-- Close the paragraph/listitem if the next sibling is not a line.
-- Close the listitem if the next sibling is not a line.
local close = (next_ ~= 'line' or next_li or is_blank(next_text)) and '</div>\n' or ''

-- HACK: discard common "noise" lines.
if is_noise(text) then
table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0))
return (opt.old or prev ~= 'line') and '' or close
return ''
end

if opt.old then
-- XXX: Treat old docs as preformatted. Until those docs are "fixed" or we get better info
-- from tree-sitter-vimdoc, this avoids broken layout for legacy docs.
return ('<div class="old-help-line">%s</div>\n'):format(text)
end

if li then
return string.format('<div class="help-item">%s%s', trim_bullet(expandtabs(text)), close)
end
if prev ~= 'line' then -- Start a new paragraph.
return string.format('<div class="help-para">%s%s', expandtabs(text), close)
return string.format('<div class="help-item">%s%s', trim_bullet(text), close)
end

-- Continue in the current paragraph/listitem.
return string.format('%s%s', expandtabs(text), close)
return string.format('%s\n', text)
elseif node_name == 'hotlink' or node_name == 'option' then
local helppage, tagname = get_tagname(root, opt.buf, true)
if not root:has_error() and not tagmap[tagname] then
invalid_tags[tagname] = vim.fs.basename(opt.fname)
end
local helppage, tagname = validate_link(root, opt.buf, opt.fname)
return ('<a href="%s#%s">%s</a>'):format(helppage, url_encode(tagname), html_esc(tagname))
elseif node_name == 'backtick' then
return ('<code>%s</code>'):format(html_esc(text))
Expand All @@ -371,7 +393,7 @@ local function visit_node(root, level, lang_tree, headings, opt, stats)
elseif node_name == 'code_block' then
return ('<pre>\n%s</pre>\n'):format(html_esc(trim_indent(trim_gt_lt(text))))
elseif node_name == 'tag' then -- anchor
local _, tagname = get_tagname(root, opt.buf, false)
local _, tagname = get_tagname(root:child(1), opt.buf, false)
local s = ('<a name="%s"></a><span class="help-tag">%s</span>'):format(url_encode(tagname), trimmed)
if parent == 'headline' and prev ~= 'tag' then
-- Start the <span> container for tags in a heading.
Expand All @@ -387,10 +409,6 @@ local function visit_node(root, level, lang_tree, headings, opt, stats)
-- Store the raw text to give context to the bug report.
local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]'
table.insert(stats.parse_errors, sample_text)
if prev == 'ERROR' then
-- Avoid trashing the text with cascading errors.
return trimmed, ('parse-error:"%s"'):format(node_text())
end
return ('<a class="parse-error" target="_blank" title="Parsing error. Report to tree-sitter-vimdoc..." href="%s">%s</a>'):format(
get_bug_url_vimdoc(opt.fname, opt.to_fname, sample_text), trimmed)
else -- Unknown token.
Expand Down Expand Up @@ -443,6 +461,7 @@ local function parse_buf(fname)
vim.cmd('sbuffer '..tostring(fname)) -- Buffer number.
end
-- vim.treesitter.require_language('help', './build/lib/nvim/parser/help.so')
vim.treesitter.require_language('help', '/Users/justin/Library/Caches/tree-sitter/lib/help.so')
local lang_tree = vim.treesitter.get_parser(buf, 'help')
return lang_tree, buf
end
Expand Down Expand Up @@ -587,7 +606,7 @@ local function gen_one(fname, to_fname, old, commit)
local toc = [[
<div class="col-narrow toc">
<div><a href="index.html">Main</a></div>
<div><a href="vimindex.html">Help index</a></div>
<div><a href="vimindex.html">Commands index</a></div>
<div><a href="quickref.html">Quick reference</a></div>
<hr/>
]]
Expand Down Expand Up @@ -657,7 +676,7 @@ local function gen_css(fname)
padding-top: 10px;
padding-bottom: 10px;
}
.old-help-line {
.old-help-para {
/* Tabs are used for alignment in old docs, so we must match Vim's 8-char expectation. */
tab-size: 8;
white-space: pre;
Expand Down
26 changes: 10 additions & 16 deletions test/functional/lua/help_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,25 @@ describe(':help docs', function()
before_each(clear)
it('validate', function()
-- If this test fails, try these steps (in order):
-- 1. Try to fix/cleanup the :help docs, especially Nvim-owned :help docs.
-- 2. Try to fix the parser: https://github.com/vigoux/tree-sitter-vimdoc
-- 1. Fix/cleanup the :help docs.
-- 2. Fix the parser: https://github.com/vigoux/tree-sitter-vimdoc
-- 3. File a parser bug, and adjust the tolerance of this test in the meantime.

local rv = exec_lua([[return require('scripts.gen_help_html').validate('./build/runtime/doc')]])
-- Check that parse errors did not increase wildly.
-- TODO: yes, there are currently 24k+ parser errors.
-- WIP: https://github.com/vigoux/tree-sitter-vimdoc/pull/16
ok(rv.err_count < 24000, '<24000', rv.err_count)
-- TODO: should be eq(0, …)
ok(exec_lua('return vim.tbl_count(...)', rv.invalid_tags) < 538, '<538',
exec_lua('return vim.inspect(...)', rv.invalid_tags))
ok(rv.err_count < 2500, '<2500 parse errors', rv.err_count)
eq({}, rv.invalid_tags, exec_lua([[return 'found invalid :help tag links:\n'..vim.inspect(...)]], rv.invalid_tags))
end)

it('gen_help_html.lua generates HTML', function()
-- Test:
-- 1. Check that parse errors did not increase wildly. Because we explicitly test only a few
-- :help files, we can be more precise about the tolerances here.
-- 2. exercise gen_help_html.lua, check that it actually works.
-- 3. check that its tree-sitter-vimdoc dependency is working.
-- 1. Test that gen_help_html.lua actually works.
-- 2. Test that parse errors did not increase wildly. Because we explicitly test only a few
-- :help files, we can be precise about the tolerances here.

local tmpdir = exec_lua('return vim.fs.dirname(vim.fn.tempname())')
-- Because gen() is slow (1 min), this test is limited to a few files.
-- Because gen() is slow (~30s), this test is limited to a few files.
local rv = exec_lua([[
local to_dir = ...
return require('scripts.gen_help_html').gen(
Expand All @@ -46,9 +42,7 @@ describe(':help docs', function()
tmpdir
)
eq(4, #rv.helpfiles)
ok(rv.err_count < 700, '<700', rv.err_count)
-- TODO: should be eq(0, …)
ok(exec_lua('return vim.tbl_count(...)', rv.invalid_tags) <= 32, '<=32',
exec_lua('return vim.inspect(...)', rv.invalid_tags))
ok(rv.err_count < 32, '<32 parse errors', rv.err_count)
eq({}, rv.invalid_tags, exec_lua([[return 'found invalid :help tag links:\n'..vim.inspect(...)]], rv.invalid_tags))
end)
end)

0 comments on commit c90cc7f

Please sign in to comment.