diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 058b6d29d..bba9cb306 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -1,4 +1,4 @@ -import { resolve } from 'node:path'; +import path from 'node:path'; import { plural } from 'myst-common'; import { tic } from 'myst-cli-utils'; import type { LinkTransformer } from 'myst-transforms'; @@ -6,11 +6,15 @@ import { combineProjectCitationRenderers } from '../../process/citations.js'; import { loadFile, selectFile } from '../../process/file.js'; import { loadReferences } from '../../process/loadReferences.js'; import type { TransformFn } from '../../process/mdast.js'; -import { postProcessMdast, transformMdast } from '../../process/mdast.js'; -import { loadProject, selectPageReferenceStates } from '../../process/site.js'; +import { transformMdast } from '../../process/mdast.js'; +import { loadProject, selectPageReferenceStates, makeBarrier } from '../../process/site.js'; +import { buildIndexTransform, MultiPageReferenceResolver } from 'myst-transforms'; import type { ISession } from '../../session/types.js'; import { selectors } from '../../store/index.js'; import type { ImageExtensions } from '../../utils/resolveExtension.js'; +import { castSession } from '../../session/cache.js'; +import { VFile } from 'vfile'; +import { logMessagesFromVFile } from '../../utils/logging.js'; export async function getFileContent( session: ISession, @@ -34,13 +38,13 @@ export async function getFileContent( }, ) { const toc = tic(); - files = files.map((file) => resolve(file)); - projectPath = projectPath ?? resolve('.'); + files = files.map((file) => path.resolve(file)); + projectPath = projectPath ?? 
path.resolve('.'); const { project, pages } = await loadProject(session, projectPath); const projectFiles = pages.map((page) => page.file).filter((file) => !files.includes(file)); await Promise.all([ // Load all citations (.bib) - ...project.bibliography.map((path) => loadFile(session, path, projectPath, '.bib')), + ...project.bibliography.map((bib) => loadFile(session, bib, projectPath, '.bib')), // Load all content (.md, .tex, .myst.json, or .ipynb) ...[...files, ...projectFiles].map((file, ind) => { const preFrontmatter = Array.isArray(preFrontmatters) @@ -60,11 +64,45 @@ export async function getFileContent( // Keep 'files' indices consistent in 'allFiles' as index is used for other fields. const allFiles = [...files, ...projectFiles, ...projectParts]; + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier(allFiles.length); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(allFiles.length); + + // TODO: maybe move transformMdast into a multi-file function + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + const referencingPages = allFiles.map((file) => { + return { file }; + }); + referencingPromise.then(() => { + const pageReferenceStates = selectPageReferenceStates(session, referencingPages); + referenceStateContext.referenceStates.push(...pageReferenceStates); + }); + indexingPromise.then(() => { + const cache = castSession(session); + referencingPages.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? 
{}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( allFiles.map(async (file, ind) => { const pageSlug = pages.find((page) => page.file === file)?.slug; const titleDepth = typeof titleDepths === 'number' ? titleDepths : titleDepths?.[ind]; await transformMdast(session, { + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file, imageExtensions, projectPath, @@ -74,24 +112,13 @@ export async function getFileContent( titleDepth, extraTransforms, execute, - }); - }), - ); - const pageReferenceStates = selectPageReferenceStates( - session, - allFiles.map((file) => { - return { file }; - }), - ); - await Promise.all( - [...files, ...projectParts].map(async (file) => { - await postProcessMdast(session, { - file, extraLinkTransformers, - pageReferenceStates, + runPostProcess: [...files, ...projectParts].includes(file), + referenceStateContext, }); }), ); + const selectedFiles = await Promise.all( files.map(async (file) => { const selectedFile = selectFile(session, file); diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index fb50a03cd..5e4f7a29e 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -3,19 +3,14 @@ import { tic } from 'myst-cli-utils'; import type { GenericParent, IExpressionResult, PluginUtils, References } from 'myst-common'; import { fileError, fileWarn, RuleId, slugToUrl } from 'myst-common'; import type { PageFrontmatter } from 'myst-frontmatter'; +import type { Dependency } from 'myst-spec-ext'; import { SourceFileKind } from 'myst-spec-ext'; import type { LinkTransformer } from 'myst-transforms'; import { - basicTransformationsPlugin, - htmlPlugin, - footnotesPlugin, 
ReferenceState, MultiPageReferenceResolver, resolveLinksAndCitationsTransform, resolveReferencesTransform, - mathPlugin, - codePlugin, - enumerateTargetsPlugin, keysTransform, linksTransform, MystTransformer, @@ -25,13 +20,19 @@ import { RRIDTransformer, RORTransformer, DOITransformer, - joinGatesPlugin, - glossaryPlugin, - abbreviationPlugin, - reconstructHtmlPlugin, - inlineMathSimplificationPlugin, checkLinkTextTransform, - indexIdentifierPlugin, + reconstructHtmlTransform, + htmlTransform, + basicTransformations, + inlineMathSimplificationTransform, + mathTransform, + glossaryTransform, + abbreviationTransform, + indexIdentifierTransform, + enumerateTargetsTransform, + joinGatesTransform, + codeTransform, + footnotesTransform, } from 'myst-transforms'; import { unified } from 'unified'; import { select, selectAll } from 'unist-util-select'; @@ -73,11 +74,12 @@ import { import type { ImageExtensions } from '../utils/resolveExtension.js'; import { logMessagesFromVFile } from '../utils/logging.js'; import { combineCitationRenderers } from './citations.js'; -import { bibFilesInDir, selectFile } from './file.js'; +import { bibFilesInDir } from './file.js'; import { parseMyst } from './myst.js'; import { kernelExecutionTransform, LocalDiskCache } from 'myst-execute'; import type { IOutput } from '@jupyterlab/nbformat'; import { rawDirectiveTransform } from '../transforms/raw.js'; +import { TransformPipelineBuilder } from './pipeline.js'; const LINKS_SELECTOR = 'link,card,linkBlock'; @@ -109,6 +111,8 @@ function referenceFileFromPartFile(session: ISession, partFile: string) { export async function transformMdast( session: ISession, opts: { + referenceResolutionBlocker: () => void; + indexGenerationBlocker: () => void; file: string; projectPath?: string; projectSlug?: string; @@ -117,9 +121,15 @@ export async function transformMdast( watchMode?: boolean; execute?: boolean; extraTransforms?: TransformFn[]; + extraLinkTransformers?: LinkTransformer[]; 
minifyMaxCharacters?: number; index?: string; titleDepth?: number; + runPostProcess?: boolean; + referenceStateContext: { + referenceStates: ReferenceState[]; + }; + checkLinks?: boolean; }, ) { const { @@ -131,9 +141,15 @@ export async function transformMdast( extraTransforms, watchMode = false, minifyMaxCharacters, + extraLinkTransformers, index, titleDepth, execute, + runPostProcess, + referenceStateContext, + referenceResolutionBlocker, + indexGenerationBlocker, + checkLinks, } = opts; const toc = tic(); const { store, log } = session; @@ -151,6 +167,8 @@ export async function transformMdast( log.debug(`Processing "${file}"`); const vfile = new VFile(); // Collect errors on this file vfile.path = file; + + const sha256 = selectors.selectFileInfo(store.getState(), file).sha256 as string; const mdast = structuredClone(mdastPre); const frontmatter = processPageFrontmatter( session, @@ -168,6 +186,7 @@ export async function transformMdast( }, projectPath, ); + const isJupytext = frontmatter.kernelspec || frontmatter.jupytext; const references: References = { cite: { order: [], data: {} }, }; @@ -178,178 +197,255 @@ export async function transformMdast( vfile, }); cache.$internalReferences[file] = state; + + const builder = new TransformPipelineBuilder(); // Import additional content from mdast or other files - importMdastFromJson(session, file, mdast); - await includeFilesTransform(session, file, mdast, frontmatter, vfile); - rawDirectiveTransform(mdast, vfile); + builder.addTransform('import-mdast-json', (tree) => importMdastFromJson(session, file, tree)); // after=START + builder.addTransform('include-files', (tree) => + includeFilesTransform(session, file, tree, frontmatter, vfile), + ); + builder.addTransform('raw-directive', (tree) => rawDirectiveTransform(tree, vfile)); // This needs to come before basic transformations since it may add labels to blocks - liftCodeMetadataToBlock(session, vfile, mdast); + builder.addTransform('lift-code-metadata', (tree) => + 
liftCodeMetadataToBlock(session, vfile, tree), + ); - const pipe = unified() - .use(reconstructHtmlPlugin) // We need to group and link the HTML first - .use(htmlPlugin, { htmlHandlers }) // Some of the HTML plugins need to operate on the transformed html, e.g. figure caption transforms - .use(basicTransformationsPlugin, { + builder.addTransform('reconstruct-html', reconstructHtmlTransform); // We need to group and link the HTML first + builder.addTransform('html', (tree) => htmlTransform(tree, { htmlHandlers })); // Some of the HTML plugins need to operate on the transformed html, e.g. figure caption transforms + builder.addTransform('basic', (tree) => + basicTransformations(tree, vfile, { parser: (content: string) => parseMyst(session, content, file), firstDepth: (titleDepth ?? 1) + (frontmatter.content_includes_title ? 0 : 1), - }) - .use(inlineMathSimplificationPlugin) - .use(mathPlugin, { macros: frontmatter.math }) - .use(glossaryPlugin) // This should be before the enumerate plugins - .use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations }) - .use(indexIdentifierPlugin) - .use(enumerateTargetsPlugin, { state }) // This should be after math/container transforms - .use(joinGatesPlugin); + }), + ); + builder.addTransform('inline-math', (tree) => inlineMathSimplificationTransform(tree)); + builder.addTransform('math', (tree) => mathTransform(tree, vfile, { macros: frontmatter.math })); + builder.addTransform('glossary', (tree) => glossaryTransform(tree, vfile)); // This should be before the enumerate plugins + builder.addTransform('abbreviation', (tree) => + abbreviationTransform(tree, { abbreviations: frontmatter.abbreviations }), + ); + builder.addTransform('index-identifier', (tree) => indexIdentifierTransform(tree)); + builder.addTransform('enumerate-targets', (tree) => enumerateTargetsTransform(tree, { state })); // This should be after math/container transforms + builder.addTransform('join-gates', (tree) => joinGatesTransform(tree, vfile)); + 
// Load custom transform plugins session.plugins?.transforms.forEach((t) => { if (t.stage !== 'document') return; - pipe.use(t.plugin, undefined, pluginUtils); + builder.addTransform( + t.name, + async (tree) => { + const pipe = unified(); + pipe.use(t.plugin, undefined, pluginUtils); + await pipe.run(tree, vfile); + }, + { after: t.after, before: t.before }, + ); }); - await pipe.run(mdast, vfile); // This needs to come after basic transformations since meta tags are added there - propagateBlockDataToCode(session, vfile, mdast); + builder.addTransform('propagate-block-data', (tree) => + propagateBlockDataToCode(session, vfile, tree), + ); // Initialize citation renderers for this (non-bib) file - cache.$citationRenderers[file] = await transformLinkedDOIs( - session, - vfile, - mdast, - cache.$doiRenderers, - file, + const citationState: { fileRenderer: ReturnType } = { + fileRenderer: {}, + }; + const registerCitations = async (tree: GenericParent) => { + cache.$citationRenderers[file] = await transformLinkedDOIs( + session, + vfile, + tree, + cache.$doiRenderers, + file, + ); + const rendererFiles = [file]; + if (projectPath) { + rendererFiles.unshift(projectPath); + } else { + const localFiles = (await bibFilesInDir(session, path.dirname(file))) || []; + rendererFiles.push(...localFiles); + } + // Combine file-specific citation renderers with project renderers from bib files + citationState.fileRenderer = combineCitationRenderers(cache, ...rendererFiles); + }; + builder.addTransform('register-citations', registerCitations); + builder.addTransform( + 'kernel-execution', + (tree) => { // Return the promise so the pipeline awaits kernel execution before the later transforms run + const cachePath = path.join(session.buildPath(), 'execute'); + return kernelExecutionTransform(tree, vfile, { + basePath: session.sourcePath(), + cache: new LocalDiskCache<(IExpressionResult | IOutput[])[]>(cachePath), + sessionFactory: () => session.jupyterSessionManager(), + frontmatter: frontmatter, + ignoreCache: false, + errorIsFatal: false, + log: session.log, + }); + }, + { skip:
!execute }, ); - const rendererFiles = [file]; - if (projectPath) { - rendererFiles.unshift(projectPath); - } else { - const localFiles = (await bibFilesInDir(session, path.dirname(file))) || []; - rendererFiles.push(...localFiles); - } - // Combine file-specific citation renderers with project renderers from bib files - const fileCitationRenderer = combineCitationRenderers(cache, ...rendererFiles); + builder.addTransform('render-inline-expressions', (tree) => + transformRenderInlineExpressions(tree, vfile), + ); + builder.addTransform('cache-outputs', (tree) => + transformOutputsToCache(session, tree, kind, { minifyMaxCharacters }), + ); + builder.addTransform('filter-output', (tree) => + transformFilterOutputStreams(tree, vfile, frontmatter.settings), + ); + builder.addTransform('citations', (tree) => { + transformCitations(session, file, tree, citationState.fileRenderer, references); + }); - if (execute) { - const cachePath = path.join(session.buildPath(), 'execute'); - await kernelExecutionTransform(mdast, vfile, { - basePath: session.sourcePath(), - cache: new LocalDiskCache<(IExpressionResult | IOutput[])[]>(cachePath), - sessionFactory: () => session.jupyterSessionManager(), - frontmatter: frontmatter, - ignoreCache: false, - errorIsFatal: false, - log: session.log, - }); - } - transformRenderInlineExpressions(mdast, vfile); - await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters }); - transformFilterOutputStreams(mdast, vfile, frontmatter.settings); - transformCitations(session, file, mdast, fileCitationRenderer, references); - await unified() - .use(codePlugin, { lang: frontmatter?.kernelspec?.language }) - .use(footnotesPlugin) // Needs to happen near the end - .run(mdast, vfile); - transformImagesToEmbed(mdast); - transformImagesWithoutExt(session, mdast, file, { imageExtensions }); - const isJupytext = frontmatter.kernelspec || frontmatter.jupytext; - if (isJupytext) transformLiftCodeBlocksInJupytext(mdast); - const sha256 = 
selectors.selectFileInfo(store.getState(), file).sha256 as string; - const useSlug = pageSlug !== index; - let url: string | undefined; - let dataUrl: string | undefined; - if (pageSlug && projectSlug) { - url = `/${projectSlug}/${useSlug ? pageSlug : ''}`; - dataUrl = `/${projectSlug}/${pageSlug}.json`; - } else if (pageSlug) { - url = `/${useSlug ? pageSlug : ''}`; - dataUrl = `/${pageSlug}.json`; - } - url = slugToUrl(url); - updateFileInfoFromFrontmatter(session, file, frontmatter, url, dataUrl); - const data: RendererData = { - kind: isJupytext ? SourceFileKind.Notebook : kind, - file, - location, - sha256, - slug: pageSlug, - dependencies: [], - frontmatter, - mdast, - references, - widgets, - } as any; - const cachedMdast = cache.$getMdast(file); - if (cachedMdast) cachedMdast.post = data; - if (extraTransforms) { - await Promise.all( - extraTransforms.map(async (transform) => { - await transform(session, opts); - }), - ); - } - logMessagesFromVFile(session, vfile); - if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); -} + builder.addTransform('code', (tree) => + codeTransform(tree, vfile, { lang: frontmatter?.kernelspec?.language }), + ); + builder.addTransform('footnotes', (tree) => footnotesTransform(tree, vfile)); // Needs to happen near the end + builder.addTransform('images-to-embed', transformImagesToEmbed); + builder.addTransform('image-extensions', (tree) => + transformImagesWithoutExt(session, tree, file, { imageExtensions }), + ); + builder.addTransform( + 'jupytext-lift-code-blocks', + isJupytext ? transformLiftCodeBlocksInJupytext : undefined, + ); + const dependencies: Dependency[] = []; + builder.addTransform('write-post-mdast', async (tree) => { + // This writes the frontmatter to the file, so its position is important + // We might need to rethink its location + const useSlug = pageSlug !== index; + let url: string | undefined; + let dataUrl: string | undefined; + if (pageSlug && projectSlug) { + url = `/${projectSlug}/${useSlug ? 
pageSlug : ''}`; + dataUrl = `/${projectSlug}/${pageSlug}.json`; + } else if (pageSlug) { + url = `/${useSlug ? pageSlug : ''}`; + dataUrl = `/${pageSlug}.json`; + } + url = slugToUrl(url); + updateFileInfoFromFrontmatter(session, file, frontmatter, url, dataUrl); -export async function postProcessMdast( - session: ISession, - { - file, - checkLinks, - pageReferenceStates, - extraLinkTransformers, - }: { - file: string; - checkLinks?: boolean; - pageReferenceStates?: ReferenceState[]; - extraLinkTransformers?: LinkTransformer[]; - }, -) { - const toc = tic(); - const { log } = session; - const cache = castSession(session); - const mdastPost = selectFile(session, file); - if (!mdastPost) return; - const vfile = new VFile(); // Collect errors on this file - vfile.path = file; - const { mdast, dependencies, frontmatter } = mdastPost; - const fileState = cache.$internalReferences[file]; - const state = pageReferenceStates - ? new MultiPageReferenceResolver(pageReferenceStates, file, vfile) - : fileState; - const externalReferences = Object.values(cache.$externalReferences); - // NOTE: This is doing things in place, we should potentially make this a different state? - const transformers = [ - ...(extraLinkTransformers || []), - new WikiTransformer(), - new GithubTransformer(), - new RRIDTransformer(), - new RORTransformer(), - new DOITransformer(), // This also is picked up in the next transform - new MystTransformer(externalReferences), - new SphinxTransformer(externalReferences), - new StaticFileTransformer(session, file), // Links static files and internally linked files - ]; - resolveLinksAndCitationsTransform(mdast, { state, transformers }); - linksTransform(mdast, state.vfile as VFile, { - transformers, - selector: LINKS_SELECTOR, + const data: RendererData = { + kind: isJupytext ? 
SourceFileKind.Notebook : kind, + file, + location, + sha256, + slug: pageSlug, + dependencies, + frontmatter, + mdast: tree, + references, + widgets, + } as any; + const cachedMdast = cache.$getMdast(file); + if (cachedMdast) cachedMdast.post = data; + if (extraTransforms) { + await Promise.all( + extraTransforms.map(async (transform) => { + await transform(session, opts); + }), + ); + } + }); + + // Blocking cross-project resolution + builder.addTransform('reference-resolution', referenceResolutionBlocker); + builder.addTransform('index-generation', indexGenerationBlocker); + + const sharedStateContext: { + sharedState?: any; + externalReferences?: any; + transformers: LinkTransformer[]; + } = { transformers: [] }; + builder.addTransform('set-shared-state', () => { + sharedStateContext.sharedState = referenceStateContext.referenceStates + ? new MultiPageReferenceResolver(referenceStateContext.referenceStates, file, vfile) + : state; + sharedStateContext.externalReferences = Object.values(cache.$externalReferences); + // NOTE: This is doing things in place, we should potentially make this a different state? 
+ sharedStateContext.transformers = [ + ...(extraLinkTransformers || []), + new WikiTransformer(), + new GithubTransformer(), + new RRIDTransformer(), + new RORTransformer(), + new DOITransformer(), // This also is picked up in the next transform + new MystTransformer(sharedStateContext.externalReferences), + new SphinxTransformer(sharedStateContext.externalReferences), + new StaticFileTransformer(session, file), // Links static files and internally linked files + ]; }); - await transformLinkedRORs(session, vfile, mdast, file); - resolveReferencesTransform(mdast, state.vfile as VFile, { state, transformers }); - await transformMystXRefs(session, vfile, mdast, frontmatter); - await embedTransform(session, mdast, file, dependencies, state); - const pipe = unified(); + const transformOptions = { skip: !runPostProcess }; + builder.addTransform( + 'resolve-links-and-citations', + (tree) => + resolveLinksAndCitationsTransform(tree, { + state: sharedStateContext.sharedState, + transformers: sharedStateContext.transformers, + }), + transformOptions, + ); + builder.addTransform( + 'links', + (tree) => + linksTransform(tree, sharedStateContext.sharedState.vfile as VFile, { + transformers: sharedStateContext.transformers, + selector: LINKS_SELECTOR, + }), + transformOptions, + ); + builder.addTransform( + 'ror', + (tree) => transformLinkedRORs(session, vfile, tree, file), + transformOptions, + ); + builder.addTransform( + 'resolve-references', + (tree) => + resolveReferencesTransform(tree, sharedStateContext.sharedState.vfile as VFile, { + state: sharedStateContext.sharedState, + transformers: sharedStateContext.transformers, + }), + transformOptions, + ); + builder.addTransform( + 'myst-xrefs', + (tree) => transformMystXRefs(session, vfile, tree, frontmatter), + transformOptions, + ); session.plugins?.transforms.forEach((t) => { - if (t.stage !== 'project') return; - pipe.use(t.plugin, undefined, pluginUtils); + if (t.stage === 'document') return; + builder.addTransform( + 
t.name, + async (tree) => { + const pipe = unified(); + pipe.use(t.plugin, undefined, pluginUtils); + await pipe.run(tree, vfile); + }, + { ...transformOptions, after: t.after, before: t.before }, + ); }); - await pipe.run(mdast, vfile); + builder.addTransform( + 'embed', + (tree) => embedTransform(session, tree, file, dependencies, sharedStateContext.sharedState), + transformOptions, + ); // Ensure there are keys on every node after post processing - keysTransform(mdast); - checkLinkTextTransform(mdast, externalReferences, vfile); - logMessagesFromVFile(session, fileState.vfile); + builder.addTransform('keys', keysTransform, transformOptions); + builder.addTransform( + 'check-link-text', + (tree) => checkLinkTextTransform(tree, sharedStateContext.externalReferences, vfile), + transformOptions, + ); + const pipeline = builder.build(); + await pipeline.run(mdast); logMessagesFromVFile(session, vfile); - log.debug(toc(`Transformed mdast cross references and links for "${file}" in %s`)); + + if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); if (checkLinks) await checkLinksTransform(session, file, mdast); } @@ -380,48 +476,84 @@ export async function finalizeMdast( ) { const vfile = new VFile(); // Collect errors on this file vfile.path = file; - if (simplifyFigures) { - // Transform output nodes to images / text - reduceOutputs(session, mdast, file, imageWriteFolder, { + const builder = new TransformPipelineBuilder(); + builder.addTransform( + 'reduce-outputs', + simplifyFigures + ? (tree) => { + reduceOutputs(session, tree, file, imageWriteFolder, { + altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, + }); + } + : undefined, + ); + // Transform output nodes to images / text + builder.addTransform('write-outputs', (tree) => + transformOutputsToFile(session, tree, imageWriteFolder, { altOutputFolder: simplifyFigures ? 
undefined : imageAltOutputFolder, - }); - } - transformOutputsToFile(session, mdast, imageWriteFolder, { - altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, - vfile, - }); - if (!useExistingImages) { - await transformImagesToDisk(session, mdast, file, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - imageExtensions, - }); - // Must happen after transformImages - await transformImageFormats(session, mdast, file, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - imageExtensions, - }); - if (optimizeWebp) { - await transformWebp(session, { file, imageWriteFolder, maxSizeWebp }); - } - if (processThumbnail) { - // Note, the thumbnail transform must be **after** images, as it may read the images - await transformThumbnail(session, mdast, file, frontmatter, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - webp: optimizeWebp, - maxSizeWebp, - }); - await transformBanner(session, file, frontmatter, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - webp: optimizeWebp, - maxSizeWebp, - }); - } - } - await transformDeleteBase64UrlSource(mdast); - if (simplifyFigures) { - // This must happen after embedded content is resolved so all children are present on figures - transformPlaceholderImages(mdast, { imageExtensions }); - } + vfile, + }), + ); + builder.addTransform( + 'write-images', + !useExistingImages + ? (tree) => + transformImagesToDisk(session, tree, file, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + imageExtensions, + }) + : undefined, + ); + // Must happen after transformImages + builder.addTransform( + 'image-formats', + !useExistingImages + ? (tree) => + transformImageFormats(session, tree, file, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + imageExtensions, + }) + : undefined, + ); + builder.addTransform( + 'webp', + !useExistingImages && optimizeWebp + ? 
() => transformWebp(session, { file, imageWriteFolder, maxSizeWebp }) + : undefined, + ); + + // Note, the thumbnail transform must be **after** images, as it may read the images + builder.addTransform( + 'thumbnails', + !useExistingImages && processThumbnail + ? (tree) => + transformThumbnail(session, tree, file, frontmatter, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + webp: optimizeWebp, + maxSizeWebp, + }) + : undefined, + ); + builder.addTransform( + 'banner', + !useExistingImages && processThumbnail + ? () => + transformBanner(session, file, frontmatter, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + webp: optimizeWebp, + maxSizeWebp, + }) + : undefined, + ); + + builder.addTransform('delete-base64', transformDeleteBase64UrlSource); + // This must happen after embedded content is resolved so all children are present on figures + builder.addTransform( + 'placeholder-images', + simplifyFigures ? (tree) => transformPlaceholderImages(tree, { imageExtensions }) : undefined, + ); + const pipeline = builder.build(); + await pipeline.run(mdast); const cache = castSession(session); const postData = cache.$getMdast(file)?.post; if (postData) { diff --git a/packages/myst-cli/src/process/pipeline.ts b/packages/myst-cli/src/process/pipeline.ts new file mode 100644 index 000000000..6718e4833 --- /dev/null +++ b/packages/myst-cli/src/process/pipeline.ts @@ -0,0 +1,116 @@ +import type { GenericParent } from 'myst-common'; + +export type TransformFunction = (mdast: GenericParent) => void; + +export type TransformOptions = { + after?: string; + before?: string; + skip?: boolean; +}; + +type TransformObject = { + name: string; + transform?: TransformFunction; +} & TransformOptions; + +/** + * A sequential pipeline for transforming MyST AST + */ +export class TransformPipeline { + transforms: TransformFunction[]; + constructor(transforms: TransformFunction[]) { + this.transforms = transforms; + } + + async run(mdast: GenericParent) { + for
(const transform of this.transforms) { + await Promise.resolve(transform(mdast)); + } + } +} + +/** + * Builder for assembling an asynchronous sequential pipeline for + * processing MyST AST + */ +export class TransformPipelineBuilder { + transforms: TransformObject[]; + constructor() { + this.transforms = []; + } + + /** + * Validate the `before`/`after` constraints, order the transforms, and + * return a pipeline of the transforms that are not skipped and have a function. + * @throws Error if a constraint is invalid or the constraints form a cycle + */ + build() { + const namedTransforms = new Map( + this.transforms.map((transform) => [transform.name, transform]), + ); + + // Check the following invariants: + // 1. Transform has _at most_ one of `before` or `after`, but not both + // 2. Transform does not refer to itself + // 3. Transform refers to another transform that exists + this.transforms.forEach((transform) => { + // Prohibit transforms from defining multiple relationship constraints + // This assumption avoids a class of insertion conflicts + if (transform.before && transform.after) { + throw new Error('Transform cannot both define before and after'); + } + const comparison = transform.before ?? transform.after; + if (!comparison) return; + if (comparison === transform.name) { + throw new Error('Transform cannot refer to itself in before or after'); + } + + if (!namedTransforms.has(comparison)) { + throw new Error('Transform must refer to valid transform in before or after'); + } + }); + + // Perform `after` and `before` handling + // Cyclic references can never be placed; a pass that places nothing raises an error + const transformOrder = this.transforms + .filter((t) => !t.before && !t.after) + .map(({ name }) => name); + while (transformOrder.length !== namedTransforms.size) { + const nPlaced = transformOrder.length; + this.transforms.forEach((t) => { + // Have we handled this yet? + if (transformOrder.includes(t.name)) return; + // Otherwise, can we handle it?
+ if (t.before && transformOrder.includes(t.before)) { + transformOrder.splice(transformOrder.indexOf(t.before), 0, t.name); + } else if (t.after && transformOrder.includes(t.after)) { + transformOrder.splice(transformOrder.indexOf(t.after) + 1, 0, t.name); + } + }); + if (transformOrder.length === nPlaced) { + throw new Error('Transforms have cyclic before or after references'); + } + } + // Pull out transform functions for non-skipped transforms + const transforms = transformOrder + .map((name) => namedTransforms.get(name)!) + .filter(({ skip, transform }) => !skip && !!transform) + .map(({ transform }) => transform) as TransformFunction[]; + return new TransformPipeline(transforms); + } + + /** + * Add AST transform function with `name`. + * @param options - options to control the insertion point + */ + addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { + if (this.transforms.map((t) => t.name).includes(name)) { + throw new Error(`Duplicate transforms with name "${name}"`); + } + this.transforms.push({ + name, + transform, + ...options, + }); + } +} diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index adac56779..3bfc7c6b3 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -42,7 +42,7 @@ import { combineProjectCitationRenderers } from './citations.js'; import { loadFile, selectFile } from './file.js'; import { loadReferences } from './loadReferences.js'; import type { TransformFn } from './mdast.js'; -import { finalizeMdast, postProcessMdast, transformMdast } from './mdast.js'; +import { finalizeMdast, transformMdast } from './mdast.js'; import { toSectionedParts, buildHierarchy, sectionToHeadingLevel } from './search.js'; const WEB_IMAGE_EXTENSIONS = [ @@ -339,21 +339,7 @@ export function selectPageReferenceStates( }) .filter((state): state is ReferenceState => !!state); if (!opts?.suppressWarnings) warnOnDuplicateIdentifiers(session, pageReferenceStates); - pages.forEach((page) => { - const state = cache.$internalReferences[page.file]; - if (!state)
return; - const { mdast } = cache.$getMdast(page.file)?.post ?? {}; - if (!mdast) return; - const vfile = new VFile(); - vfile.path = page.file; - buildIndexTransform( - mdast, - vfile, - state, - new MultiPageReferenceResolver(pageReferenceStates, state.filePath), - ); - logMessagesFromVFile(session, vfile); - }); + return pageReferenceStates; } @@ -405,6 +391,32 @@ export async function writeFile( session.log.debug(toc(`Wrote "${file}" in %s`)); } +/** + * A barrier synchronization primitive: each wait() call blocks until nClients calls to wait() have been made + * The returned promise resolves at the moment the last expected wait() call arrives, releasing every waiter + * @param nClients - number of wait() calls that must be made before all waiters are unblocked + */ +export function makeBarrier(nClients: number): { + promise: Promise; + wait: () => Promise; +} { + const ctx: { resolve?: () => void | undefined } = {}; + const promise = new Promise((resolve) => { + ctx.resolve = resolve; + }); + + let nWaiting = nClients; + const wait = async () => { + nWaiting--; + if (!nWaiting) { + ctx.resolve!(); + } + await promise; + return nWaiting; + }; + return { promise, wait }; +} + export async function fastProcessFile( session: ISession, { @@ -434,9 +446,48 @@ export async function fastProcessFile( const state = session.store.getState(); const fileParts = selectors.selectFileParts(state, file); const projectParts = selectors.selectProjectParts(state, projectPath); + + const allFiles = [file, ...fileParts]; + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier(allFiles.length); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(allFiles.length); + + // TODO: maybe move transformMdast into a multi-file function + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + const referencingPages = [ + ...pages, + ...projectParts.map((part) => { + return { file: part }; + }), + ]; + referencingPromise.then(() => { + const pageReferenceStates = selectPageReferenceStates(session, referencingPages); +
referenceStateContext.referenceStates.push(...pageReferenceStates); + }); + indexingPromise.then(() => { + const cache = castSession(session); + referencingPages.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? {}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( - [file, ...fileParts].map(async (f) => { + allFiles.map(async (f) => { return transformMdast(session, { + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file: f, imageExtensions: imageExtensions ?? WEB_IMAGE_EXTENSIONS, projectPath, @@ -446,26 +497,15 @@ export async function fastProcessFile( extraTransforms, index: project.index, execute, - }); - }), - ); - const pageReferenceStates = selectPageReferenceStates(session, [ - ...pages, - ...projectParts.map((part) => { - return { file: part }; - }), - ]); - await Promise.all( - [file, ...fileParts].map(async (f) => { - return postProcessMdast(session, { - file: f, - pageReferenceStates, extraLinkTransformers, + runPostProcess: true, + referenceStateContext, }); }), ); + await Promise.all( - [file, ...fileParts].map(async (f) => { + allFiles.map(async (f) => { const { mdast, frontmatter } = castSession(session).$getMdast(f)?.post ?? {}; if (mdast) { await finalizeMdast(session, mdast, frontmatter ?? {}, f, { @@ -539,10 +579,42 @@ export async function processProject( }); const pagesToTransform: { file: string; slug?: string }[] = [...pages, ...projectParts]; const usedImageExtensions = imageExtensions ?? 
WEB_IMAGE_EXTENSIONS; - // Transform all pages + + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier( + pagesToTransform.length, + ); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(pagesToTransform.length); + + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + referencingPromise.then(() => { + const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); + referenceStateContext.referenceStates.push(...pageReferenceStates); + }); + indexingPromise.then(() => { + const cache = castSession(session); + pagesToTransform.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? {}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( - pagesToTransform.map((page) => - transformMdast(session, { + pagesToTransform.map(async (page) => { + await transformMdast(session, { + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file: page.file, projectPath: project.path, projectSlug: siteProject.slug, @@ -551,22 +623,17 @@ export async function processProject( watchMode, execute, extraTransforms, - index: project.index, - }), - ), - ); - const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); - // Handle all cross references - await Promise.all( - pagesToTransform.map((page) => - postProcessMdast(session, { - file: page.file, - checkLinks: checkLinks || strict, - pageReferenceStates, extraLinkTransformers, - }), - ), + checkLinks: checkLinks || strict, + index: project.index, + runPostProcess: true, + referenceStateContext, + }); + }), ); + + /////// + // Write all 
pages if (writeFiles) { await Promise.all( @@ -585,7 +652,7 @@ export async function processProject( }), ); await Promise.all( - pages.map(async (page) => { + pages.map((page) => { return writeFile(session, { file: page.file, projectSlug: siteProject.slug as string, diff --git a/packages/myst-common/src/types.ts b/packages/myst-common/src/types.ts index 0231d1927..fc9384d2f 100644 --- a/packages/myst-common/src/types.ts +++ b/packages/myst-common/src/types.ts @@ -119,6 +119,8 @@ export type TransformSpec = { name: string; doc?: string; stage: 'document' | 'project'; + before?: string; + after?: string; // context?: 'tex' | 'docx' | 'jats' | 'typst' | 'site'; plugin: Plugin< [PluginOptions | undefined, PluginUtils],