From 3d42bf4bad444496c6de8a10477addbd417ae045 Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Wed, 11 Dec 2024 14:29:55 -0500 Subject: [PATCH 01/11] Initial sketch of pipeline builder --- packages/myst-cli/src/process/mdast.ts | 251 ++++++++++++++++++------- 1 file changed, 183 insertions(+), 68 deletions(-) diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index fb50a03cd..59aeb0160 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -6,16 +6,10 @@ import type { PageFrontmatter } from 'myst-frontmatter'; import { SourceFileKind } from 'myst-spec-ext'; import type { LinkTransformer } from 'myst-transforms'; import { - basicTransformationsPlugin, - htmlPlugin, - footnotesPlugin, ReferenceState, MultiPageReferenceResolver, resolveLinksAndCitationsTransform, resolveReferencesTransform, - mathPlugin, - codePlugin, - enumerateTargetsPlugin, keysTransform, linksTransform, MystTransformer, @@ -25,13 +19,19 @@ import { RRIDTransformer, RORTransformer, DOITransformer, - joinGatesPlugin, - glossaryPlugin, - abbreviationPlugin, - reconstructHtmlPlugin, - inlineMathSimplificationPlugin, checkLinkTextTransform, - indexIdentifierPlugin, + reconstructHtmlTransform, + htmlTransform, + basicTransformations, + inlineMathSimplificationTransform, + mathTransform, + glossaryTransform, + abbreviationTransform, + indexIdentifierTransform, + enumerateTargetsTransform, + joinGatesTransform, + codeTransform, + footnotesTransform, } from 'myst-transforms'; import { unified } from 'unified'; import { select, selectAll } from 'unist-util-select'; @@ -106,6 +106,86 @@ function referenceFileFromPartFile(session: ISession, partFile: string) { return file ?? partFile; } +type TransformFunction = (mdast: GenericParent) => void; + +type TransformSorting = { + after?: string; + before?: string; +}; +type TransformObject = { + name: string; + transform: TransformFunction; +} & TransformSorting; + +class TransformPipeline { + transforms: TransformFunction[]; + constructor(transforms: TransformFunction[]) { + this.transforms = transforms; + } + + async run(mdast: GenericParent) { + for (const transform of this.transforms) { + await Promise.resolve(transform(mdast)); + } + } +} + +class TransformPipelineBuilder { + transforms: TransformObject[]; + constructor() { + this.transforms = []; + } + + build() { + const transformNames = new Set(this.transforms.map((transform) => transform.name)); + this.transforms.forEach((transform) => { + // Prohibit transforms from defining multiple relationship constraints + // This assumption avoids a class of insertion conflicts + if (transform.before && transform.after) { + throw new Error('Transform cannot both define before and after'); + } + const comparison = transform.before ?? transform.after; + if (!comparison) return; + if (comparison === transform.name) { + throw new Error('Transform cannot refer to itself in before or after'); + } + + if (!transformNames.has(comparison)) { + throw new Error('Transform must refer to valid transform in before or after'); + } + }); + const namedTransforms = new Map( + this.transforms.map((transform) => [transform.name, transform]), + ); + const transformOrder = this.transforms + .filter((t) => !t.before && !t.after) + .map(({ name }) => name); + while (transformOrder.length !== namedTransforms.size) { + this.transforms.forEach((t) => { + // Have we handled this yet? + if (transformOrder.includes(t.name)) return; + // Otherwise, can we handle it? + if (t.before && transformOrder.includes(t.before)) { + transformOrder.splice(transformOrder.indexOf(t.before), 0, t.name); + } else if (t.after && transformOrder.includes(t.after)) { + transformOrder.splice(transformOrder.indexOf(t.after) + 1, 0, t.name); + } + }); + } + console.log(namedTransforms); + const transforms = transformOrder.map((name) => namedTransforms.get(name)!.transform); + return new TransformPipeline(transforms); + } + + addTransform(name: string, transform: TransformFunction, sorting?: TransformSorting) { + this.transforms.push({ + name, + transform, + ...sorting, + }); + } +} + export async function transformMdast( session: ISession, opts: { @@ -178,79 +258,114 @@ export async function transformMdast( vfile, }); cache.$internalReferences[file] = state; + + const builder = new TransformPipelineBuilder(); + // // Import additional content from mdast or other files - importMdastFromJson(session, file, mdast); - await includeFilesTransform(session, file, mdast, frontmatter, vfile); - rawDirectiveTransform(mdast, vfile); + builder.addTransform('import-mdast-json', (tree) => importMdastFromJson(session, file, tree)); // after=START + builder.addTransform('include-files', (tree) => + includeFilesTransform(session, file, tree, frontmatter, vfile), + ); + builder.addTransform('raw-directive', (tree) => rawDirectiveTransform(tree, vfile)); // This needs to come before basic transformations since it may add labels to blocks - liftCodeMetadataToBlock(session, vfile, mdast); + builder.addTransform('lift-code-metadata', (tree) => + liftCodeMetadataToBlock(session, vfile, tree), + ); - const pipe = unified() - .use(reconstructHtmlPlugin) // We need to group and link the HTML first - .use(htmlPlugin, { htmlHandlers }) // Some of the HTML plugins need to operate on the transformed html, e.g. figure caption transforms - .use(basicTransformationsPlugin, { + builder.addTransform('reconstruct-html', reconstructHtmlTransform); // We need to group and link the HTML first + builder.addTransform('html', (tree) => htmlTransform(tree, { htmlHandlers })); // Some of the HTML plugins need to operate on the transformed html, e.g. figure caption transforms + builder.addTransform('basic', (tree) => + basicTransformations(tree, vfile, { parser: (content: string) => parseMyst(session, content, file), firstDepth: (titleDepth ?? 1) + (frontmatter.content_includes_title ? 0 : 1), - }) - .use(inlineMathSimplificationPlugin) - .use(mathPlugin, { macros: frontmatter.math }) - .use(glossaryPlugin) // This should be before the enumerate plugins - .use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations }) - .use(indexIdentifierPlugin) - .use(enumerateTargetsPlugin, { state }) // This should be after math/container transforms - .use(joinGatesPlugin); + }), + ); + builder.addTransform('inline-math', (tree) => inlineMathSimplificationTransform(tree)); + builder.addTransform('math', (tree) => mathTransform(tree, vfile, { macros: frontmatter.math })); + builder.addTransform('glossary', (tree) => glossaryTransform(tree, vfile)); // This should be before the enumerate plugins + builder.addTransform('abbreviation', (tree) => + abbreviationTransform(tree, { abbreviations: frontmatter.abbreviations }), + ); + builder.addTransform('index-identifier', (tree) => indexIdentifierTransform(tree)); + builder.addTransform('enumerate-targets', (tree) => enumerateTargetsTransform(tree, { state })); // This should be after math/container transforms + builder.addTransform('join-gates', (tree) => joinGatesTransform(tree, vfile)); // Load custom transform plugins + const pipe = unified(); session.plugins?.transforms.forEach((t) => { if (t.stage !== 'document') return; pipe.use(t.plugin, undefined, pluginUtils); }); - await pipe.run(mdast, vfile); + builder.addTransform('legacy-plugins', (tree) => pipe.run(tree, vfile)); // This needs to come after basic transformations since meta tags are added there - propagateBlockDataToCode(session, vfile, mdast); + builder.addTransform('propagate-block-data', (tree) => + propagateBlockDataToCode(session, vfile, tree), + ); // Initialize citation renderers for this (non-bib) file - cache.$citationRenderers[file] = await transformLinkedDOIs( - session, - vfile, - mdast, - cache.$doiRenderers, - file, + const citationState: { fileRenderer?: ReturnType } = {}; + const registerCitations = async (tree: GenericParent) => { + cache.$citationRenderers[file] = await transformLinkedDOIs( + session, + vfile, + tree, + cache.$doiRenderers, + file, + ); + const rendererFiles = [file]; + if (projectPath) { + rendererFiles.unshift(projectPath); + } else { + const localFiles = (await bibFilesInDir(session, path.dirname(file))) || []; + rendererFiles.push(...localFiles); + } + // Combine file-specific citation renderers with project renderers from bib files + citationState.fileRenderer = combineCitationRenderers(cache, ...rendererFiles); + }; + builder.addTransform('register-citations', registerCitations); + builder.addTransform('kernel-execution', (tree) => { + if (execute) { + const cachePath = path.join(session.buildPath(), 'execute'); + kernelExecutionTransform(tree, vfile, { + basePath: session.sourcePath(), + cache: new LocalDiskCache<(IExpressionResult | IOutput[])[]>(cachePath), + sessionFactory: () => session.jupyterSessionManager(), + frontmatter: frontmatter, + ignoreCache: false, + errorIsFatal: false, + log: session.log, + }); + } + }); + builder.addTransform('render-inline-expressions', (tree) => + transformRenderInlineExpressions(tree, vfile), ); - const rendererFiles = [file]; - if (projectPath) { - rendererFiles.unshift(projectPath); - } else { - const localFiles = (await bibFilesInDir(session, path.dirname(file))) || []; - rendererFiles.push(...localFiles); - } - // Combine file-specific citation renderers with project renderers from bib files - const fileCitationRenderer = combineCitationRenderers(cache, ...rendererFiles); + builder.addTransform('cache-outputs', (tree) => + transformOutputsToCache(session, tree, kind, { minifyMaxCharacters }), + ); + builder.addTransform('filter-output', (tree) => + transformFilterOutputStreams(tree, vfile, frontmatter.settings), + ); + builder.addTransform('citations', (tree) => { + if (citationState.fileRenderer) { + transformCitations(session, file, tree, citationState.fileRenderer, references); + } + }); - if (execute) { - const cachePath = path.join(session.buildPath(), 'execute'); - await kernelExecutionTransform(mdast, vfile, { - basePath: session.sourcePath(), - cache: new LocalDiskCache<(IExpressionResult | IOutput[])[]>(cachePath), - sessionFactory: () => session.jupyterSessionManager(), - frontmatter: frontmatter, - ignoreCache: false, - errorIsFatal: false, - log: session.log, - }); - } - transformRenderInlineExpressions(mdast, vfile); - await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters }); - transformFilterOutputStreams(mdast, vfile, frontmatter.settings); - transformCitations(session, file, mdast, fileCitationRenderer, references); - await unified() - .use(codePlugin, { lang: frontmatter?.kernelspec?.language }) - .use(footnotesPlugin) // Needs to happen near the end - .run(mdast, vfile); - transformImagesToEmbed(mdast); - transformImagesWithoutExt(session, mdast, file, { imageExtensions }); + builder.addTransform('code', (tree) => + codeTransform(tree, vfile, { lang: frontmatter?.kernelspec?.language }), + ); + builder.addTransform('footnotes', (tree) => footnotesTransform(tree, vfile)); // Needs to happen near the end + builder.addTransform('images-to-embed', transformImagesToEmbed); + builder.addTransform('image-extensions', (tree) => + transformImagesWithoutExt(session, tree, file, { imageExtensions }), + ); const isJupytext = frontmatter.kernelspec || frontmatter.jupytext; - if (isJupytext) transformLiftCodeBlocksInJupytext(mdast); + if (isJupytext) { + builder.addTransform('jupytext-lift-code-blocks', transformLiftCodeBlocksInJupytext); + } + const pipeline = builder.build(); + pipeline.run(mdast); const sha256 = selectors.selectFileInfo(store.getState(), file).sha256 as string; const useSlug = pageSlug !== index; let url: string | undefined; From 0f587768a73fa3c22889590278bb5c121e8e529b Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Wed, 11 Dec 2024 23:13:17 -0500 Subject: [PATCH 02/11] In-progress work on transform pipeline --- .../src/build/utils/getFileContent.ts | 67 +++- packages/myst-cli/src/process/mdast.ts | 364 +++++++++++------- packages/myst-common/src/types.ts | 2 + 3 files changed, 275 insertions(+), 158 deletions(-) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 058b6d29d..3d46759d5 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -1,4 +1,4 @@ -import { resolve } from 'node:path'; +import path from 'node:path'; import { plural } from 'myst-common'; import { tic } from 'myst-cli-utils'; import type { LinkTransformer } from 'myst-transforms'; @@ -6,12 +6,32 @@ import { combineProjectCitationRenderers } from '../../process/citations.js'; import { loadFile, selectFile } from '../../process/file.js'; import { loadReferences } from '../../process/loadReferences.js'; import type { TransformFn } from '../../process/mdast.js'; -import { postProcessMdast, transformMdast } from '../../process/mdast.js'; +import { transformMdast } from '../../process/mdast.js'; import { loadProject, selectPageReferenceStates } from '../../process/site.js'; import type { ISession } from '../../session/types.js'; import { selectors } from '../../store/index.js'; import type { ImageExtensions } from '../../utils/resolveExtension.js'; +function makeSyncPoint(clients: string[]): { + promises: Promise[]; + dispatch: (client: string) => void; +} { + const promiseResolvers = new Map void>(); + const promises: Promise[] = []; + + clients.forEach((name) => { + const promise = new Promise((resolve) => { + promiseResolvers.set(name, resolve); + }); + promises.push(promise); + }); + const dispatch = (client: string) => { + const resolve = promiseResolvers.get(client)!; + resolve(); + }; + return { promises, dispatch }; +} + export async function getFileContent( session: ISession, files: string[], @@ -34,13 +54,13 @@ export async function getFileContent( }, ) { const toc = tic(); - files = files.map((file) => resolve(file)); - projectPath = projectPath ?? resolve('.'); + files = files.map((file) => path.resolve(file)); + projectPath = projectPath ?? path.resolve('.'); const { project, pages } = await loadProject(session, projectPath); const projectFiles = pages.map((page) => page.file).filter((file) => !files.includes(file)); await Promise.all([ // Load all citations (.bib) - ...project.bibliography.map((path) => loadFile(session, path, projectPath, '.bib')), + ...project.bibliography.map((bib) => loadFile(session, bib, projectPath, '.bib')), // Load all content (.md, .tex, .myst.json, or .ipynb) ...[...files, ...projectFiles].map((file, ind) => { const preFrontmatter = Array.isArray(preFrontmatters) @@ -60,11 +80,31 @@ export async function getFileContent( // Keep 'files' indices consistent in 'allFiles' as index is used for other fields. const allFiles = [...files, ...projectFiles, ...projectParts]; + const { dispatch, promises: filePromises } = makeSyncPoint(allFiles); + + // TODO: maybe move transformMdast into a multi-file function + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + Promise.all(filePromises).then(() => { + const pageReferenceStates = selectPageReferenceStates( + session, + allFiles.map((file) => { + return { file }; + }), + ); + referenceStateContext.referenceStates.push(...pageReferenceStates); + }); await Promise.all( allFiles.map(async (file, ind) => { + const referenceResolutionBlocker = async () => { + dispatch(file); + await Promise.all(filePromises); + }; const pageSlug = pages.find((page) => page.file === file)?.slug; const titleDepth = typeof titleDepths === 'number' ? titleDepths : titleDepths?.[ind]; await transformMdast(session, { + referenceResolutionBlocker, file, imageExtensions, projectPath, @@ -74,24 +114,13 @@ export async function getFileContent( titleDepth, extraTransforms, execute, - }); - }), - ); - const pageReferenceStates = selectPageReferenceStates( - session, - allFiles.map((file) => { - return { file }; - }), - ); - await Promise.all( - [...files, ...projectParts].map(async (file) => { - await postProcessMdast(session, { - file, extraLinkTransformers, - pageReferenceStates, + runPostProcess: [...files, ...projectParts].includes(file), + referenceStateContext, }); }), ); + const selectedFiles = await Promise.all( files.map(async (file) => { const selectedFile = selectFile(session, file); diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 59aeb0160..5e98d8540 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -3,6 +3,7 @@ import { tic } from 'myst-cli-utils'; import type { GenericParent, IExpressionResult, PluginUtils, References } from 'myst-common'; import { fileError, fileWarn, RuleId, slugToUrl } from 'myst-common'; import type { PageFrontmatter } from 'myst-frontmatter'; +import type { Dependency } from 'myst-spec-ext'; import { SourceFileKind } from 'myst-spec-ext'; import type { LinkTransformer } from 'myst-transforms'; import { @@ -73,7 +74,7 @@ import { import type { ImageExtensions } from '../utils/resolveExtension.js'; import { logMessagesFromVFile } from '../utils/logging.js'; import { combineCitationRenderers } from './citations.js'; -import { bibFilesInDir, selectFile } from './file.js'; +import { bibFilesInDir } from './file.js'; import { parseMyst } from './myst.js'; import { kernelExecutionTransform, LocalDiskCache } from 'myst-execute'; import type { IOutput } from '@jupyterlab/nbformat'; @@ -108,14 +109,15 @@ function referenceFileFromPartFile(session: ISession, partFile: string) { type TransformFunction = (mdast: GenericParent) => void; -type TransformSorting = { +type TransformOptions = { after?: string; before?: string; + skip?: boolean; }; type TransformObject = { name: string; - transform: TransformFunction; -} & TransformSorting; + transform?: TransformFunction; +} & TransformOptions; class TransformPipeline { transforms: TransformFunction[]; @@ -172,16 +174,18 @@ class TransformPipelineBuilder { } }); } - console.log(namedTransforms); - const transforms = transformOrder.map((name) => namedTransforms.get(name)!.transform); + const transforms = transformOrder + .map((name) => namedTransforms.get(name)!) + .filter(({ skip, transform }) => !skip && !!transform) + .map(({ transform }) => transform) as TransformFunction[]; return new TransformPipeline(transforms); } - addTransform(name: string, transform: TransformFunction, sorting?: TransformSorting) { + addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { this.transforms.push({ name, transform, - ...sorting, + ...options, }); } } @@ -189,6 +193,7 @@ class TransformPipelineBuilder { export async function transformMdast( session: ISession, opts: { + referenceResolutionBlocker: () => void; file: string; projectPath?: string; projectSlug?: string; @@ -197,9 +202,15 @@ export async function transformMdast( watchMode?: boolean; execute?: boolean; extraTransforms?: TransformFn[]; + extraLinkTransformers?: LinkTransformer[]; minifyMaxCharacters?: number; index?: string; titleDepth?: number; + runPostProcess?: boolean; + referenceStateContext: { + referenceStates: ReferenceState[]; + }; + checkLinks?: boolean; }, ) { const { @@ -211,9 +222,13 @@ export async function transformMdast( extraTransforms, watchMode = false, minifyMaxCharacters, + extraLinkTransformers, index, titleDepth, execute, + runPostProcess, + referenceStateContext, + checkLinks, } = opts; const toc = tic(); const { store, log } = session; @@ -231,6 +246,8 @@ export async function transformMdast( log.debug(`Processing "${file}"`); const vfile = new VFile(); // Collect errors on this file vfile.path = file; + + const sha256 = selectors.selectFileInfo(store.getState(), file).sha256 as string; const mdast = structuredClone(mdastPre); const frontmatter = processPageFrontmatter( session, @@ -248,6 +265,7 @@ export async function transformMdast( }, projectPath, ); + const isJupytext = frontmatter.kernelspec || frontmatter.jupytext; const references: References = { cite: { order: [], data: {} }, }; @@ -260,7 +278,6 @@ export async function transformMdast( cache.$internalReferences[file] = state; const builder = new TransformPipelineBuilder(); - // // Import additional content from mdast or other files builder.addTransform('import-mdast-json', (tree) => importMdastFromJson(session, file, tree)); // after=START builder.addTransform('include-files', (tree) => @@ -289,13 +306,20 @@ export async function transformMdast( builder.addTransform('index-identifier', (tree) => indexIdentifierTransform(tree)); builder.addTransform('enumerate-targets', (tree) => enumerateTargetsTransform(tree, { state })); // This should be after math/container transforms builder.addTransform('join-gates', (tree) => joinGatesTransform(tree, vfile)); + // Load custom transform plugins - const pipe = unified(); session.plugins?.transforms.forEach((t) => { - if (t.stage !== 'document') return; - pipe.use(t.plugin, undefined, pluginUtils); + if (t.stage && t.stage !== 'document') return; + builder.addTransform( + t.name, + async (tree) => { + const pipe = unified(); + pipe.use(t.plugin, undefined, pluginUtils); + await pipe.run(tree, vfile); + }, + { after: t.after, before: t.before }, + ); }); - builder.addTransform('legacy-plugins', (tree) => pipe.run(tree, vfile)); // This needs to come after basic transformations since meta tags are added there builder.addTransform('propagate-block-data', (tree) => @@ -303,7 +327,9 @@ export async function transformMdast( ); // Initialize citation renderers for this (non-bib) file - const citationState: { fileRenderer?: ReturnType } = {}; + const citationState: { fileRenderer: ReturnType } = { + fileRenderer: {}, + }; const registerCitations = async (tree: GenericParent) => { cache.$citationRenderers[file] = await transformLinkedDOIs( session, @@ -323,8 +349,9 @@ export async function transformMdast( citationState.fileRenderer = combineCitationRenderers(cache, ...rendererFiles); }; builder.addTransform('register-citations', registerCitations); - builder.addTransform('kernel-execution', (tree) => { - if (execute) { + builder.addTransform( + 'kernel-execution', + (tree) => { const cachePath = path.join(session.buildPath(), 'execute'); kernelExecutionTransform(tree, vfile, { basePath: session.sourcePath(), @@ -335,8 +362,9 @@ export async function transformMdast( errorIsFatal: false, log: session.log, }); - } - }); + }, + { skip: !execute }, + ); builder.addTransform('render-inline-expressions', (tree) => transformRenderInlineExpressions(tree, vfile), ); @@ -347,9 +375,7 @@ export async function transformMdast( transformFilterOutputStreams(tree, vfile, frontmatter.settings), ); builder.addTransform('citations', (tree) => { - if (citationState.fileRenderer) { - transformCitations(session, file, tree, citationState.fileRenderer, references); - } + transformCitations(session, file, tree, citationState.fileRenderer, references); }); builder.addTransform('code', (tree) => @@ -360,13 +386,108 @@ export async function transformMdast( builder.addTransform('image-extensions', (tree) => transformImagesWithoutExt(session, tree, file, { imageExtensions }), ); - const isJupytext = frontmatter.kernelspec || frontmatter.jupytext; - if (isJupytext) { - builder.addTransform('jupytext-lift-code-blocks', transformLiftCodeBlocksInJupytext); + builder.addTransform( + 'jupytext-lift-code-blocks', + isJupytext ? transformLiftCodeBlocksInJupytext : undefined, + ); + const cachedMdast = cache.$getMdast(file); + if (cachedMdast) cachedMdast.post = data; + if (extraTransforms) { + await Promise.all( + extraTransforms.map(async (transform) => { + await transform(session, opts); + }), + ); } + const dependencies: Dependency[] = []; + const sharedStateContext: { + sharedState?: any; + externalReferences?: any; + transformers: LinkTransformer[]; + } = { transformers: [] }; + builder.addTransform('set-shared-state', () => { + sharedStateContext.sharedState = referenceStateContext.referenceStates + ? new MultiPageReferenceResolver(referenceStateContext.referenceStates, file, vfile) + : state; + sharedStateContext.externalReferences = Object.values(cache.$externalReferences); + // NOTE: This is doing things in place, we should potentially make this a different state? + sharedStateContext.transformers = [ + ...(extraLinkTransformers || []), + new WikiTransformer(), + new GithubTransformer(), + new RRIDTransformer(), + new RORTransformer(), + new DOITransformer(), // This also is picked up in the next transform + new MystTransformer(sharedStateContext.externalReferences), + new SphinxTransformer(sharedStateContext.externalReferences), + new StaticFileTransformer(session, file), // Links static files and internally linked files + ]; + }); + const transformOptions = { skip: !runPostProcess }; + builder.addTransform( + 'resolve-links-and-citations', + (tree) => + resolveLinksAndCitationsTransform(tree, { + state: sharedStateContext.sharedState, + transformers: sharedStateContext.transformers, + }), + transformOptions, + ); + builder.addTransform( + 'links', + (tree) => + linksTransform(tree, sharedStateContext.sharedState.vfile as VFile, { + transformers: sharedStateContext.transformers, + selector: LINKS_SELECTOR, + }), + transformOptions, + ); + builder.addTransform('ror', (tree) => transformLinkedRORs(session, vfile, tree, file), { + skip: !runPostProcess, + }); + builder.addTransform( + 'resolve-references', + (tree) => + resolveReferencesTransform(tree, sharedStateContext.sharedState.vfile as VFile, { + state: sharedStateContext.sharedState, + transformers: sharedStateContext.transformers, + }), + transformOptions, + ); + builder.addTransform( + 'myst-xrefs', + (tree) => transformMystXRefs(session, vfile, tree, frontmatter), + transformOptions, + ); + builder.addTransform( + 'embed', + (tree) => embedTransform(session, tree, file, dependencies, sharedStateContext.sharedState), + transformOptions, + ); + + session.plugins?.transforms.forEach((t) => { + if (t.stage && t.stage !== 'project') return; + builder.addTransform( + t.name, + async (tree) => { + const pipe = unified(); + pipe.use(t.plugin, undefined, pluginUtils); + await pipe.run(tree, vfile); + }, + { ...transformOptions, after: t.after, before: t.before }, + ); + }); + + // Ensure there are keys on every node after post processing + builder.addTransform('keys', keysTransform, transformOptions); + builder.addTransform( + 'check-link-text', + (tree) => checkLinkTextTransform(tree, sharedStateContext.externalReferences, vfile), + transformOptions, + ); const pipeline = builder.build(); - pipeline.run(mdast); - const sha256 = selectors.selectFileInfo(store.getState(), file).sha256 as string; + await pipeline.run(mdast); + const useSlug = pageSlug !== index; let url: string | undefined; let dataUrl: string | undefined; @@ -379,6 +500,7 @@ export async function transformMdast( } url = slugToUrl(url); updateFileInfoFromFrontmatter(session, file, frontmatter, url, dataUrl); + const data: RendererData = { kind: isJupytext ? SourceFileKind.Notebook : kind, file, @@ -391,80 +513,8 @@ export async function transformMdast( references, widgets, } as any; - const cachedMdast = cache.$getMdast(file); - if (cachedMdast) cachedMdast.post = data; - if (extraTransforms) { - await Promise.all( - extraTransforms.map(async (transform) => { - await transform(session, opts); - }), - ); - } logMessagesFromVFile(session, vfile); if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); -} - -export async function postProcessMdast( - session: ISession, - { - file, - checkLinks, - pageReferenceStates, - extraLinkTransformers, - }: { - file: string; - checkLinks?: boolean; - pageReferenceStates?: ReferenceState[]; - extraLinkTransformers?: LinkTransformer[]; - }, -) { - const toc = tic(); - const { log } = session; - const cache = castSession(session); - const mdastPost = selectFile(session, file); - if (!mdastPost) return; - const vfile = new VFile(); // Collect errors on this file - vfile.path = file; - const { mdast, dependencies, frontmatter } = mdastPost; - const fileState = cache.$internalReferences[file]; - const state = pageReferenceStates - ? new MultiPageReferenceResolver(pageReferenceStates, file, vfile) - : fileState; - const externalReferences = Object.values(cache.$externalReferences); - // NOTE: This is doing things in place, we should potentially make this a different state? - const transformers = [ - ...(extraLinkTransformers || []), - new WikiTransformer(), - new GithubTransformer(), - new RRIDTransformer(), - new RORTransformer(), - new DOITransformer(), // This also is picked up in the next transform - new MystTransformer(externalReferences), - new SphinxTransformer(externalReferences), - new StaticFileTransformer(session, file), // Links static files and internally linked files - ]; - resolveLinksAndCitationsTransform(mdast, { state, transformers }); - linksTransform(mdast, state.vfile as VFile, { - transformers, - selector: LINKS_SELECTOR, - }); - await transformLinkedRORs(session, vfile, mdast, file); - resolveReferencesTransform(mdast, state.vfile as VFile, { state, transformers }); - await transformMystXRefs(session, vfile, mdast, frontmatter); - await embedTransform(session, mdast, file, dependencies, state); - const pipe = unified(); - session.plugins?.transforms.forEach((t) => { - if (t.stage !== 'project') return; - pipe.use(t.plugin, undefined, pluginUtils); - }); - await pipe.run(mdast, vfile); - - // Ensure there are keys on every node after post processing - keysTransform(mdast); - checkLinkTextTransform(mdast, externalReferences, vfile); - logMessagesFromVFile(session, fileState.vfile); - logMessagesFromVFile(session, vfile); - log.debug(toc(`Transformed mdast cross references and links for "${file}" in %s`)); if (checkLinks) await checkLinksTransform(session, file, mdast); } @@ -495,48 +545,84 @@ export async function finalizeMdast( ) { const vfile = new VFile(); // Collect errors on this file vfile.path = file; - if (simplifyFigures) { - // Transform output nodes to images / text - reduceOutputs(session, mdast, file, imageWriteFolder, { + const builder = new TransformPipelineBuilder(); + builder.addTransform( + 'reduce-outputs', + simplifyFigures + ? (tree) => { + reduceOutputs(session, tree, file, imageWriteFolder, { + altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, + }); + } + : undefined, + ); + // Transform output nodes to images / text + builder.addTransform('write-outputs', (tree) => + transformOutputsToFile(session, tree, imageWriteFolder, { altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, - }); - } - transformOutputsToFile(session, mdast, imageWriteFolder, { - altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, - vfile, - }); - if (!useExistingImages) { - await transformImagesToDisk(session, mdast, file, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - imageExtensions, - }); - // Must happen after transformImages - await transformImageFormats(session, mdast, file, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - imageExtensions, - }); - if (optimizeWebp) { - await transformWebp(session, { file, imageWriteFolder, maxSizeWebp }); - } - if (processThumbnail) { - // Note, the thumbnail transform must be **after** images, as it may read the images - await transformThumbnail(session, mdast, file, frontmatter, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - webp: optimizeWebp, - maxSizeWebp, - }); - await transformBanner(session, file, frontmatter, imageWriteFolder, { - altOutputFolder: imageAltOutputFolder, - webp: optimizeWebp, - maxSizeWebp, - }); - } - } - await transformDeleteBase64UrlSource(mdast); - if (simplifyFigures) { - // This must happen after embedded content is resolved so all children are present on figures - transformPlaceholderImages(mdast, { imageExtensions }); - } + vfile, + }), + ); + builder.addTransform( + '', + !useExistingImages + ? (tree) => + transformImagesToDisk(session, tree, file, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + imageExtensions, + }) + : undefined, + ); + // Must happen after transformImages + builder.addTransform( + '', + !useExistingImages + ? (tree) => + transformImageFormats(session, tree, file, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + imageExtensions, + }) + : undefined, + ); + builder.addTransform( + '', + !useExistingImages && optimizeWebp + ? () => transformWebp(session, { file, imageWriteFolder, maxSizeWebp }) + : undefined, + ); + + // Note, the thumbnail transform must be **after** images, as it may read the images + builder.addTransform( + '', + !useExistingImages && processThumbnail + ? (tree) => + transformThumbnail(session, tree, file, frontmatter, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + webp: optimizeWebp, + maxSizeWebp, + }) + : undefined, + ); + builder.addTransform( + '', + !useExistingImages && processThumbnail + ? () => + transformBanner(session, file, frontmatter, imageWriteFolder, { + altOutputFolder: imageAltOutputFolder, + webp: optimizeWebp, + maxSizeWebp, + }) + : undefined, + ); + + builder.addTransform('delete-base64', transformDeleteBase64UrlSource); + // This must happen after embedded content is resolved so all children are present on figures + builder.addTransform( + 'placeholder-images', + simplifyFigures ? (tree) => transformPlaceholderImages(tree, { imageExtensions }) : undefined, + ); + const pipeline = builder.build(); + await pipeline.run(mdast); const cache = castSession(session); const postData = cache.$getMdast(file)?.post; if (postData) { diff --git a/packages/myst-common/src/types.ts b/packages/myst-common/src/types.ts index 0231d1927..fc9384d2f 100644 --- a/packages/myst-common/src/types.ts +++ b/packages/myst-common/src/types.ts @@ -119,6 +119,8 @@ export type TransformSpec = { name: string; doc?: string; stage: 'document' | 'project'; + before?: string; + after?: string; // context?: 'tex' | 'docx' | 'jats' | 'typst' | 'site'; plugin: Plugin< [PluginOptions | undefined, PluginUtils], From 0c1170b2a3fa29ffbeeea2953e0ec89f23667c2d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 12 Dec 2024 14:29:16 +0000 Subject: [PATCH 03/11] refactor: use transformOptions --- packages/myst-cli/src/process/mdast.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 5e98d8540..d0ae2e717 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -442,9 +442,11 @@ export async function transformMdast( }), transformOptions, ); - builder.addTransform('ror', (tree) => transformLinkedRORs(session, vfile, tree, file), { - skip: !runPostProcess, - }); + builder.addTransform( + 'ror', + (tree) => transformLinkedRORs(session, vfile, tree, file), + transformOptions, + ); builder.addTransform( 'resolve-references', (tree) => From 05b5bdaaab2dcd03677988b5ca58cb15f68f13af Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Thu, 12 Dec 2024 10:34:50 -0500 Subject: [PATCH 04/11] wip --- .../src/build/utils/getFileContent.ts | 42 ++++++--- packages/myst-cli/src/process/mdast.ts | 84 +++++++++--------- packages/myst-cli/src/process/site.ts | 87 ++++++++++++------- 3 files changed, 130 insertions(+), 83 deletions(-) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 3d46759d5..e364c26fc 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -80,31 +80,53 @@ export async function getFileContent( // Keep 'files' indices consistent in 'allFiles' as index is used for other fields. const allFiles = [...files, ...projectFiles, ...projectParts]; - const { dispatch, promises: filePromises } = makeSyncPoint(allFiles); + const { dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); + const { dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { referenceStates: ReturnType; } = { referenceStates: [] }; - Promise.all(filePromises).then(() => { - const pageReferenceStates = selectPageReferenceStates( - session, - allFiles.map((file) => { - return { file }; - }), - ); + const referencingPages = allFiles.map((file) => { + return { file }; + }); + Promise.all(referencingPromises).then(() => { + const pageReferenceStates = selectPageReferenceStates(session, referencingPages); referenceStateContext.referenceStates.push(...pageReferenceStates); }); + Promise.all(indexingPromises).then(() => { + const cache = castSession(session); + referencingPages.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? {}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( allFiles.map(async (file, ind) => { const referenceResolutionBlocker = async () => { - dispatch(file); - await Promise.all(filePromises); + dispatchReferencing(file); + await Promise.all(referencingPromises); + }; + const indexGenerationBlocker = async () => { + dispatchIndexing(file); + await Promise.all(indexingPromises); }; const pageSlug = pages.find((page) => page.file === file)?.slug; const titleDepth = typeof titleDepths === 'number' ? titleDepths : titleDepths?.[ind]; await transformMdast(session, { referenceResolutionBlocker, + indexGenerationBlocker, file, imageExtensions, projectPath, diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index d0ae2e717..0708f90e6 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -390,16 +390,45 @@ export async function transformMdast( 'jupytext-lift-code-blocks', isJupytext ? transformLiftCodeBlocksInJupytext : undefined, ); - const cachedMdast = cache.$getMdast(file); - if (cachedMdast) cachedMdast.post = data; - if (extraTransforms) { - await Promise.all( - extraTransforms.map(async (transform) => { - await transform(session, opts); - }), - ); - } const dependencies: Dependency[] = []; + builder.addTransform('write-post-mdast', async (tree) => { + // This writes the frontmatter to the file, so its position is important + // We might need to rethink its location + const useSlug = pageSlug !== index; + let url: string | undefined; + let dataUrl: string | undefined; + if (pageSlug && projectSlug) { + url = `/${projectSlug}/${useSlug ? pageSlug : ''}`; + dataUrl = `/${projectSlug}/${pageSlug}.json`; + } else if (pageSlug) { + url = `/${useSlug ? pageSlug : ''}`; + dataUrl = `/${pageSlug}.json`; + } + url = slugToUrl(url); + updateFileInfoFromFrontmatter(session, file, frontmatter, url, dataUrl); + + const data: RendererData = { + kind: isJupytext ? SourceFileKind.Notebook : kind, + file, + location, + sha256, + slug: pageSlug, + dependencies, + frontmatter, + tree, + references, + widgets, + } as any; + const cachedMdast = cache.$getMdast(file); + if (cachedMdast) cachedMdast.post = data; + if (extraTransforms) { + await Promise.all( + extraTransforms.map(async (transform) => { + await transform(session, opts); + }), + ); + } + }); const sharedStateContext: { sharedState?: any; externalReferences?: any; @@ -461,12 +490,6 @@ export async function transformMdast( (tree) => transformMystXRefs(session, vfile, tree, frontmatter), transformOptions, ); - builder.addTransform( - 'embed', - (tree) => embedTransform(session, tree, file, dependencies, sharedStateContext.sharedState), - transformOptions, - ); - session.plugins?.transforms.forEach((t) => { if (t.stage && t.stage !== 'project') return; builder.addTransform( @@ -479,6 +502,11 @@ export async function transformMdast( { ...transformOptions, after: t.after, before: t.before }, ); }); + builder.addTransform( + 'embed', + (tree) => embedTransform(session, tree, file, dependencies, sharedStateContext.sharedState), + transformOptions, + ); // Ensure there are keys on every node after post processing builder.addTransform('keys', keysTransform, transformOptions); @@ -489,32 +517,6 @@ export async function transformMdast( ); const pipeline = builder.build(); await pipeline.run(mdast); - - const useSlug = pageSlug !== index; - let url: string | undefined; - let dataUrl: string | undefined; - if (pageSlug && projectSlug) { - url = `/${projectSlug}/${useSlug ? pageSlug : ''}`; - dataUrl = `/${projectSlug}/${pageSlug}.json`; - } else if (pageSlug) { - url = `/${useSlug ? pageSlug : ''}`; - dataUrl = `/${pageSlug}.json`; - } - url = slugToUrl(url); - updateFileInfoFromFrontmatter(session, file, frontmatter, url, dataUrl); - - const data: RendererData = { - kind: isJupytext ? SourceFileKind.Notebook : kind, - file, - location, - sha256, - slug: pageSlug, - dependencies: [], - frontmatter, - mdast, - references, - widgets, - } as any; logMessagesFromVFile(session, vfile); if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); if (checkLinks) await checkLinksTransform(session, file, mdast); diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index adac56779..bc82ac57d 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -24,6 +24,7 @@ import { resolvePageExports, } from '../build/site/manifest.js'; import { writeRemoteDOIBibtex } from '../build/utils/bibtex.js'; +import { makeSyncPoint } from '../build/utils/getFileContent.js'; import { MYST_DOI_BIB_FILE } from '../cli/options.js'; import { filterPages, loadProjectFromDisk } from '../project/load.js'; import { DEFAULT_INDEX_FILENAMES } from '../project/fromTOC.js'; @@ -42,7 +43,7 @@ import { combineProjectCitationRenderers } from './citations.js'; import { loadFile, selectFile } from './file.js'; import { loadReferences } from './loadReferences.js'; import type { TransformFn } from './mdast.js'; -import { finalizeMdast, postProcessMdast, transformMdast } from './mdast.js'; +import { finalizeMdast, transformMdast } from './mdast.js'; import { toSectionedParts, buildHierarchy, sectionToHeadingLevel } from './search.js'; const WEB_IMAGE_EXTENSIONS = [ @@ -339,21 +340,7 @@ export function selectPageReferenceStates( }) .filter((state): state is ReferenceState => !!state); if (!opts?.suppressWarnings) warnOnDuplicateIdentifiers(session, pageReferenceStates); - pages.forEach((page) => { - const state = cache.$internalReferences[page.file]; - if (!state) return; - const { mdast } = cache.$getMdast(page.file)?.post ?? {}; - if (!mdast) return; - const vfile = new VFile(); - vfile.path = page.file; - buildIndexTransform( - mdast, - vfile, - state, - new MultiPageReferenceResolver(pageReferenceStates, state.filePath), - ); - logMessagesFromVFile(session, vfile); - }); + return pageReferenceStates; } @@ -434,9 +421,56 @@ export async function fastProcessFile( const state = session.store.getState(); const fileParts = selectors.selectFileParts(state, file); const projectParts = selectors.selectProjectParts(state, projectPath); + + const allFiles = [file, ...fileParts]; + const { dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); + const { dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); + + // TODO: maybe move transformMdast into a multi-file function + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + const referencingPages = [ + ...pages, + ...projectParts.map((part) => { + return { file: part }; + }), + ]; + Promise.all(referencingPromises).then(() => { + const pageReferenceStates = selectPageReferenceStates(session, referencingPages); + referenceStateContext.referenceStates.push(...pageReferenceStates); + }); + Promise.all(indexingPromises).then(() => { + const cache = castSession(session); + referencingPages.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? {}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( - [file, ...fileParts].map(async (f) => { + allFiles.map(async (f) => { + const referenceResolutionBlocker = async () => { + dispatchReferencing(file); + await Promise.all(referencingPromises); + }; + const indexGenerationBlocker = async () => { + dispatchIndexing(file); + await Promise.all(indexingPromises); + }; return transformMdast(session, { + referenceResolutionBlocker, + indexGenerationBlocker, file: f, imageExtensions: imageExtensions ?? WEB_IMAGE_EXTENSIONS, projectPath, @@ -446,26 +480,15 @@ export async function fastProcessFile( extraTransforms, index: project.index, execute, - }); - }), - ); - const pageReferenceStates = selectPageReferenceStates(session, [ - ...pages, - ...projectParts.map((part) => { - return { file: part }; - }), - ]); - await Promise.all( - [file, ...fileParts].map(async (f) => { - return postProcessMdast(session, { - file: f, - pageReferenceStates, extraLinkTransformers, + runPostProcess: true, + referenceStateContext, }); }), ); + await Promise.all( - [file, ...fileParts].map(async (f) => { + allFiles.map(async (f) => { const { mdast, frontmatter } = castSession(session).$getMdast(f)?.post ?? {}; if (mdast) { await finalizeMdast(session, mdast, frontmatter ?? {}, f, { From 22a76fa6be25893d321f94d34886b8c5235bedbf Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 12 Dec 2024 16:07:49 +0000 Subject: [PATCH 05/11] wip: more fixes --- .../src/build/utils/getFileContent.ts | 10 ++- packages/myst-cli/src/process/mdast.ts | 10 +++ packages/myst-cli/src/process/site.ts | 77 ++++++++++++++----- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index e364c26fc..5531f1814 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -8,11 +8,15 @@ import { loadReferences } from '../../process/loadReferences.js'; import type { TransformFn } from '../../process/mdast.js'; import { transformMdast } from '../../process/mdast.js'; import { loadProject, selectPageReferenceStates } from '../../process/site.js'; +import { buildIndexTransform, MultiPageReferenceResolver } from 'myst-transforms'; import type { ISession } from '../../session/types.js'; import { selectors } from '../../store/index.js'; import type { ImageExtensions } from '../../utils/resolveExtension.js'; +import { castSession } from '../../session/cache.js'; +import { VFile } from 'vfile'; +import { logMessagesFromVFile } from '../../utils/logging.js'; -function makeSyncPoint(clients: string[]): { +export function makeSyncPoint(clients: string[]): { promises: Promise[]; dispatch: (client: string) => void; } { @@ -80,8 +84,8 @@ export async function getFileContent( // Keep 'files' indices consistent in 'allFiles' as index is used for other fields. const allFiles = [...files, ...projectFiles, ...projectParts]; - const { dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); - const { dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); + const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); + const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 0708f90e6..9b7b72489 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -194,6 +194,7 @@ export async function transformMdast( session: ISession, opts: { referenceResolutionBlocker: () => void; + indexGenerationBlocker: () => void; file: string; projectPath?: string; projectSlug?: string; @@ -228,6 +229,8 @@ export async function transformMdast( execute, runPostProcess, referenceStateContext, + referenceResolutionBlocker, + indexGenerationBlocker, checkLinks, } = opts; const toc = tic(); @@ -429,6 +432,11 @@ export async function transformMdast( ); } }); + + // Blocking cross-project resolution + builder.addTransform('reference-resolution', referenceResolutionBlocker); + builder.addTransform('index-generation', indexGenerationBlocker); + const sharedStateContext: { sharedState?: any; externalReferences?: any; @@ -518,6 +526,8 @@ export async function transformMdast( const pipeline = builder.build(); await pipeline.run(mdast); logMessagesFromVFile(session, vfile); + + console.log(JSON.stringify(mdast, null, 2)); if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); if (checkLinks) await checkLinksTransform(session, file, mdast); } diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index bc82ac57d..78acacff6 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -423,8 +423,8 @@ export async function fastProcessFile( const projectParts = selectors.selectProjectParts(state, projectPath); const allFiles = [file, ...fileParts]; - const { dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); - const { dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); + const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); + const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { @@ -562,10 +562,54 @@ export async function processProject( }); const pagesToTransform: { file: string; slug?: string }[] = [...pages, ...projectParts]; const usedImageExtensions = imageExtensions ?? WEB_IMAGE_EXTENSIONS; - // Transform all pages + + //// + const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint( + pagesToTransform.map((f) => f.file), + ); + const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint( + pagesToTransform.map((f) => f.file), + ); + + // TODO: maybe move transformMdast into a multi-file function + const referenceStateContext: { + referenceStates: ReturnType; + } = { referenceStates: [] }; + Promise.all(referencingPromises).then(() => { + const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); + referenceStateContext.referenceStates.push(...pageReferenceStates); + }); + Promise.all(indexingPromises).then(() => { + const cache = castSession(session); + pagesToTransform.forEach((page) => { + const fileState = cache.$internalReferences[page.file]; + if (!fileState) return; + const { mdast } = cache.$getMdast(page.file)?.post ?? {}; + if (!mdast) return; + const vfile = new VFile(); + vfile.path = page.file; + buildIndexTransform( + mdast, + vfile, + fileState, + new MultiPageReferenceResolver(referenceStateContext.referenceStates, fileState.filePath), + ); + logMessagesFromVFile(session, vfile); + }); + }); await Promise.all( - pagesToTransform.map((page) => - transformMdast(session, { + pagesToTransform.map(async (page) => { + const referenceResolutionBlocker = async () => { + dispatchReferencing(page.file); + await Promise.all(referencingPromises); + }; + const indexGenerationBlocker = async () => { + dispatchIndexing(page.file); + await Promise.all(indexingPromises); + }; + await transformMdast(session, { + referenceResolutionBlocker, + indexGenerationBlocker, file: page.file, projectPath: project.path, projectSlug: siteProject.slug, @@ -574,22 +618,17 @@ export async function processProject( watchMode, execute, extraTransforms, - index: project.index, - }), - ), - ); - const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); - // Handle all cross references - await Promise.all( - pagesToTransform.map((page) => - postProcessMdast(session, { - file: page.file, - checkLinks: checkLinks || strict, - pageReferenceStates, extraLinkTransformers, - }), - ), + checkLinks: checkLinks || strict, + index: project.index, + runPostProcess: true, + referenceStateContext, + }); + }), ); + + /////// + // Write all pages if (writeFiles) { await Promise.all( From 945586f752a9d06da6dccb63002564bea980ce90 Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Thu, 12 Dec 2024 11:43:33 -0500 Subject: [PATCH 06/11] More pipeline stuff --- packages/myst-cli/src/process/mdast.ts | 20 +++++++++++--------- packages/myst-cli/src/process/site.ts | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 9b7b72489..4de45abcf 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -182,6 +182,9 @@ class TransformPipelineBuilder { } addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { + if (this.transforms.map((t) => t.name).includes(name)) { + throw new Error(`Duplicate transforms with name "${name}"`); + } this.transforms.push({ name, transform, @@ -312,7 +315,7 @@ export async function transformMdast( // Load custom transform plugins session.plugins?.transforms.forEach((t) => { - if (t.stage && t.stage !== 'document') return; + if (t.stage !== 'document') return; builder.addTransform( t.name, async (tree) => { @@ -418,7 +421,7 @@ export async function transformMdast( slug: pageSlug, dependencies, frontmatter, - tree, + mdast: tree, references, widgets, } as any; @@ -499,7 +502,7 @@ export async function transformMdast( transformOptions, ); session.plugins?.transforms.forEach((t) => { - if (t.stage && t.stage !== 'project') return; + if (t.stage === 'document') return; builder.addTransform( t.name, async (tree) => { @@ -527,7 +530,6 @@ export async function transformMdast( await pipeline.run(mdast); logMessagesFromVFile(session, vfile); - console.log(JSON.stringify(mdast, null, 2)); if (!watchMode) log.info(toc(`📖 Built ${file} in %s.`)); if (checkLinks) await checkLinksTransform(session, file, mdast); } @@ -578,7 +580,7 @@ export async function finalizeMdast( }), ); builder.addTransform( - '', + 'write-images', !useExistingImages ? (tree) => transformImagesToDisk(session, tree, file, imageWriteFolder, { @@ -589,7 +591,7 @@ export async function finalizeMdast( ); // Must happen after transformImages builder.addTransform( - '', + 'image-formats', !useExistingImages ? (tree) => transformImageFormats(session, tree, file, imageWriteFolder, { @@ -599,7 +601,7 @@ export async function finalizeMdast( : undefined, ); builder.addTransform( - '', + 'webp', !useExistingImages && optimizeWebp ? () => transformWebp(session, { file, imageWriteFolder, maxSizeWebp }) : undefined, @@ -607,7 +609,7 @@ export async function finalizeMdast( // Note, the thumbnail transform must be **after** images, as it may read the images builder.addTransform( - '', + 'thumbnails', !useExistingImages && processThumbnail ? (tree) => transformThumbnail(session, tree, file, frontmatter, imageWriteFolder, { @@ -618,7 +620,7 @@ export async function finalizeMdast( : undefined, ); builder.addTransform( - '', + 'banner', !useExistingImages && processThumbnail ? () => transformBanner(session, file, frontmatter, imageWriteFolder, { diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index 78acacff6..d630439be 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -459,7 +459,7 @@ export async function fastProcessFile( }); }); await Promise.all( - allFiles.map(async (f) => { + allFiles.map((f) => { const referenceResolutionBlocker = async () => { dispatchReferencing(file); await Promise.all(referencingPromises); @@ -647,7 +647,7 @@ export async function processProject( }), ); await Promise.all( - pages.map(async (page) => { + pages.map((page) => { return writeFile(session, { file: page.file, projectSlug: siteProject.slug as string, From 016b414980d23383df3bd61f81916a10590b06d6 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 13 Dec 2024 16:56:36 +0000 Subject: [PATCH 07/11] refactor: rename sync primitive --- .../src/build/utils/getFileContent.ts | 51 ++++++++----------- packages/myst-cli/src/process/site.ts | 50 ++++++------------ 2 files changed, 37 insertions(+), 64 deletions(-) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 5531f1814..235b87c6b 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -16,24 +16,25 @@ import { castSession } from '../../session/cache.js'; import { VFile } from 'vfile'; import { logMessagesFromVFile } from '../../utils/logging.js'; -export function makeSyncPoint(clients: string[]): { - promises: Promise[]; - dispatch: (client: string) => void; +export function makeBarrier(nClients: number): { + promise: Promise; + wait: () => Promise; } { - const promiseResolvers = new Map void>(); - const promises: Promise[] = []; - - clients.forEach((name) => { - const promise = new Promise((resolve) => { - promiseResolvers.set(name, resolve); - }); - promises.push(promise); + const ctx: { resolve?: () => void | undefined } = {}; + const promise = new Promise((resolve) => { + ctx.resolve = resolve; }); - const dispatch = (client: string) => { - const resolve = promiseResolvers.get(client)!; - resolve(); + + let nWaiting = nClients; + const wait = async () => { + nWaiting--; + if (!nWaiting) { + ctx.resolve!(); + } + await promise; + return nWaiting; }; - return { promises, dispatch }; + return { promise, wait }; } export async function getFileContent( @@ -84,8 +85,8 @@ export async function getFileContent( // Keep 'files' indices consistent in 'allFiles' as index is used for other fields. const allFiles = [...files, ...projectFiles, ...projectParts]; - const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); - const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier(allFiles.length); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(allFiles.length); // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { @@ -94,11 +95,11 @@ export async function getFileContent( const referencingPages = allFiles.map((file) => { return { file }; }); - Promise.all(referencingPromises).then(() => { + referencingPromise.then(() => { const pageReferenceStates = selectPageReferenceStates(session, referencingPages); referenceStateContext.referenceStates.push(...pageReferenceStates); }); - Promise.all(indexingPromises).then(() => { + indexingPromise.then(() => { const cache = castSession(session); referencingPages.forEach((page) => { const fileState = cache.$internalReferences[page.file]; @@ -118,19 +119,11 @@ export async function getFileContent( }); await Promise.all( allFiles.map(async (file, ind) => { - const referenceResolutionBlocker = async () => { - dispatchReferencing(file); - await Promise.all(referencingPromises); - }; - const indexGenerationBlocker = async () => { - dispatchIndexing(file); - await Promise.all(indexingPromises); - }; const pageSlug = pages.find((page) => page.file === file)?.slug; const titleDepth = typeof titleDepths === 'number' ? titleDepths : titleDepths?.[ind]; await transformMdast(session, { - referenceResolutionBlocker, - indexGenerationBlocker, + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file, imageExtensions, projectPath, diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index d630439be..dc41c1242 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -24,7 +24,7 @@ import { resolvePageExports, } from '../build/site/manifest.js'; import { writeRemoteDOIBibtex } from '../build/utils/bibtex.js'; -import { makeSyncPoint } from '../build/utils/getFileContent.js'; +import { makeBarrier } from '../build/utils/getFileContent.js'; import { MYST_DOI_BIB_FILE } from '../cli/options.js'; import { filterPages, loadProjectFromDisk } from '../project/load.js'; import { DEFAULT_INDEX_FILENAMES } from '../project/fromTOC.js'; @@ -423,8 +423,8 @@ export async function fastProcessFile( const projectParts = selectors.selectProjectParts(state, projectPath); const allFiles = [file, ...fileParts]; - const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint(allFiles); - const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint(allFiles); + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier(allFiles.length); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(allFiles.length); // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { @@ -436,11 +436,11 @@ export async function fastProcessFile( return { file: part }; }), ]; - Promise.all(referencingPromises).then(() => { + referencingPromise.then(() => { const pageReferenceStates = selectPageReferenceStates(session, referencingPages); referenceStateContext.referenceStates.push(...pageReferenceStates); }); - Promise.all(indexingPromises).then(() => { + indexingPromise.then(() => { const cache = castSession(session); referencingPages.forEach((page) => { const fileState = cache.$internalReferences[page.file]; @@ -459,18 +459,10 @@ export async function fastProcessFile( }); }); await Promise.all( - allFiles.map((f) => { - const referenceResolutionBlocker = async () => { - dispatchReferencing(file); - await Promise.all(referencingPromises); - }; - const indexGenerationBlocker = async () => { - dispatchIndexing(file); - await Promise.all(indexingPromises); - }; + allFiles.map(async (f) => { return transformMdast(session, { - referenceResolutionBlocker, - indexGenerationBlocker, + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file: f, imageExtensions: imageExtensions ?? WEB_IMAGE_EXTENSIONS, projectPath, @@ -563,23 +555,19 @@ export async function processProject( const pagesToTransform: { file: string; slug?: string }[] = [...pages, ...projectParts]; const usedImageExtensions = imageExtensions ?? WEB_IMAGE_EXTENSIONS; - //// - const { dispatch: dispatchReferencing, promises: referencingPromises } = makeSyncPoint( - pagesToTransform.map((f) => f.file), - ); - const { dispatch: dispatchIndexing, promises: indexingPromises } = makeSyncPoint( - pagesToTransform.map((f) => f.file), + const { wait: waitReferencing, promise: referencingPromise } = makeBarrier( + pagesToTransform.length, ); + const { wait: waitIndexing, promise: indexingPromise } = makeBarrier(pagesToTransform.length); - // TODO: maybe move transformMdast into a multi-file function const referenceStateContext: { referenceStates: ReturnType; } = { referenceStates: [] }; - Promise.all(referencingPromises).then(() => { + referencingPromise.then(() => { const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); referenceStateContext.referenceStates.push(...pageReferenceStates); }); - Promise.all(indexingPromises).then(() => { + indexingPromise.then(() => { const cache = castSession(session); pagesToTransform.forEach((page) => { const fileState = cache.$internalReferences[page.file]; @@ -599,17 +587,9 @@ export async function processProject( }); await Promise.all( pagesToTransform.map(async (page) => { - const referenceResolutionBlocker = async () => { - dispatchReferencing(page.file); - await Promise.all(referencingPromises); - }; - const indexGenerationBlocker = async () => { - dispatchIndexing(page.file); - await Promise.all(indexingPromises); - }; await transformMdast(session, { - referenceResolutionBlocker, - indexGenerationBlocker, + referenceResolutionBlocker: waitReferencing, + indexGenerationBlocker: waitIndexing, file: page.file, projectPath: project.path, projectSlug: siteProject.slug, From df9726a2237659d8307cb012f4709800f1dea7eb Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 13 Dec 2024 16:55:00 +0000 Subject: [PATCH 08/11] docs: note on barrier --- packages/myst-cli/src/build/utils/getFileContent.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 235b87c6b..6d68966e2 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -16,6 +16,11 @@ import { castSession } from '../../session/cache.js'; import { VFile } from 'vfile'; import { logMessagesFromVFile } from '../../utils/logging.js'; +/** + * A barrier synchronization primitive that blocks until a fixed number clients are waiting + * + * @param nClients - number of clients that must wait before unblocking + */ export function makeBarrier(nClients: number): { promise: Promise; wait: () => Promise; From d3af29cddf8255bc2c9b0c539f3b364e67de5aed Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 13 Dec 2024 20:33:55 +0000 Subject: [PATCH 09/11] refactor: move pipeline to new file --- packages/myst-cli/src/process/mdast.ts | 87 +--------------------- packages/myst-cli/src/process/pipeline.ts | 91 +++++++++++++++++++++++ 2 files changed, 92 insertions(+), 86 deletions(-) create mode 100644 packages/myst-cli/src/process/pipeline.ts diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 4de45abcf..5e4f7a29e 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -79,6 +79,7 @@ import { parseMyst } from './myst.js'; import { kernelExecutionTransform, LocalDiskCache } from 'myst-execute'; import type { IOutput } from '@jupyterlab/nbformat'; import { rawDirectiveTransform } from '../transforms/raw.js'; +import { TransformPipelineBuilder } from './pipeline.js'; const LINKS_SELECTOR = 'link,card,linkBlock'; @@ -107,92 +108,6 @@ function referenceFileFromPartFile(session: ISession, partFile: string) { return file ?? partFile; } -type TransformFunction = (mdast: GenericParent) => void; - -type TransformOptions = { - after?: string; - before?: string; - skip?: boolean; -}; -type TransformObject = { - name: string; - transform?: TransformFunction; -} & TransformOptions; - -class TransformPipeline { - transforms: TransformFunction[]; - constructor(transforms: TransformFunction[]) { - this.transforms = transforms; - } - - async run(mdast: GenericParent) { - for (const transform of this.transforms) { - await Promise.resolve(transform(mdast)); - } - } -} - -class TransformPipelineBuilder { - transforms: TransformObject[]; - constructor() { - this.transforms = []; - } - - build() { - const transformNames = new Set(this.transforms.map((transform) => transform.name)); - this.transforms.forEach((transform) => { - // Prohibit transforms from defining multiple relationship constraints - // This assumption avoids a class of insertion conflicts - if (transform.before && transform.after) { - throw new Error('Transform cannot both define before and after'); - } - const comparison = transform.before ?? transform.after; - if (!comparison) return; - if (comparison === transform.name) { - throw new Error('Transform cannot refer to itself in before or after'); - } - - if (!transformNames.has(comparison)) { - throw new Error('Transform must refer to valid transform in before or after'); - } - }); - const namedTransforms = new Map( - this.transforms.map((transform) => [transform.name, transform]), - ); - const transformOrder = this.transforms - .filter((t) => !t.before && !t.after) - .map(({ name }) => name); - while (transformOrder.length !== namedTransforms.size) { - this.transforms.forEach((t) => { - // Have we handled this yet? - if (transformOrder.includes(t.name)) return; - // Otherwise, can we handle it? - if (t.before && transformOrder.includes(t.before)) { - transformOrder.splice(transformOrder.indexOf(t.before), 0, t.name); - } else if (t.after && transformOrder.includes(t.after)) { - transformOrder.splice(transformOrder.indexOf(t.after) + 1, 0, t.name); - } - }); - } - const transforms = transformOrder - .map((name) => namedTransforms.get(name)!) - .filter(({ skip, transform }) => !skip && !!transform) - .map(({ transform }) => transform) as TransformFunction[]; - return new TransformPipeline(transforms); - } - - addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { - if (this.transforms.map((t) => t.name).includes(name)) { - throw new Error(`Duplicate transforms with name "${name}"`); - } - this.transforms.push({ - name, - transform, - ...options, - }); - } -} - export async function transformMdast( session: ISession, opts: { diff --git a/packages/myst-cli/src/process/pipeline.ts b/packages/myst-cli/src/process/pipeline.ts new file mode 100644 index 000000000..3bc9fee9d --- /dev/null +++ b/packages/myst-cli/src/process/pipeline.ts @@ -0,0 +1,91 @@ +import type { GenericParent } from 'myst-common'; + +export type TransformFunction = (mdast: GenericParent) => void; + +export type TransformOptions = { + after?: string; + before?: string; + skip?: boolean; +}; + +type TransformObject = { + name: string; + transform?: TransformFunction; +} & TransformOptions; + +/** + * A sequential pipeline for transforming MyST AST + */ +export class TransformPipeline { + transforms: TransformFunction[]; + constructor(transforms: TransformFunction[]) { + this.transforms = transforms; + } + + async run(mdast: GenericParent) { + for (const transform of this.transforms) { + await Promise.resolve(transform(mdast)); + } + } +} + +export class TransformPipelineBuilder { + transforms: TransformObject[]; + constructor() { + this.transforms = []; + } + + build() { + const transformNames = new Set(this.transforms.map((transform) => transform.name)); + this.transforms.forEach((transform) => { + // Prohibit transforms from defining multiple relationship constraints + // This assumption avoids a class of insertion conflicts + if (transform.before && transform.after) { + throw new Error('Transform cannot both define before and after'); + } + const comparison = transform.before ?? transform.after; + if (!comparison) return; + if (comparison === transform.name) { + throw new Error('Transform cannot refer to itself in before or after'); + } + + if (!transformNames.has(comparison)) { + throw new Error('Transform must refer to valid transform in before or after'); + } + }); + const namedTransforms = new Map( + this.transforms.map((transform) => [transform.name, transform]), + ); + const transformOrder = this.transforms + .filter((t) => !t.before && !t.after) + .map(({ name }) => name); + while (transformOrder.length !== namedTransforms.size) { + this.transforms.forEach((t) => { + // Have we handled this yet? + if (transformOrder.includes(t.name)) return; + // Otherwise, can we handle it? + if (t.before && transformOrder.includes(t.before)) { + transformOrder.splice(transformOrder.indexOf(t.before), 0, t.name); + } else if (t.after && transformOrder.includes(t.after)) { + transformOrder.splice(transformOrder.indexOf(t.after) + 1, 0, t.name); + } + }); + } + const transforms = transformOrder + .map((name) => namedTransforms.get(name)!) + .filter(({ skip, transform }) => !skip && !!transform) + .map(({ transform }) => transform) as TransformFunction[]; + return new TransformPipeline(transforms); + } + + addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { + if (this.transforms.map((t) => t.name).includes(name)) { + throw new Error(`Duplicate transforms with name "${name}"`); + } + this.transforms.push({ + name, + transform, + ...options, + }); + } +} From ed432c5c6757f85bdb95d1c2b1dcc022657b2edc Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 16 Dec 2024 16:07:45 +0000 Subject: [PATCH 10/11] fix: remove circular lint --- .../src/build/utils/getFileContent.ts | 28 +------------------ packages/myst-cli/src/process/site.ts | 27 +++++++++++++++++- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/packages/myst-cli/src/build/utils/getFileContent.ts b/packages/myst-cli/src/build/utils/getFileContent.ts index 6d68966e2..bba9cb306 100644 --- a/packages/myst-cli/src/build/utils/getFileContent.ts +++ b/packages/myst-cli/src/build/utils/getFileContent.ts @@ -7,7 +7,7 @@ import { loadFile, selectFile } from '../../process/file.js'; import { loadReferences } from '../../process/loadReferences.js'; import type { TransformFn } from '../../process/mdast.js'; import { transformMdast } from '../../process/mdast.js'; -import { loadProject, selectPageReferenceStates } from '../../process/site.js'; +import { loadProject, selectPageReferenceStates, makeBarrier } from '../../process/site.js'; import { buildIndexTransform, MultiPageReferenceResolver } from 'myst-transforms'; import type { ISession } from '../../session/types.js'; import { selectors } from '../../store/index.js'; @@ -16,32 +16,6 @@ import { castSession } from '../../session/cache.js'; import { VFile } from 'vfile'; import { logMessagesFromVFile } from '../../utils/logging.js'; -/** - * A barrier synchronization primitive that blocks until a fixed number clients are waiting - * - * @param nClients - number of clients that must wait before unblocking - */ -export function makeBarrier(nClients: number): { - promise: Promise; - wait: () => Promise; -} { - const ctx: { resolve?: () => void | undefined } = {}; - const promise = new Promise((resolve) => { - ctx.resolve = resolve; - }); - - let nWaiting = nClients; - const wait = async () => { - nWaiting--; - if (!nWaiting) { - ctx.resolve!(); - } - await promise; - return nWaiting; - }; - return { promise, wait }; -} - export async function getFileContent( session: ISession, files: string[], diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index dc41c1242..3bfc7c6b3 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -24,7 +24,6 @@ import { resolvePageExports, } from '../build/site/manifest.js'; import { writeRemoteDOIBibtex } from '../build/utils/bibtex.js'; -import { makeBarrier } from '../build/utils/getFileContent.js'; import { MYST_DOI_BIB_FILE } from '../cli/options.js'; import { filterPages, loadProjectFromDisk } from '../project/load.js'; import { DEFAULT_INDEX_FILENAMES } from '../project/fromTOC.js'; @@ -392,6 +391,32 @@ export async function writeFile( session.log.debug(toc(`Wrote "${file}" in %s`)); } +/** + * A barrier synchronization primitive that blocks until a fixed number clients are waiting + * + * @param nClients - number of clients that must wait before unblocking + */ +export function makeBarrier(nClients: number): { + promise: Promise; + wait: () => Promise; +} { + const ctx: { resolve?: () => void | undefined } = {}; + const promise = new Promise((resolve) => { + ctx.resolve = resolve; + }); + + let nWaiting = nClients; + const wait = async () => { + nWaiting--; + if (!nWaiting) { + ctx.resolve!(); + } + await promise; + return nWaiting; + }; + return { promise, wait }; +} + export async function fastProcessFile( session: ISession, { From 11fc17a7c884c4273d012a5d79daa7424cd4cf8a Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 16 Dec 2024 16:14:41 +0000 Subject: [PATCH 11/11] docs: add docstrings --- packages/myst-cli/src/process/pipeline.ts | 26 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/packages/myst-cli/src/process/pipeline.ts b/packages/myst-cli/src/process/pipeline.ts index 3bc9fee9d..6718e4833 100644 --- a/packages/myst-cli/src/process/pipeline.ts +++ b/packages/myst-cli/src/process/pipeline.ts @@ -29,6 +29,10 @@ export class TransformPipeline { } } +/** + * Builder for assembling an asynchronous sequential pipeline for + * processing MyST AST + */ export class TransformPipelineBuilder { transforms: TransformObject[]; constructor() { @@ -36,7 +40,14 @@ export class TransformPipelineBuilder { } build() { - const transformNames = new Set(this.transforms.map((transform) => transform.name)); + const namedTransforms = new Map( + this.transforms.map((transform) => [transform.name, transform]), + ); + + // Check the following invariants: + // 1. Transform has _at most_ one of `before` or `after`, but not both + // 2. Transform does not refer to itself + // 3. Transform refers to another transform that exists this.transforms.forEach((transform) => { // Prohibit transforms from defining multiple relationship constraints // This assumption avoids a class of insertion conflicts @@ -49,13 +60,13 @@ export class TransformPipelineBuilder { throw new Error('Transform cannot refer to itself in before or after'); } - if (!transformNames.has(comparison)) { + if (!namedTransforms.has(comparison)) { throw new Error('Transform must refer to valid transform in before or after'); } }); - const namedTransforms = new Map( - this.transforms.map((transform) => [transform.name, transform]), - ); + + // Perform `after` and `before` handling + // Cyclic references will not be handled specially const transformOrder = this.transforms .filter((t) => !t.before && !t.after) .map(({ name }) => name); @@ -71,6 +82,7 @@ export class TransformPipelineBuilder { } }); } + // Pull out transform functions for non-skipped transforms const transforms = transformOrder .map((name) => namedTransforms.get(name)!) .filter(({ skip, transform }) => !skip && !!transform) @@ -78,6 +90,10 @@ export class TransformPipelineBuilder { return new TransformPipeline(transforms); } + /** + * Add AST transform function with `name`. + * @param options - options to control the insertion point + */ addTransform(name: string, transform?: TransformFunction, options?: TransformOptions) { if (this.transforms.map((t) => t.name).includes(name)) { throw new Error(`Duplicate transforms with name "${name}"`);