diff --git a/package-lock.json b/package-lock.json index 736e8d550..518b53bfd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15751,6 +15751,7 @@ "chalk": "^5.2.0", "myst-cli-utils": "^2.0.11", "myst-common": "^1.7.6", + "nbtx": "^0.2.3", "node-fetch": "^3.3.0", "unist-util-select": "^4.0.3", "vfile": "^5.3.7", @@ -15965,7 +15966,8 @@ "version": "1.7.6", "license": "MIT", "dependencies": { - "myst-spec": "^0.0.5" + "myst-spec": "^0.0.5", + "nbtx": "^0.2.3" } }, "packages/myst-templates": { diff --git a/packages/myst-cli/src/process/file.ts b/packages/myst-cli/src/process/file.ts index d2e93f730..d78db3263 100644 --- a/packages/myst-cli/src/process/file.ts +++ b/packages/myst-cli/src/process/file.ts @@ -36,6 +36,7 @@ type LoadFileOptions = { preFrontmatter?: Record; keepTitleNode?: boolean; kind?: SourceFileKind; + minifyMaxCharacters?: number; }; export type LoadFileResult = { @@ -85,7 +86,7 @@ export async function loadNotebookFile( mdast, frontmatter: nbFrontmatter, widgets, - } = await processNotebookFull(session, file, content); + } = await processNotebookFull(session, file, content, opts?.minifyMaxCharacters); const { frontmatter: cellFrontmatter, identifiers } = getPageFrontmatter( session, mdast, diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 750331710..d8eceb6a7 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -52,13 +52,13 @@ import { transformImageFormats, transformLinkedDOIs, transformLinkedRORs, - transformOutputsToCache, transformRenderInlineExpressions, transformThumbnail, StaticFileTransformer, propagateBlockDataToCode, transformBanner, - reduceOutputs, + transformReduceOutputs, + transformLiftOutputs, transformPlaceholderImages, transformDeleteBase64UrlSource, transformWebp, @@ -76,7 +76,6 @@ import { combineCitationRenderers } from './citations.js'; import { bibFilesInDir, selectFile } from './file.js'; import { parseMyst } from './myst.js'; import { kernelExecutionTransform, LocalDiskCache } from 'myst-execute'; -import type { IOutput } from '@jupyterlab/nbformat'; import { rawDirectiveTransform } from '../transforms/raw.js'; const LINKS_SELECTOR = 'link,card,linkBlock'; @@ -198,16 +197,36 @@ export async function transformMdast( .use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations }) .use(indexIdentifierPlugin) .use(enumerateTargetsPlugin, { state }); // This should be after math/container transforms - // Load custom transform plugins - session.plugins?.transforms.forEach((t) => { - if (t.stage !== 'document') return; - pipe.use(t.plugin, undefined, pluginUtils); - }); await pipe.run(mdast, vfile); // This needs to come after basic transformations since meta tags are added there propagateBlockDataToCode(session, vfile, mdast); + if (execute) { + const cachePath = path.join(session.buildPath(), 'execute'); + await kernelExecutionTransform(mdast, vfile, { + basePath: session.sourcePath(), + cache: new LocalDiskCache(cachePath), + sessionFactory: () => session.jupyterSessionManager(), + frontmatter: frontmatter, + ignoreCache: false, + errorIsFatal: false, + log: session.log, + outputCache: cache.$outputs, + minifyMaxCharacters, + }); + } + transformRenderInlineExpressions(mdast, vfile); + + // Run document plugins + const pluginPipe = unified(); + // Load custom transform plugins + session.plugins?.transforms.forEach((t) => { + if (t.stage !== 'document') return; + pluginPipe.use(t.plugin, undefined, pluginUtils); + }); + await pluginPipe.run(mdast, vfile); + // Initialize citation renderers for this (non-bib) file cache.$citationRenderers[file] = await transformLinkedDOIs( session, @@ -225,21 +244,6 @@ export async function transformMdast( } // Combine file-specific citation renderers with project renderers from bib files const fileCitationRenderer = combineCitationRenderers(cache, ...rendererFiles); - - if (execute) { - const cachePath = path.join(session.buildPath(), 'execute'); - await kernelExecutionTransform(mdast, vfile, { - basePath: session.sourcePath(), - cache: new LocalDiskCache<(IExpressionResult | IOutput[])[]>(cachePath), - sessionFactory: () => session.jupyterSessionManager(), - frontmatter: frontmatter, - ignoreCache: false, - errorIsFatal: false, - log: session.log, - }); - } - transformRenderInlineExpressions(mdast, vfile); - await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters }); transformFilterOutputStreams(mdast, vfile, frontmatter.settings); transformCitations(session, file, mdast, fileCitationRenderer, references); await unified() @@ -382,9 +386,10 @@ export async function finalizeMdast( vfile.path = file; if (simplifyFigures) { // Transform output nodes to images / text - reduceOutputs(session, mdast, file, imageWriteFolder, { + transformReduceOutputs(session, mdast, file, imageWriteFolder, { altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, }); + transformLiftOutputs(mdast); } transformOutputsToFile(session, mdast, imageWriteFolder, { altOutputFolder: simplifyFigures ? undefined : imageAltOutputFolder, diff --git a/packages/myst-cli/src/process/notebook.ts b/packages/myst-cli/src/process/notebook.ts index 59257a5dc..8fd040451 100644 --- a/packages/myst-cli/src/process/notebook.ts +++ b/packages/myst-cli/src/process/notebook.ts @@ -7,14 +7,15 @@ import type { ICell, IMimeBundle, INotebookContent, - INotebookMetadata, IOutput, MultilineString, } from '@jupyterlab/nbformat'; -import { CELL_TYPES, ensureString } from 'nbtx'; +import { CELL_TYPES, ensureString, minifyCellOutput } from 'nbtx'; import { VFile } from 'vfile'; import { logMessagesFromVFile } from '../utils/logging.js'; import type { ISession } from '../session/types.js'; +import { castSession } from '../session/cache.js'; +import { computeHash } from 'myst-cli-utils'; import { BASE64_HEADER_SPLIT } from '../transforms/images.js'; import { parseMyst } from './myst.js'; import type { Code, InlineExpression } from 'myst-spec-ext'; @@ -88,8 +89,10 @@ export async function processNotebookFull( session: ISession, file: string, content: string, + minifyMaxCharacters?: number, ): Promise<{ mdast: GenericParent; frontmatter: PageFrontmatter; widgets: Record }> { const { log } = session; + const cache = castSession(session); const { metadata, cells } = JSON.parse(content) as INotebookContent; // notebook will be empty, use generateNotebookChildren, generateNotebookOrder here if we want to populate those @@ -165,15 +168,22 @@ export async function processNotebookFull( value: ensureString(cell.source), }; - const outputsChildren = (cell.outputs as IOutput[]).map((output) => { - // Embed outputs in an output block - const result = { - type: 'output', - jupyter_data: output, - children: [], - }; - return result; - }); + const outputsChildren = await Promise.all( + (cell.outputs as IOutput[]).map(async (output) => { + // Minify the output + const [minifiedOutput] = await minifyCellOutput([output] as IOutput[], cache.$outputs, { + computeHash, + maxCharacters: minifyMaxCharacters, + }); + // Embed outputs in an output block + const result = { + type: 'output', + jupyter_data: minifiedOutput, + children: [], + }; + return result; + }), + ); const outputs = { type: 'outputs', id: nanoid(), diff --git a/packages/myst-cli/src/transforms/outputs.ts b/packages/myst-cli/src/transforms/outputs.ts index 92990523d..f9f3033f2 100644 --- a/packages/myst-cli/src/transforms/outputs.ts +++ b/packages/myst-cli/src/transforms/outputs.ts @@ -1,13 +1,12 @@ import fs from 'node:fs'; import { dirname, join, relative } from 'node:path'; import { computeHash } from 'myst-cli-utils'; -import type { Image, SourceFileKind, Output } from 'myst-spec-ext'; +import type { Image, SourceFileKind } from 'myst-spec-ext'; import { liftChildren, fileError, RuleId, fileWarn } from 'myst-common'; import type { GenericNode, GenericParent } from 'myst-common'; import type { ProjectSettings } from 'myst-frontmatter'; import { htmlTransform } from 'myst-transforms'; import stripAnsi from 'strip-ansi'; -import { remove } from 'unist-util-remove'; import { selectAll } from 'unist-util-select'; import type { VFile } from 'vfile'; import type { IOutput, IStream } from '@jupyterlab/nbformat'; @@ -25,6 +24,38 @@ function getWriteDestination(hash: string, contentType: string, writeFolder: str return join(writeFolder, getFilename(hash, contentType)); } +const MARKDOWN_MIME_TYPE = 'text/markdown'; +const SUPPORTED_MARKDOWN_VARIANTS = ['Original', 'GFM', 'CommonMark', 'myst']; + +/** + * Extract the `variant` parameter from a Markdown MIME type + * + * @param mimeType MIME type of the form `text/markdown;FOO=BAR` + */ +function extractVariantParameter(mimeType: string): string | undefined { + const [variant] = Array.from(mimeType.matchAll(/;([^;]+)=([^;]+)/g)) + .filter(([name]) => name === 'variant') + .map((pair) => pair[1]); + return variant; +} + +function preferredMarkdownMIMEType( + mimeType: string, +): { variant?: string; mimeType: string } | undefined { + if (!mimeType.startsWith(MARKDOWN_MIME_TYPE)) { + return; + } + const variant = extractVariantParameter(mimeType); + if (!variant) { + return { mimeType }; + } + if (SUPPORTED_MARKDOWN_VARIANTS.includes(variant)) { + return { mimeType, variant }; + } + + return; +} + /** * Traverse all output nodes, minify their content, and cache on the session */ @@ -34,28 +65,18 @@ export async function transformOutputsToCache( kind: SourceFileKind, opts?: { minifyMaxCharacters?: number }, ) { - const outputsNodes = selectAll('outputs', mdast) as GenericNode[]; + const outputs = selectAll('output', mdast) as GenericNode[]; + if (!outputs.length) return; const cache = castSession(session); await Promise.all( - outputsNodes - // Ignore outputs that are hidden - .filter((outputs) => outputs.visibility !== 'remove') - // Pull out children - .map((outputs) => outputs.children as Output[]) - .flat() - // Filter outputs with no data - // TODO: can this ever occur? - .filter((output) => (output as any).jupyter_data !== undefined) - // Minify output data + outputs + .filter((output) => output.visibility !== 'remove') .map(async (output) => { - [(output as any).jupyter_data] = await minifyCellOutput( - [(output as any).jupyter_data] as IOutput[], - cache.$outputs, - { - computeHash, - maxCharacters: opts?.minifyMaxCharacters, - }, - ); + const [tmpData] = await minifyCellOutput([output.data] as IOutput[], cache.$outputs, { + computeHash, + maxCharacters: opts?.minifyMaxCharacters, + }); + output.data = tmpData; }), ); } @@ -86,10 +107,13 @@ export function transformFilterOutputStreams( const blockRemoveStderr = tags.includes('remove-stderr'); const blockRemoveStdout = tags.includes('remove-stdout'); const outputs = selectAll('output', block) as GenericNode[]; - outputs - .filter((output) => { - const data = output.jupyter_data; - + // There should be only one output in the block + outputs.forEach((output) => { + if (!output.data) { + return; + } + console.log(JSON.stringify(output.data, null, 2)); + output.data = [output.data].filter((data: IStream | MinifiedMimeOutput) => { if ( (stderr !== 'show' || blockRemoveStderr) && data.output_type === 'stream' && @@ -108,7 +132,7 @@ export function transformFilterOutputStreams( }, ); } - return doRemove; + return !doRemove; } if ( (stdout !== 'show' || blockRemoveStdout) && @@ -128,7 +152,7 @@ export function transformFilterOutputStreams( }, ); } - return doRemove; + return !doRemove; } if ( mpl !== 'show' && @@ -153,15 +177,12 @@ export function transformFilterOutputStreams( }, ); } - return doRemove; + return !doRemove; } - return false; - }) - .forEach((output) => { - output.type = '__delete__'; - }); + return true; + })[0]; + }); }); - remove(mdast, { cascade: false }, '__delete__'); } function writeCachedOutputToFile( @@ -207,19 +228,17 @@ export function transformOutputsToFile( const outputs = selectAll('output', mdast) as GenericNode[]; const cache = castSession(session); - outputs - .filter((output) => !!output.jupyter_data) - .forEach((node) => { - // TODO: output-refactoring -- drop to single output in future - walkOutputs([node.jupyter_data], (obj) => { - const { hash } = obj; - if (!hash || !cache.$outputs[hash]) return undefined; - obj.path = writeCachedOutputToFile(session, hash, cache.$outputs[hash], writeFolder, { - ...opts, - node, - }); + outputs.forEach((node) => { + const rawOutputs = node.data === undefined ? [] : [node.data]; + walkOutputs(rawOutputs, (obj) => { + const { hash } = obj; + if (!hash || !cache.$outputs[hash]) return undefined; + obj.path = writeCachedOutputToFile(session, hash, cache.$outputs[hash], writeFolder, { + ...opts, + node, }); }); + }); } /** @@ -231,120 +250,125 @@ export function transformOutputsToFile( * If the new and existing types are the same, always just keep existing. */ function isPreferredOutputType(newType: string, existingType: string) { - if (existingType.startsWith('image/')) return false; - if (newType.startsWith('image')) return true; - if (existingType === 'text/html') return false; - if (newType === 'text/html') return true; - return false; + const newIndex = PREFERRED_MIME_TYPES.findIndex((fn) => fn(newType)); + const existingIndex = PREFERRED_MIME_TYPES.findIndex((fn) => fn(existingType)); + if (newIndex === -1) { + return false; + } else if (existingIndex === -1) { + return true; + } else { + return newIndex < existingIndex; + } +} +/** + * Lift the children of outputs nodes into their parents + */ +export function transformLiftOutputs(mdast: GenericParent) { + const outputsNodes = selectAll('outputs', mdast) as GenericNode[]; + outputsNodes.forEach((node) => { + // Lift all output nodes + const outputNodes = selectAll('output', node) as GenericNode[]; + outputNodes.forEach((outputNode) => { + outputNode.type = '__lift__'; + }); + // Lift the outputs node + node.type = '__lift__'; + }); + liftChildren(mdast, '__lift__'); } +export const PREFERRED_MIME_TYPES: Array<(mime: string) => boolean> = [ + // Markdown (any variant) + (mime) => !!preferredMarkdownMIMEType(mime), + // Image (any type) + (mime) => mime.startsWith('image'), + (mime) => mime === 'text/html', + (mime) => mime === 'text/plain', +]; + /** * Convert output nodes with minified content to image or code * * This writes outputs of type image to file, modifies outputs of type * text to a code node, and removes other output types. */ -export function reduceOutputs( +export function transformReduceOutputs( session: ISession, mdast: GenericParent, file: string, writeFolder: string, opts?: { altOutputFolder?: string; vfile?: VFile }, ) { - const outputsNodes = selectAll('outputs', mdast) as GenericNode[]; + const outputs = selectAll('output', mdast) as GenericNode[]; const cache = castSession(session); - outputsNodes.forEach((outputsNode) => { - const outputs = outputsNode.children as GenericNode[]; - - outputs.forEach((outputNode) => { - if (outputNode.visibility === 'remove' || outputNode.visibility === 'hide') { - // Hidden nodes should not show up in simplified outputs for static export - outputNode.type = '__delete__'; - return; - } - if (!outputNode.jupyter_data && !outputNode.children?.length) { - outputNode.type = '__delete__'; - return; - } - // Lift the `output` node into `Outputs` - outputNode.type = '__lift__'; - - // If the output already has children, we don't need to do anything - if (outputNode.children?.length) { - return; - } - - // Find a preferred IOutput type to render into the AST - const selectedOutputs: { content_type: string; hash: string }[] = []; - if (outputNode.jupyter_data) { - const output = outputNode.jupyter_data; - - let selectedOutput: { content_type: string; hash: string } | undefined; - walkOutputs([output], (obj: any) => { - const { output_type, content_type, hash } = obj; - if (!hash) return undefined; - if (!selectedOutput || isPreferredOutputType(content_type, selectedOutput.content_type)) { - if (['error', 'stream'].includes(output_type)) { - selectedOutput = { content_type: 'text/plain', hash }; - } else if (typeof content_type === 'string') { - if ( - content_type.startsWith('image/') || - content_type === 'text/plain' || - content_type === 'text/html' - ) { - selectedOutput = { content_type, hash }; - } - } + outputs.forEach((node) => { + if (!node.data) { + return; + } + const selectedOutputs: { content_type: string; hash: string }[] = []; + const rawOutputs = node.data === undefined ? [] : [node.data]; + rawOutputs.forEach((output: MinifiedOutput) => { + let selectedOutput: { content_type: string; hash: string } | undefined; + walkOutputs([output], (obj: any) => { + const { output_type, content_type, hash } = obj; + if (!hash) return undefined; + if (!selectedOutput || isPreferredOutputType(content_type, selectedOutput.content_type)) { + // Check output type + if (['error', 'stream'].includes(output_type)) { + selectedOutput = { content_type: 'text/plain', hash }; } - }); - if (selectedOutput) selectedOutputs.push(selectedOutput); - } - const children: (Image | GenericNode)[] = selectedOutputs - .map((output): Image | GenericNode | GenericNode[] | undefined => { - const { content_type, hash } = output ?? {}; - if (!hash || !cache.$outputs[hash]) return undefined; - if (content_type === 'text/html') { - const htmlTree = { - type: 'root', - children: [ - { - type: 'html', - value: cache.$outputs[hash][0], - }, - ], - }; - htmlTransform(htmlTree); - return htmlTree.children; - } else if (content_type.startsWith('image/')) { - const path = writeCachedOutputToFile(session, hash, cache.$outputs[hash], writeFolder, { - ...opts, - node: outputNode, - }); - if (!path) return undefined; - const relativePath = relative(dirname(file), path); - return { - type: 'image', - data: { type: 'output' }, - url: relativePath, - urlSource: relativePath, - }; - } else if (content_type === 'text/plain' && cache.$outputs[hash]) { - const [content] = cache.$outputs[hash]; - return { - type: 'code', - data: { type: 'output' }, - value: stripAnsi(content), - }; + // Check content type + else if (typeof content_type === 'string') { + const preferredMime = PREFERRED_MIME_TYPES.some((matcher) => !!matcher(content_type)); + if (preferredMime) { + selectedOutput = { content_type, hash }; + } } - return undefined; - }) - .flat() - .filter((output): output is Image | GenericNode => !!output); - outputNode.children = children; + } + }); + if (selectedOutput) selectedOutputs.push(selectedOutput); }); - // Lift the `outputs` node - outputsNode.type = '__lift__'; + const children: (Image | GenericNode)[] = selectedOutputs + .map((output): Image | GenericNode | GenericNode[] | undefined => { + const { content_type, hash } = output ?? {}; + if (!hash || !cache.$outputs[hash]) return undefined; + if (content_type === 'text/html') { + const htmlTree = { + type: 'root', + children: [ + { + type: 'html', + value: cache.$outputs[hash][0], + }, + ], + }; + htmlTransform(htmlTree); + return htmlTree.children; + } else if (content_type.startsWith('image/')) { + const path = writeCachedOutputToFile(session, hash, cache.$outputs[hash], writeFolder, { + ...opts, + node, + }); + if (!path) return undefined; + const relativePath = relative(dirname(file), path); + return { + type: 'image', + data: { type: 'output' }, + url: relativePath, + urlSource: relativePath, + }; + } else if (content_type === 'text/plain' && cache.$outputs[hash]) { + const [content] = cache.$outputs[hash]; + return { + type: 'code', + data: { type: 'output' }, + value: stripAnsi(content), + }; + } + return undefined; + }) + .flat() + .filter((output): output is Image | GenericNode => !!output); + node.children = children; }); - remove(mdast, '__delete__'); - liftChildren(mdast, '__lift__'); } diff --git a/packages/myst-execute/package.json b/packages/myst-execute/package.json index af5718e7b..fc4fe4dda 100644 --- a/packages/myst-execute/package.json +++ b/packages/myst-execute/package.json @@ -39,7 +39,8 @@ "node-fetch": "^3.3.0", "unist-util-select": "^4.0.3", "vfile": "^5.3.7", - "which": "^4.0.0" + "which": "^4.0.0", + "nbtx": "^0.2.3" }, "devDependencies": { "@jupyterlab/nbformat": "^3.5.2", diff --git a/packages/myst-execute/src/cache.ts b/packages/myst-execute/src/cache.ts index af6f25f91..8cb576e8f 100644 --- a/packages/myst-execute/src/cache.ts +++ b/packages/myst-execute/src/cache.ts @@ -36,11 +36,13 @@ export class LocalDiskCache implements ICache { if (!existsSync(keyPath)) { return undefined; } - return JSON.parse(readFileSync(keyPath, { encoding: 'utf8' })); + const data = readFileSync(keyPath, { encoding: 'utf8' }); + return JSON.parse(data); } set(key: string, item: T) { + const data = JSON.stringify(item); const keyPath = this._makeKeyPath(key); - return writeFileSync(keyPath, JSON.stringify(item), { encoding: 'utf8' }); + return writeFileSync(keyPath, data, { encoding: 'utf8' }); } } diff --git a/packages/myst-execute/src/execute.ts b/packages/myst-execute/src/execute.ts index ab2d5bc1a..e9dfe4a6e 100644 --- a/packages/myst-execute/src/execute.ts +++ b/packages/myst-execute/src/execute.ts @@ -1,4 +1,5 @@ import { select, selectAll } from 'unist-util-select'; +import { computeHash } from 'myst-cli-utils'; import type { Logger } from 'myst-cli-utils'; import type { PageFrontmatter, KernelSpec } from 'myst-frontmatter'; import type { Kernel, KernelMessage, Session, SessionManager } from '@jupyterlab/services'; @@ -12,6 +13,8 @@ import assert from 'node:assert'; import { createHash } from 'node:crypto'; import type { Plugin } from 'unified'; import type { ICache } from './cache.js'; +import { minifyCellOutput } from 'nbtx'; +import type { MinifiedContentCache } from 'nbtx'; /** * Interpret an IOPub message as an IOutput object @@ -206,7 +209,7 @@ function isInlineExpression(node: GenericNode): node is InlineExpression { async function computeExecutableNodes( kernel: Kernel.IKernelConnection, nodes: (CodeBlock | InlineExpression)[], - opts: { vfile: VFile }, + vfile: VFile, ): Promise<{ results: (IOutput[] | IExpressionResult)[]; errorOccurred: boolean; @@ -215,9 +218,11 @@ async function computeExecutableNodes( const results: (IOutput[] | IExpressionResult)[] = []; for (const matchedNode of nodes) { + // An executable cell if (isCellBlock(matchedNode)) { // Pull out code to execute const code = select('code', matchedNode) as Code; + // Invoke kernel const { status, outputs } = await executeCode(kernel, code.value); // Cache result results.push(outputs); @@ -225,12 +230,14 @@ async function computeExecutableNodes( // Check for errors const allowErrors = codeBlockRaisesException(matchedNode); if (status === 'error' && !allowErrors) { + // Join all error tracebacks into a single newline-delimited string const errorMessage = outputs .map((item) => item.traceback) + .filter((item) => !!item) .flat() .join('\n'); fileError( - opts.vfile, + vfile, `An exception occurred during code execution, halting further execution:\n\n${errorMessage}`, { node: matchedNode, @@ -248,7 +255,7 @@ async function computeExecutableNodes( if (status === 'error') { const errorMessage = (result as IExpressionError).traceback.join('\n'); fileError( - opts.vfile, + vfile, `An exception occurred during expression evaluation, halting further execution:\n\n${errorMessage}`, { node: matchedNode }, ); @@ -273,9 +280,11 @@ async function computeExecutableNodes( * @param nodes executable MDAST nodes * @param computedResult computed results for each node */ -function applyComputedOutputsToNodes( +async function applyComputedOutputsToNodes( nodes: (CodeBlock | InlineExpression)[], computedResult: (IOutput[] | IExpressionResult)[], + outputCache: MinifiedContentCache, + minifyMaxCharacters?: number, ) { for (const matchedNode of nodes) { // Pull out the result for this node @@ -286,9 +295,17 @@ function applyComputedOutputsToNodes( // Pull out outputs to set data const outputs = select('outputs', matchedNode) as Outputs; // Ensure that whether this fails or succeeds, we write to `children` (e.g. due to a kernel error) - outputs.children = rawOutputData.map((data) => { - return { type: 'output', children: [], jupyter_data: data as any }; - }); + outputs.children = await Promise.all( + rawOutputData.map(async (data) => { + // Minify the output + const [minifiedOutput] = await minifyCellOutput([data], outputCache, { + computeHash, + maxCharacters: minifyMaxCharacters, + }); + const output: Output = { type: 'output', children: [], jupyter_data: minifiedOutput }; + return output; + }), + ); } else if (isInlineExpression(matchedNode)) { const rawOutputData = thisResult as Record | undefined; // Set data of expression to the result, or empty if we don't have one @@ -303,11 +320,14 @@ function applyComputedOutputsToNodes( export type Options = { basePath: string; cache: ICache<(IExpressionResult | IOutput[])[]>; + outputCache: MinifiedContentCache; sessionFactory: () => Promise; frontmatter: PageFrontmatter; + // Ignore output cache and forcibly re-execute ignoreCache?: boolean; errorIsFatal?: boolean; log?: Logger; + minifyMaxCharacters?: number; }; /** @@ -318,6 +338,8 @@ export type Options = { * @param opts */ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile, opts: Options) { + const { basePath, cache, sessionFactory, frontmatter, outputCache, minifyMaxCharacters } = opts; + const log = opts.log ?? console; // Pull out code-like nodes @@ -336,7 +358,7 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile } // We need the kernelspec to proceed - if (opts.frontmatter.kernelspec === undefined) { + if (frontmatter.kernelspec === undefined) { return fileError( vfile, `Notebook does not declare the necessary 'kernelspec' frontmatter key required for execution`, @@ -344,8 +366,8 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile } // See if we already cached this execution - const cacheKey = buildCacheKey(opts.frontmatter.kernelspec, executableNodes); - let cachedResults: (IExpressionResult | IOutput[])[] | undefined = opts.cache.get(cacheKey); + const cacheKey = buildCacheKey(frontmatter.kernelspec, executableNodes); + let cachedResults: (IExpressionResult | IOutput[])[] | undefined = cache.get(cacheKey); // Do we need to re-execute notebook? if (opts.ignoreCache || cachedResults === undefined) { @@ -354,7 +376,7 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile opts.ignoreCache ? '[cache ignored]' : '[no execution cache found]' }`, ); - const sessionManager = await opts.sessionFactory(); + const sessionManager = await sessionFactory(); // Do we not have a working session? if (sessionManager === undefined) { fileError(vfile, `Could not load Jupyter session manager to run executable nodes`, { @@ -365,11 +387,11 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile else { let sessionConnection: Session.ISessionConnection | undefined; const sessionOpts = { - path: path.relative(opts.basePath, vfile.path), + path: path.relative(basePath, vfile.path), type: 'notebook', name: path.basename(vfile.path), kernel: { - name: opts.frontmatter.kernelspec.name, + name: frontmatter.kernelspec.name, }, }; await sessionManager @@ -387,11 +409,11 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile const { results, errorOccurred } = await computeExecutableNodes( conn.kernel, executableNodes, - { vfile }, + vfile, ); // Populate cache if things were successful if (!errorOccurred) { - opts.cache.set(cacheKey, results); + cache.set(cacheKey, results); } // Refer to these computed results cachedResults = results; @@ -406,7 +428,12 @@ export async function kernelExecutionTransform(tree: GenericParent, vfile: VFile if (cachedResults) { // Apply results to tree - applyComputedOutputsToNodes(executableNodes, cachedResults); + await applyComputedOutputsToNodes( + executableNodes, + cachedResults, + outputCache, + minifyMaxCharacters, + ); } } diff --git a/packages/myst-spec-ext/package.json b/packages/myst-spec-ext/package.json index 25e793419..b8b2b08b8 100644 --- a/packages/myst-spec-ext/package.json +++ b/packages/myst-spec-ext/package.json @@ -32,6 +32,7 @@ "url": "https://github.com/jupyter-book/mystmd/issues" }, "dependencies": { - "myst-spec": "^0.0.5" + "myst-spec": "^0.0.5", + "nbtx": "^0.2.3" } } diff --git a/packages/myst-spec-ext/src/types.ts b/packages/myst-spec-ext/src/types.ts index 3730cab53..85020ff8a 100644 --- a/packages/myst-spec-ext/src/types.ts +++ b/packages/myst-spec-ext/src/types.ts @@ -20,6 +20,7 @@ import type { CrossReference as SpecCrossReference, Link as SpecLink, } from 'myst-spec'; +import type { MinifiedOutput } from 'nbtx'; type Visibility = 'show' | 'hide' | 'remove'; @@ -255,7 +256,7 @@ export type Output = Node & Target & { type: 'output'; children: (FlowContent | ListContent | PhrasingContent)[]; - jupyter_data: any; // TODO: set this to IOutput + jupyter_data: MinifiedOutput; }; export type Outputs = Node &