perf: tokenize input and operate on analyzed array

Commit 5a7f251 · chalk · Mar 20, 2023
1 parent: d9f402c

Showing 2 changed files with 136 additions and 77 deletions.
diff --git a/index.js b/index.js
@@ -1,105 +1,160 @@
-import isFullwidthCodePoint from 'is-fullwidth-code-point';
 import ansiStyles from 'ansi-styles';
+import isFullwidthCodePoint from 'is-fullwidth-code-point';
 
-const astralRegex = /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/;
+const ESCAPES = new Set([27, 155]);
 
-const ESCAPES = [
- '\u001B',
- '\u009B'
-];
+const endCodesSet = new Set();
+const endCodesMap = new Map();
+for (const [start, end] of ansiStyles.codes) {
+ endCodesSet.add(ansiStyles.color.ansi(end));
+ endCodesMap.set(ansiStyles.color.ansi(start), ansiStyles.color.ansi(end));
+}
 
-const wrapAnsi = code => `${ESCAPES[0]}[${code}m`;
+function getEndCode(code) {
+ if (endCodesSet.has(code)) {
+ return code;
+ }
 
-const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
- let output = [];
- ansiCodes = [...ansiCodes];
+ if (endCodesMap.has(code)) {
+  return endCodesMap.get(code);
+ }
 
- for (let ansiCode of ansiCodes) {
- const ansiCodeOrigin = ansiCode;
- if (ansiCode.includes(';')) {
- ansiCode = ansiCode.split(';')[0][0] + '0';
- }
+ code = code.slice(2);
+ if (code.includes(';')) {
+ code = code[0] + '0';
+ }
 
- const item = ansiStyles.codes.get(Number.parseInt(ansiCode, 10));
- if (item) {
- const indexEscape = ansiCodes.indexOf(item.toString());
- if (indexEscape === -1) {
- output.push(wrapAnsi(isEscapes ? item : ansiCodeOrigin));
- } else {
- ansiCodes.splice(indexEscape, 1);
- }
- } else if (isEscapes) {
- output.push(wrapAnsi(0));
- break;
- } else {
- output.push(wrapAnsi(ansiCodeOrigin));
- }
+ const returnValue = ansiStyles.codes.get(Number.parseInt(code, 10));
+ if (returnValue) {
+ return ansiStyles.color.ansi(returnValue);
  }
 
- if (isEscapes) {
- output = output.filter((element, index) => output.indexOf(element) === index);
+ return ansiStyles.reset.open;
+}
 
- if (endAnsiCode !== undefined) {
-  const fistEscapeCode = wrapAnsi(ansiStyles.codes.get(Number.parseInt(endAnsiCode, 10)));
-  // TODO: Remove the use of `.reduce` here.
-  // eslint-disable-next-line unicorn/no-array-reduce
- output = output.reduce((current, next) => next === fistEscapeCode ? [next, ...current] : [...current, next], []);
+function findNumberIndex(string) {
+ for (let index = 0; index < string.length; index++) {
+ const charCode = string.charCodeAt(index);
+ if (charCode >= 48 && charCode <= 57) {
+ return index;
  }
  }
 
- return output.join('');
-};
-
-export default function sliceAnsi(string, begin, end) {
- const characters = [...string];
- const ansiCodes = [];
-
- let stringEnd = typeof end === 'number' ? end : characters.length;
- let isInsideEscape = false;
- let ansiCode;
- let visible = 0;
- let output = '';
+ return -1;
+}
 
- for (const [index, character] of characters.entries()) {
- let leftEscape = false;
+function parseAnsiCode(string, offset) {
+ string = string.slice(offset, offset + 19);
+ const startIndex = findNumberIndex(string);
+ if (startIndex !== -1) {
+ let endIndex = string.indexOf('m', startIndex);
+ if (endIndex === -1) {
+ endIndex = string.length;
+ }
 
- if (ESCAPES.includes(character)) {
-  const code = /\d[^m]*/.exec(string.slice(index, index + 18));
- ansiCode = code && code.length > 0 ? code[0] : undefined;
+ return string.slice(0, endIndex + 1);
+ }
+}
 
- if (visible < stringEnd) {
-  isInsideEscape = true;
+function tokenize(string, endChar = Number.POSITIVE_INFINITY) {
+ const returnValue = [];
 
- if (ansiCode !== undefined) {
- ansiCodes.push(ansiCode);
- }
+ let index = 0;
+ let visible = 0;
+ while (index < string.length) {
+ const codePoint = string.codePointAt(index);
+
+ if (ESCAPES.has(codePoint)) {
+ const code = parseAnsiCode(string, index);
+ if (code) {
+ returnValue.push({
+ type: 'ansi',
+ code,
+ endCode: getEndCode(code)
+ });
+ index += code.length;
+ continue;
  }
- } else if (isInsideEscape && character === 'm') {
- isInsideEscape = false;
- leftEscape = true;
  }
 
- if (!isInsideEscape && !leftEscape) {
- visible++;
+ const fullWidth = isFullwidthCodePoint(codePoint);
+ const character = String.fromCodePoint(codePoint);
+
+ returnValue.push({
+ type: 'char',
+ value: character,
+ fullWidth
+ });
+ index += character.length;
+ visible += fullWidth ? 2 : character.length;
+ if (visible >= endChar) {
+ break;
  }
+ }
 
-  if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
- visible++;
+ return returnValue;
+}
 
- if (typeof end !== 'number') {
- stringEnd++;
- }
+function reduceAnsiCodes(codes) {
+ let returnValue = [];
+ for (const code of codes) {
+ if (code.code === ansiStyles.reset.open) {
+ // Reset code, disable all codes
+ returnValue = [];
+ } else if (endCodesSet.has(code.code)) {
+ // This is an end code, disable all matching start codes
+ returnValue = returnValue.filter(returnValueCode => returnValueCode.endCode !== code.code);
+ } else {
+ // This is a start code. Disable all styles this "overrides", then enable it
+ returnValue = returnValue.filter(returnValueCode => returnValueCode.endCode !== code.endCode);
+ returnValue.push(code);
  }
+ }
+
+ return returnValue;
+}
+
+function undoAnsiCodes(codes) {
+ const reduced = reduceAnsiCodes(codes);
+ const endCodes = reduced.map(({endCode}) => endCode);
+ return endCodes.reverse().join('');
+}
 
- if (visible > begin && visible <= stringEnd) {
- output += character;
- } else if (visible === begin && !isInsideEscape && ansiCode !== undefined) {
- output = checkAnsi(ansiCodes);
- } else if (visible >= stringEnd) {
- output += checkAnsi(ansiCodes, true, ansiCode);
+export default function sliceAnsi(string, begin, end) {
+ const tokens = tokenize(string, end);
+ let activeCodes = [];
+ let pos = 0;
+ let returnValue = '';
+ let include = false;
+
+ for (const token of tokens) {
+ if (end !== undefined && pos >= end) {
  break;
  }
+
+ if (token.type === 'ansi') {
+ activeCodes.push(token);
+ if (include) {
+ returnValue += token.code;
+ }
+ } else {
+ // Char
+ if (!include && pos >= begin) {
+ include = true;
+ // Simplify active codes
+ activeCodes = reduceAnsiCodes(activeCodes);
+ returnValue = activeCodes.map(({code}) => code).join('');
+ }
+
+ if (include) {
+ returnValue += token.value;
+ }
+
+ pos += token.fullWidth ? 2 : token.value.length;
+ }
  }
 
- return output;
+ // Disable active codes at the end
+ returnValue += undoAnsiCodes(activeCodes);
+ return returnValue;
 }
diff --git a/test.js b/test.js
@@ -83,22 +83,26 @@ test('weird null issue', t => {
 });
 
 test('support true color escape sequences', t => {
- t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[22m\u001B[49m\u001B[39m');
+ t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[39m\u001B[49m\u001B[22m');
 });
 
 // See https://github.com/chalk/slice-ansi/issues/24
 test('doesn\'t add extra escapes', t => {
  const output = `${chalk.black.bgYellow(' RUNS ')} ${chalk.green('test')}`;
  t.is(sliceAnsi(output, 0, 7), `${chalk.black.bgYellow(' RUNS ')} `);
  t.is(sliceAnsi(output, 0, 8), `${chalk.black.bgYellow(' RUNS ')} `);
- t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(`\u001B[31m${chalk.black.bgYellow(' RUN')}`));
+ t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(chalk.black.bgYellow(' RUN')));
 });
 
 // See https://github.com/chalk/slice-ansi/issues/26
 test('does not lose fullwidth characters', t => {
  t.is(sliceAnsi('古古test', 0), '古古test');
 });
 
+test('can create empty slices', t => {
+ t.is(sliceAnsi('test', 0, 0), '');
+});
+
 test.failing('slice links', t => {
  const link = '\u001B]8;;https://google.com\u0007Google\u001B]8;;\u0007';
  t.is(sliceAnsi(link, 0, 6), link);