Skip to content

Commit

Permalink
perf: tokenize input and operate on analyzed array
Browse files Browse the repository at this point in the history
  • Loading branch information
AlCalzone committed Mar 20, 2023
1 parent d9f402c commit 5a7f251
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 77 deletions.
205 changes: 130 additions & 75 deletions index.js
@@ -1,105 +1,160 @@
import isFullwidthCodePoint from 'is-fullwidth-code-point';
import ansiStyles from 'ansi-styles';
import isFullwidthCodePoint from 'is-fullwidth-code-point';

const astralRegex = /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/;
const ESCAPES = new Set([27, 155]);

const ESCAPES = [
'\u001B',
'\u009B'
];
// Lookup tables derived from the `ansiStyles.codes` pairs:
// - endCodesSet holds the escape sequence built from the second member of each pair.
// - endCodesMap maps the sequence built from the first member to the one built from the second.
const endCodesSet = new Set();
const endCodesMap = new Map();
for (const [openCode, closeCode] of ansiStyles.codes) {
	const closeSequence = ansiStyles.color.ansi(closeCode);
	endCodesSet.add(closeSequence);
	endCodesMap.set(ansiStyles.color.ansi(openCode), closeSequence);
}

const wrapAnsi = code => `${ESCAPES[0]}[${code}m`;
function getEndCode(code) {
if (endCodesSet.has(code)) {
return code;
}

const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
let output = [];
ansiCodes = [...ansiCodes];
if (endCodesMap.has(code)) {
return endCodesMap.get(code);
}

for (let ansiCode of ansiCodes) {
const ansiCodeOrigin = ansiCode;
if (ansiCode.includes(';')) {
ansiCode = ansiCode.split(';')[0][0] + '0';
}
code = code.slice(2);
if (code.includes(';')) {
code = code[0] + '0';
}

const item = ansiStyles.codes.get(Number.parseInt(ansiCode, 10));
if (item) {
const indexEscape = ansiCodes.indexOf(item.toString());
if (indexEscape === -1) {
output.push(wrapAnsi(isEscapes ? item : ansiCodeOrigin));
} else {
ansiCodes.splice(indexEscape, 1);
}
} else if (isEscapes) {
output.push(wrapAnsi(0));
break;
} else {
output.push(wrapAnsi(ansiCodeOrigin));
}
const returnValue = ansiStyles.codes.get(Number.parseInt(code, 10));
if (returnValue) {
return ansiStyles.color.ansi(returnValue);
}

if (isEscapes) {
output = output.filter((element, index) => output.indexOf(element) === index);
return ansiStyles.reset.open;
}

if (endAnsiCode !== undefined) {
const fistEscapeCode = wrapAnsi(ansiStyles.codes.get(Number.parseInt(endAnsiCode, 10)));
// TODO: Remove the use of `.reduce` here.
// eslint-disable-next-line unicorn/no-array-reduce
output = output.reduce((current, next) => next === fistEscapeCode ? [next, ...current] : [...current, next], []);
function findNumberIndex(string) {
for (let index = 0; index < string.length; index++) {
const charCode = string.charCodeAt(index);
if (charCode >= 48 && charCode <= 57) {
return index;
}
}

return output.join('');
};

export default function sliceAnsi(string, begin, end) {
const characters = [...string];
const ansiCodes = [];

let stringEnd = typeof end === 'number' ? end : characters.length;
let isInsideEscape = false;
let ansiCode;
let visible = 0;
let output = '';
return -1;
}

for (const [index, character] of characters.entries()) {
let leftEscape = false;
function parseAnsiCode(string, offset) {
string = string.slice(offset, offset + 19);
const startIndex = findNumberIndex(string);
if (startIndex !== -1) {
let endIndex = string.indexOf('m', startIndex);
if (endIndex === -1) {
endIndex = string.length;
}

if (ESCAPES.includes(character)) {
const code = /\d[^m]*/.exec(string.slice(index, index + 18));
ansiCode = code && code.length > 0 ? code[0] : undefined;
return string.slice(0, endIndex + 1);
}
}

if (visible < stringEnd) {
isInsideEscape = true;
function tokenize(string, endChar = Number.POSITIVE_INFINITY) {
const returnValue = [];

if (ansiCode !== undefined) {
ansiCodes.push(ansiCode);
}
let index = 0;
let visible = 0;
while (index < string.length) {
const codePoint = string.codePointAt(index);

if (ESCAPES.has(codePoint)) {
const code = parseAnsiCode(string, index);
if (code) {
returnValue.push({
type: 'ansi',
code,
endCode: getEndCode(code)
});
index += code.length;
continue;
}
} else if (isInsideEscape && character === 'm') {
isInsideEscape = false;
leftEscape = true;
}

if (!isInsideEscape && !leftEscape) {
visible++;
const fullWidth = isFullwidthCodePoint(codePoint);
const character = String.fromCodePoint(codePoint);

returnValue.push({
type: 'char',
value: character,
fullWidth
});
index += character.length;
visible += fullWidth ? 2 : character.length;
if (visible >= endChar) {
break;
}
}

if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
visible++;
return returnValue;
}

if (typeof end !== 'number') {
stringEnd++;
}
/**
 * Collapses a sequence of ANSI tokens into the ones still in effect after the last token.
 *
 * @param {Array<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {Array<{code: string, endCode: string}>} The start-code tokens that remain active, in order.
 */
function reduceAnsiCodes(codes) {
	let active = [];

	for (const token of codes) {
		if (token.code === ansiStyles.reset.open) {
			// The reset sequence drops everything collected so far.
			active = [];
			continue;
		}

		// An end code removes the entries it terminates. A start code removes the
		// entries that share its end code and is then appended itself.
		const isEndCode = endCodesSet.has(token.code);
		const supersededEndCode = isEndCode ? token.code : token.endCode;
		active = active.filter(entry => entry.endCode !== supersededEndCode);

		if (!isEndCode) {
			active.push(token);
		}
	}

	return active;
}

/**
 * Builds the string of end codes for the styles that `reduceAnsiCodes` reports as active.
 *
 * @param {Array<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {string} The end codes of the remaining tokens, concatenated last-to-first.
 */
function undoAnsiCodes(codes) {
	const stillActive = reduceAnsiCodes(codes);
	const closers = [];

	// Walk backwards so the most recently collected token is closed first.
	for (let index = stillActive.length - 1; index >= 0; index--) {
		closers.push(stillActive[index].endCode);
	}

	return closers.join('');
}

if (visible > begin && visible <= stringEnd) {
output += character;
} else if (visible === begin && !isInsideEscape && ansiCode !== undefined) {
output = checkAnsi(ansiCodes);
} else if (visible >= stringEnd) {
output += checkAnsi(ansiCodes, true, ansiCode);
/**
 * Slices a string that may contain ANSI escape codes, counting only visible characters.
 *
 * Escape codes seen before the slice starts are reduced to the ones still active and
 * re-emitted at the start of the result; codes still active at the end are closed.
 *
 * @param {string} string - The input, possibly containing ANSI escape codes.
 * @param {number} begin - Visible position at which the slice starts.
 * @param {number} [end] - Visible position at which the slice stops; omitted means "to the end".
 * @returns {string} The sliced string including the escape codes it needs.
 */
export default function sliceAnsi(string, begin, end) {
	const tokens = tokenize(string, end);
	let activeCodes = [];
	let pos = 0;
	let returnValue = '';
	let include = false;

	for (const token of tokens) {
		if (end !== undefined && pos >= end) {
			break;
		}

		if (token.type === 'ansi') {
			// Track every escape code; only echo it once the slice has started.
			activeCodes.push(token);
			if (include) {
				returnValue += token.code;
			}
		} else {
			// Char
			if (!include && pos >= begin) {
				include = true;
				// Simplify active codes and open the slice with them
				activeCodes = reduceAnsiCodes(activeCodes);
				returnValue = activeCodes.map(({code}) => code).join('');
			}

			if (include) {
				returnValue += token.value;
			}

			// Full-width characters advance the position by two.
			pos += token.fullWidth ? 2 : token.value.length;
		}
	}

	// Disable active codes at the end
	returnValue += undoAnsiCodes(activeCodes);
	return returnValue;
}
8 changes: 6 additions & 2 deletions test.js
Expand Up @@ -83,22 +83,26 @@ test('weird null issue', t => {
});

test('support true color escape sequences', t => {
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[22m\u001B[49m\u001B[39m');
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[39m\u001B[49m\u001B[22m');
});

// See https://github.com/chalk/slice-ansi/issues/24
test('doesn\'t add extra escapes', t => {
const output = `${chalk.black.bgYellow(' RUNS ')} ${chalk.green('test')}`;
t.is(sliceAnsi(output, 0, 7), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(sliceAnsi(output, 0, 8), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(`\u001B[31m${chalk.black.bgYellow(' RUN')}`));
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(chalk.black.bgYellow(' RUN')));
});

// See https://github.com/chalk/slice-ansi/issues/26
test('does not lose fullwidth characters', t => {
t.is(sliceAnsi('古古test', 0), '古古test');
});

test('can create empty slices', t => {
t.is(sliceAnsi('test', 0, 0), '');
});

test.failing('slice links', t => {
const link = '\u001B]8;;https://google.com\u0007Google\u001B]8;;\u0007';
t.is(sliceAnsi(link, 0, 6), link);
Expand Down

0 comments on commit 5a7f251

Please sign in to comment.