Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve performance, alternative approach (+149% throughput and more) #38

Merged
merged 2 commits into from Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
209 changes: 134 additions & 75 deletions index.js
@@ -1,105 +1,164 @@
import isFullwidthCodePoint from 'is-fullwidth-code-point';
import ansiStyles from 'ansi-styles';
import isFullwidthCodePoint from 'is-fullwidth-code-point';

const astralRegex = /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/;
// \x1b and \x9b
const ESCAPES = new Set([27, 155]);

const ESCAPES = [
'\u001B',
'\u009B'
];
const CHAR_CODE_0 = '0'.charCodeAt(0);
const CHAR_CODE_9 = '9'.charCodeAt(0);

const wrapAnsi = code => `${ESCAPES[0]}[${code}m`;
const endCodesSet = new Set();
const endCodesMap = new Map();
for (const [start, end] of ansiStyles.codes) {
endCodesSet.add(ansiStyles.color.ansi(end));
endCodesMap.set(ansiStyles.color.ansi(start), ansiStyles.color.ansi(end));
}

const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
let output = [];
ansiCodes = [...ansiCodes];
function getEndCode(code) {
if (endCodesSet.has(code)) {
return code;
}

for (let ansiCode of ansiCodes) {
const ansiCodeOrigin = ansiCode;
if (ansiCode.includes(';')) {
ansiCode = ansiCode.split(';')[0][0] + '0';
}
if (endCodesMap.has(code)) {
return endCodesMap.get(code);
}

const item = ansiStyles.codes.get(Number.parseInt(ansiCode, 10));
if (item) {
const indexEscape = ansiCodes.indexOf(item.toString());
if (indexEscape === -1) {
output.push(wrapAnsi(isEscapes ? item : ansiCodeOrigin));
} else {
ansiCodes.splice(indexEscape, 1);
}
} else if (isEscapes) {
output.push(wrapAnsi(0));
break;
} else {
output.push(wrapAnsi(ansiCodeOrigin));
}
code = code.slice(2);
if (code.includes(';')) {
code = code[0] + '0';
}

if (isEscapes) {
output = output.filter((element, index) => output.indexOf(element) === index);
const returnValue = ansiStyles.codes.get(Number.parseInt(code, 10));
if (returnValue) {
return ansiStyles.color.ansi(returnValue);
}

return ansiStyles.reset.open;
}

if (endAnsiCode !== undefined) {
const fistEscapeCode = wrapAnsi(ansiStyles.codes.get(Number.parseInt(endAnsiCode, 10)));
// TODO: Remove the use of `.reduce` here.
// eslint-disable-next-line unicorn/no-array-reduce
output = output.reduce((current, next) => next === fistEscapeCode ? [next, ...current] : [...current, next], []);
function findNumberIndex(string) {
for (let index = 0; index < string.length; index++) {
const charCode = string.charCodeAt(index);
if (charCode >= CHAR_CODE_0 && charCode <= CHAR_CODE_9) {
return index;
}
}

return output.join('');
};
return -1;
}

export default function sliceAnsi(string, begin, end) {
const characters = [...string];
const ansiCodes = [];
function parseAnsiCode(string, offset) {
string = string.slice(offset, offset + 19);
const startIndex = findNumberIndex(string);
if (startIndex !== -1) {
let endIndex = string.indexOf('m', startIndex);
if (endIndex === -1) {
endIndex = string.length;
}

let stringEnd = typeof end === 'number' ? end : characters.length;
let isInsideEscape = false;
let ansiCode;
let visible = 0;
let output = '';
return string.slice(0, endIndex + 1);
}
}

for (const [index, character] of characters.entries()) {
let leftEscape = false;
function tokenize(string, endChar = Number.POSITIVE_INFINITY) {
const returnValue = [];

let index = 0;
let visibleCount = 0;
while (index < string.length) {
const codePoint = string.codePointAt(index);

if (ESCAPES.has(codePoint)) {
const code = parseAnsiCode(string, index);
if (code) {
returnValue.push({
type: 'ansi',
code,
endCode: getEndCode(code)
});
index += code.length;
continue;
}
}

if (ESCAPES.includes(character)) {
const code = /\d[^m]*/.exec(string.slice(index, index + 18));
ansiCode = code && code.length > 0 ? code[0] : undefined;
const isFullWidth = isFullwidthCodePoint(codePoint);
const character = String.fromCodePoint(codePoint);

returnValue.push({
type: 'character',
value: character,
isFullWidth
});
index += character.length;
visibleCount += isFullWidth ? 2 : character.length;
if (visibleCount >= endChar) {
break;
}
}

if (visible < stringEnd) {
isInsideEscape = true;
return returnValue;
}

if (ansiCode !== undefined) {
ansiCodes.push(ansiCode);
}
}
} else if (isInsideEscape && character === 'm') {
isInsideEscape = false;
leftEscape = true;
/**
 * Collapse a sequence of ANSI tokens down to the ones still in effect after
 * the whole sequence has been applied.
 *
 * @param {Iterable<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {Array<{code: string, endCode: string}>} The tokens that are still active, in their original relative order.
 */
function reduceAnsiCodes(codes) {
	let active = [];

	for (const token of codes) {
		// A full reset wipes every style seen so far.
		if (token.code === ansiStyles.reset.open) {
			active = [];
			continue;
		}

		// An end code closes every entry that it terminates. A start code
		// replaces every entry that shares its end code (i.e. the styles it
		// overrides) before being recorded itself.
		const isEndCode = endCodesSet.has(token.code);
		const closedBy = isEndCode ? token.code : token.endCode;
		active = active.filter(entry => entry.endCode !== closedBy);

		if (!isEndCode) {
			active.push(token);
		}
	}

	return active;
}

if (!isInsideEscape && !leftEscape) {
visible++;
/**
 * Build the escape string that closes every style still active in `codes`.
 *
 * The tokens are first simplified with `reduceAnsiCodes`; the end codes of the
 * survivors are then concatenated in reverse order, so the most recently
 * opened style is closed first.
 *
 * @param {Iterable<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {string} The concatenated end codes.
 */
function undoAnsiCodes(codes) {
	const closers = [];

	for (const {endCode} of reduceAnsiCodes(codes)) {
		// Prepending yields the reversed order without a separate reverse pass.
		closers.unshift(endCode);
	}

	return closers.join('');
}

export default function sliceAnsi(string, begin, end) {
const tokens = tokenize(string, end);
let activeCodes = [];
let position = 0;
let returnValue = '';
let include = false;

for (const token of tokens) {
if (end !== undefined && position >= end) {
break;
}

if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
visible++;
if (token.type === 'ansi') {
activeCodes.push(token);
if (include) {
returnValue += token.code;
}
} else {
// Char
if (!include && position >= begin) {
include = true;
// Simplify active codes
activeCodes = reduceAnsiCodes(activeCodes);
returnValue = activeCodes.map(({code}) => code).join('');
}

if (typeof end !== 'number') {
stringEnd++;
if (include) {
returnValue += token.value;
}
}

if (visible > begin && visible <= stringEnd) {
output += character;
} else if (visible === begin && !isInsideEscape && ansiCode !== undefined) {
output = checkAnsi(ansiCodes);
} else if (visible >= stringEnd) {
output += checkAnsi(ansiCodes, true, ansiCode);
break;
position += token.isFullWidth ? 2 : token.value.length;
}
}

return output;
// Disable active codes at the end
returnValue += undoAnsiCodes(activeCodes);
return returnValue;
}
8 changes: 6 additions & 2 deletions test.js
Expand Up @@ -83,22 +83,26 @@ test('weird null issue', t => {
});

test('support true color escape sequences', t => {
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[22m\u001B[49m\u001B[39m');
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[39m\u001B[49m\u001B[22m');
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this change, the end codes are emitted in the same order as in the input string, i.e. the styles are undone in the reverse of the order in which they were opened.

});

// See https://github.com/chalk/slice-ansi/issues/24
test('doesn\'t add extra escapes', t => {
const output = `${chalk.black.bgYellow(' RUNS ')} ${chalk.green('test')}`;
t.is(sliceAnsi(output, 0, 7), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(sliceAnsi(output, 0, 8), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(`\u001B[31m${chalk.black.bgYellow(' RUN')}`));
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(chalk.black.bgYellow(' RUN')));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The leading `\u001B[31m` (red foreground) code is unnecessary in the output, because it is immediately overridden by black.

});

// See https://github.com/chalk/slice-ansi/issues/26
test('does not lose fullwidth characters', t => {
t.is(sliceAnsi('古古test', 0), '古古test');
});

test('can create empty slices', t => {
t.is(sliceAnsi('test', 0, 0), '');
});

test.failing('slice links', t => {
const link = '\u001B]8;;https://google.com\u0007Google\u001B]8;;\u0007';
t.is(sliceAnsi(link, 0, 6), link);
Expand Down