Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve performance, alternative approach (+149% throughput and more) #38

Merged
merged 2 commits into from Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
205 changes: 130 additions & 75 deletions index.js
@@ -1,105 +1,160 @@
import isFullwidthCodePoint from 'is-fullwidth-code-point';
import ansiStyles from 'ansi-styles';
import isFullwidthCodePoint from 'is-fullwidth-code-point';

const astralRegex = /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/;
const ESCAPES = new Set([27, 155]);

const ESCAPES = [
'\u001B',
'\u009B'
];
const endCodesSet = new Set();
const endCodesMap = new Map();
for (const [start, end] of ansiStyles.codes) {
endCodesSet.add(ansiStyles.color.ansi(end));
endCodesMap.set(ansiStyles.color.ansi(start), ansiStyles.color.ansi(end));
}

const wrapAnsi = code => `${ESCAPES[0]}[${code}m`;
function getEndCode(code) {
if (endCodesSet.has(code)) {
return code;
}

const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
let output = [];
ansiCodes = [...ansiCodes];
if (endCodesMap.has(code)) {
return endCodesMap.get(code);
}

for (let ansiCode of ansiCodes) {
const ansiCodeOrigin = ansiCode;
if (ansiCode.includes(';')) {
ansiCode = ansiCode.split(';')[0][0] + '0';
}
code = code.slice(2);
if (code.includes(';')) {
code = code[0] + '0';
}

const item = ansiStyles.codes.get(Number.parseInt(ansiCode, 10));
if (item) {
const indexEscape = ansiCodes.indexOf(item.toString());
if (indexEscape === -1) {
output.push(wrapAnsi(isEscapes ? item : ansiCodeOrigin));
} else {
ansiCodes.splice(indexEscape, 1);
}
} else if (isEscapes) {
output.push(wrapAnsi(0));
break;
} else {
output.push(wrapAnsi(ansiCodeOrigin));
}
const returnValue = ansiStyles.codes.get(Number.parseInt(code, 10));
if (returnValue) {
return ansiStyles.color.ansi(returnValue);
}

if (isEscapes) {
output = output.filter((element, index) => output.indexOf(element) === index);
return ansiStyles.reset.open;
}

if (endAnsiCode !== undefined) {
const fistEscapeCode = wrapAnsi(ansiStyles.codes.get(Number.parseInt(endAnsiCode, 10)));
// TODO: Remove the use of `.reduce` here.
// eslint-disable-next-line unicorn/no-array-reduce
output = output.reduce((current, next) => next === fistEscapeCode ? [next, ...current] : [...current, next], []);
function findNumberIndex(string) {
for (let index = 0; index < string.length; index++) {
const charCode = string.charCodeAt(index);
if (charCode >= 48 && charCode <= 57) {
return index;
AlCalzone marked this conversation as resolved.
Show resolved Hide resolved
}
}

return output.join('');
};

export default function sliceAnsi(string, begin, end) {
const characters = [...string];
const ansiCodes = [];

let stringEnd = typeof end === 'number' ? end : characters.length;
let isInsideEscape = false;
let ansiCode;
let visible = 0;
let output = '';
return -1;
}

for (const [index, character] of characters.entries()) {
let leftEscape = false;
function parseAnsiCode(string, offset) {
string = string.slice(offset, offset + 19);
const startIndex = findNumberIndex(string);
if (startIndex !== -1) {
let endIndex = string.indexOf('m', startIndex);
if (endIndex === -1) {
endIndex = string.length;
}

if (ESCAPES.includes(character)) {
const code = /\d[^m]*/.exec(string.slice(index, index + 18));
ansiCode = code && code.length > 0 ? code[0] : undefined;
return string.slice(0, endIndex + 1);
}
}

if (visible < stringEnd) {
isInsideEscape = true;
function tokenize(string, endChar = Number.POSITIVE_INFINITY) {
const returnValue = [];

if (ansiCode !== undefined) {
ansiCodes.push(ansiCode);
}
let index = 0;
let visible = 0;
while (index < string.length) {
AlCalzone marked this conversation as resolved.
Show resolved Hide resolved
const codePoint = string.codePointAt(index);

if (ESCAPES.has(codePoint)) {
const code = parseAnsiCode(string, index);
if (code) {
returnValue.push({
type: 'ansi',
code,
endCode: getEndCode(code)
});
index += code.length;
continue;
}
} else if (isInsideEscape && character === 'm') {
isInsideEscape = false;
leftEscape = true;
}

if (!isInsideEscape && !leftEscape) {
visible++;
const fullWidth = isFullwidthCodePoint(codePoint);
const character = String.fromCodePoint(codePoint);
AlCalzone marked this conversation as resolved.
Show resolved Hide resolved

returnValue.push({
type: 'char',
value: character,
AlCalzone marked this conversation as resolved.
Show resolved Hide resolved
fullWidth
});
index += character.length;
visible += fullWidth ? 2 : character.length;
if (visible >= endChar) {
break;
}
}

if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
visible++;
return returnValue;
}

if (typeof end !== 'number') {
stringEnd++;
}
/**
 * Collapse a sequence of ANSI tokens down to the ones still in effect after
 * the last token has been applied.
 *
 * @param {Iterable<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {Array<{code: string, endCode: string}>} The surviving start-code tokens, in encounter order.
 */
function reduceAnsiCodes(codes) {
	let active = [];

	for (const current of codes) {
		if (current.code === ansiStyles.reset.open) {
			// A reset cancels every style seen so far.
			active = [];
			continue;
		}

		const isEndCode = endCodesSet.has(current.code);

		// An end code cancels the start codes it closes; a start code replaces
		// any earlier start code that shares its end code.
		const cancelledEndCode = isEndCode ? current.code : current.endCode;
		active = active.filter(entry => entry.endCode !== cancelledEndCode);

		if (!isEndCode) {
			active.push(current);
		}
	}

	return active;
}

/**
 * Build the string of end codes that closes every style still active after
 * `codes` has been applied.
 *
 * @param {Iterable<{code: string, endCode: string}>} codes - ANSI tokens in the order they were encountered.
 * @returns {string} The end codes of the still-active tokens, last-opened first, concatenated.
 */
function undoAnsiCodes(codes) {
	const stillActive = reduceAnsiCodes(codes);
	const closers = [];

	// Walk backwards so the most recently opened style is closed first.
	for (let index = stillActive.length - 1; index >= 0; index--) {
		const {endCode} = stillActive[index];
		closers.push(endCode);
	}

	return closers.join('');
}

if (visible > begin && visible <= stringEnd) {
output += character;
} else if (visible === begin && !isInsideEscape && ansiCode !== undefined) {
output = checkAnsi(ansiCodes);
} else if (visible >= stringEnd) {
output += checkAnsi(ansiCodes, true, ansiCode);
export default function sliceAnsi(string, begin, end) {
const tokens = tokenize(string, end);
let activeCodes = [];
let pos = 0;
let returnValue = '';
AlCalzone marked this conversation as resolved.
Show resolved Hide resolved
let include = false;

for (const token of tokens) {
if (end !== undefined && pos >= end) {
break;
}

if (token.type === 'ansi') {
activeCodes.push(token);
if (include) {
returnValue += token.code;
}
} else {
// Char
if (!include && pos >= begin) {
include = true;
// Simplify active codes
activeCodes = reduceAnsiCodes(activeCodes);
returnValue = activeCodes.map(({code}) => code).join('');
}

if (include) {
returnValue += token.value;
}

pos += token.fullWidth ? 2 : token.value.length;
}
}

return output;
// Disable active codes at the end
returnValue += undoAnsiCodes(activeCodes);
return returnValue;
}
8 changes: 6 additions & 2 deletions test.js
Expand Up @@ -83,22 +83,26 @@ test('weird null issue', t => {
});

test('support true color escape sequences', t => {
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[22m\u001B[49m\u001B[39m');
t.is(sliceAnsi('\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0municorn\u001B[39m\u001B[49m\u001B[22m', 0, 3), '\u001B[1m\u001B[48;2;255;255;255m\u001B[38;2;255;0;0muni\u001B[39m\u001B[49m\u001B[22m');
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this change, the start codes are undone in reverse order of opening, so the end codes appear in the same order as in the input string.

});

// See https://github.com/chalk/slice-ansi/issues/24
test('doesn\'t add extra escapes', t => {
const output = `${chalk.black.bgYellow(' RUNS ')} ${chalk.green('test')}`;
t.is(sliceAnsi(output, 0, 7), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(sliceAnsi(output, 0, 8), `${chalk.black.bgYellow(' RUNS ')} `);
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(`\u001B[31m${chalk.black.bgYellow(' RUN')}`));
t.is(JSON.stringify(sliceAnsi('\u001B[31m' + output, 0, 4)), JSON.stringify(chalk.black.bgYellow(' RUN')));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ANSI code for red (`\u001B[31m`) is unnecessary in the output, because it is immediately overridden by black.

});

// See https://github.com/chalk/slice-ansi/issues/26
test('does not lose fullwidth characters', t => {
t.is(sliceAnsi('古古test', 0), '古古test');
});

test('can create empty slices', t => {
t.is(sliceAnsi('test', 0, 0), '');
});

test.failing('slice links', t => {
const link = '\u001B]8;;https://google.com\u0007Google\u001B]8;;\u0007';
t.is(sliceAnsi(link, 0, 6), link);
Expand Down