Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: tokenize input and operate on analyzed array
- Loading branch information
Showing 2 changed files with 136 additions and 77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,105 +1,160 @@ | ||
import isFullwidthCodePoint from 'is-fullwidth-code-point'; | ||
import ansiStyles from 'ansi-styles'; | ||
import isFullwidthCodePoint from 'is-fullwidth-code-point'; | ||
|
||
const astralRegex = /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/; | ||
const ESCAPES = new Set([27, 155]); | ||
|
||
const ESCAPES = [ | ||
'\u001B', | ||
'\u009B' | ||
]; | ||
const endCodesSet = new Set(); | ||
const endCodesMap = new Map(); | ||
for (const [start, end] of ansiStyles.codes) { | ||
endCodesSet.add(ansiStyles.color.ansi(end)); | ||
endCodesMap.set(ansiStyles.color.ansi(start), ansiStyles.color.ansi(end)); | ||
} | ||
|
||
const wrapAnsi = code => `${ESCAPES[0]}[${code}m`; | ||
function getEndCode(code) { | ||
if (endCodesSet.has(code)) { | ||
return code; | ||
} | ||
|
||
const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => { | ||
let output = []; | ||
ansiCodes = [...ansiCodes]; | ||
if (endCodesMap.has(code)) { | ||
return endCodesMap.get(code); | ||
} | ||
|
||
for (let ansiCode of ansiCodes) { | ||
const ansiCodeOrigin = ansiCode; | ||
if (ansiCode.includes(';')) { | ||
ansiCode = ansiCode.split(';')[0][0] + '0'; | ||
} | ||
code = code.slice(2); | ||
if (code.includes(';')) { | ||
code = code[0] + '0'; | ||
} | ||
|
||
const item = ansiStyles.codes.get(Number.parseInt(ansiCode, 10)); | ||
if (item) { | ||
const indexEscape = ansiCodes.indexOf(item.toString()); | ||
if (indexEscape === -1) { | ||
output.push(wrapAnsi(isEscapes ? item : ansiCodeOrigin)); | ||
} else { | ||
ansiCodes.splice(indexEscape, 1); | ||
} | ||
} else if (isEscapes) { | ||
output.push(wrapAnsi(0)); | ||
break; | ||
} else { | ||
output.push(wrapAnsi(ansiCodeOrigin)); | ||
} | ||
const returnValue = ansiStyles.codes.get(Number.parseInt(code, 10)); | ||
if (returnValue) { | ||
return ansiStyles.color.ansi(returnValue); | ||
} | ||
|
||
if (isEscapes) { | ||
output = output.filter((element, index) => output.indexOf(element) === index); | ||
return ansiStyles.reset.open; | ||
} | ||
|
||
if (endAnsiCode !== undefined) { | ||
const fistEscapeCode = wrapAnsi(ansiStyles.codes.get(Number.parseInt(endAnsiCode, 10))); | ||
// TODO: Remove the use of `.reduce` here. | ||
// eslint-disable-next-line unicorn/no-array-reduce | ||
output = output.reduce((current, next) => next === fistEscapeCode ? [next, ...current] : [...current, next], []); | ||
function findNumberIndex(string) { | ||
for (let index = 0; index < string.length; index++) { | ||
const charCode = string.charCodeAt(index); | ||
if (charCode >= 48 && charCode <= 57) { | ||
return index; | ||
} | ||
} | ||
|
||
return output.join(''); | ||
}; | ||
|
||
export default function sliceAnsi(string, begin, end) { | ||
const characters = [...string]; | ||
const ansiCodes = []; | ||
|
||
let stringEnd = typeof end === 'number' ? end : characters.length; | ||
let isInsideEscape = false; | ||
let ansiCode; | ||
let visible = 0; | ||
let output = ''; | ||
return -1; | ||
} | ||
|
||
for (const [index, character] of characters.entries()) { | ||
let leftEscape = false; | ||
function parseAnsiCode(string, offset) { | ||
string = string.slice(offset, offset + 19); | ||
const startIndex = findNumberIndex(string); | ||
if (startIndex !== -1) { | ||
let endIndex = string.indexOf('m', startIndex); | ||
if (endIndex === -1) { | ||
endIndex = string.length; | ||
} | ||
|
||
if (ESCAPES.includes(character)) { | ||
const code = /\d[^m]*/.exec(string.slice(index, index + 18)); | ||
ansiCode = code && code.length > 0 ? code[0] : undefined; | ||
return string.slice(0, endIndex + 1); | ||
} | ||
} | ||
|
||
if (visible < stringEnd) { | ||
isInsideEscape = true; | ||
function tokenize(string, endChar = Number.POSITIVE_INFINITY) { | ||
const returnValue = []; | ||
|
||
if (ansiCode !== undefined) { | ||
ansiCodes.push(ansiCode); | ||
} | ||
let index = 0; | ||
let visible = 0; | ||
while (index < string.length) { | ||
const codePoint = string.codePointAt(index); | ||
|
||
if (ESCAPES.has(codePoint)) { | ||
const code = parseAnsiCode(string, index); | ||
if (code) { | ||
returnValue.push({ | ||
type: 'ansi', | ||
code, | ||
endCode: getEndCode(code) | ||
}); | ||
index += code.length; | ||
continue; | ||
} | ||
} else if (isInsideEscape && character === 'm') { | ||
isInsideEscape = false; | ||
leftEscape = true; | ||
} | ||
|
||
if (!isInsideEscape && !leftEscape) { | ||
visible++; | ||
const fullWidth = isFullwidthCodePoint(codePoint); | ||
const character = String.fromCodePoint(codePoint); | ||
|
||
returnValue.push({ | ||
type: 'char', | ||
value: character, | ||
fullWidth | ||
}); | ||
index += character.length; | ||
visible += fullWidth ? 2 : character.length; | ||
if (visible >= endChar) { | ||
break; | ||
} | ||
} | ||
|
||
if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) { | ||
visible++; | ||
return returnValue; | ||
} | ||
|
||
if (typeof end !== 'number') { | ||
stringEnd++; | ||
} | ||
function reduceAnsiCodes(codes) { | ||
let returnValue = []; | ||
for (const code of codes) { | ||
if (code.code === ansiStyles.reset.open) { | ||
// Reset code, disable all codes | ||
returnValue = []; | ||
} else if (endCodesSet.has(code.code)) { | ||
// This is an end code, disable all matching start codes | ||
returnValue = returnValue.filter(returnValueCode => returnValueCode.endCode !== code.code); | ||
} else { | ||
// This is a start code. Disable all styles this "overrides", then enable it | ||
returnValue = returnValue.filter(returnValueCode => returnValueCode.endCode !== code.endCode); | ||
returnValue.push(code); | ||
} | ||
} | ||
|
||
return returnValue; | ||
} | ||
|
||
function undoAnsiCodes(codes) { | ||
const reduced = reduceAnsiCodes(codes); | ||
const endCodes = reduced.map(({endCode}) => endCode); | ||
return endCodes.reverse().join(''); | ||
} | ||
|
||
if (visible > begin && visible <= stringEnd) { | ||
output += character; | ||
} else if (visible === begin && !isInsideEscape && ansiCode !== undefined) { | ||
output = checkAnsi(ansiCodes); | ||
} else if (visible >= stringEnd) { | ||
output += checkAnsi(ansiCodes, true, ansiCode); | ||
export default function sliceAnsi(string, begin, end) { | ||
const tokens = tokenize(string, end); | ||
let activeCodes = []; | ||
let pos = 0; | ||
let returnValue = ''; | ||
let include = false; | ||
|
||
for (const token of tokens) { | ||
if (end !== undefined && pos >= end) { | ||
break; | ||
} | ||
|
||
if (token.type === 'ansi') { | ||
activeCodes.push(token); | ||
if (include) { | ||
returnValue += token.code; | ||
} | ||
} else { | ||
// Char | ||
if (!include && pos >= begin) { | ||
include = true; | ||
// Simplify active codes | ||
activeCodes = reduceAnsiCodes(activeCodes); | ||
returnValue = activeCodes.map(({code}) => code).join(''); | ||
} | ||
|
||
if (include) { | ||
returnValue += token.value; | ||
} | ||
|
||
pos += token.fullWidth ? 2 : token.value.length; | ||
} | ||
} | ||
|
||
return output; | ||
// Disable active codes at the end | ||
returnValue += undoAnsiCodes(activeCodes); | ||
return returnValue; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters