Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(getStaticValue): string regex functions #82

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"watch": "warun \"{src,test}/**/*.mjs\" -- npm run -s test:mocha"
},
"dependencies": {
"@eslint-community/regexpp": "^4.9.1",
"eslint-visitor-keys": "^3.4.3"
},
"devDependencies": {
Expand Down
98 changes: 86 additions & 12 deletions src/get-static-value.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* globals globalThis, global, self, window */

import { findVariable } from "./find-variable.mjs"
import { isSafeRegex } from "./safe-regex.mjs"

const globalObject =
typeof globalThis !== "undefined"
Expand All @@ -13,6 +14,8 @@ const globalObject =
? global
: {}

/**
 * Error thrown by an argument check (see `checkSafeSearchValue`) when a call
 * cannot be verified as safe to evaluate. `callFunction` catches this error
 * type and returns `null` instead of letting it propagate.
 */
class DangerousCallError extends Error {}

const builtinNames = Object.freeze(
new Set([
"Array",
Expand Down Expand Up @@ -169,6 +172,14 @@ const callPassThrough = new Set([
Object.preventExtensions,
Object.seal,
])
/**
 * Checks that the first argument of `String.prototype.match` or
 * `String.prototype.matchAll` is a safe pattern.
 *
 * Unlike `replace`, `replaceAll`, and `split` (which treat a string argument
 * literally), these two methods compile a string argument with
 * `new RegExp(...)`. A string therefore has to pass the same safety check as
 * a regex object.
 * @param {unknown[]} args
 * @throws {DangerousCallError} If the pattern cannot be verified to be safe.
 */
function checkSafePattern(args) {
    const pattern = args[0]
    if (typeof pattern !== "string") {
        // regex objects (and everything else) follow the common rules
        checkSafeSearchValue(args)
        return
    }

    let compiled
    try {
        compiled = new RegExp(pattern)
    } catch {
        // Not a valid pattern. The actual call will throw the very same
        // SyntaxError right away, so there is nothing dangerous to guard.
        return
    }
    if (!isSafeRegex(compiled)) {
        throw new DangerousCallError()
    }
}

/**
 * Functions that may only be called after their arguments were checked.
 * Each entry maps the original function to a wrapper that runs the check
 * before delegating to the original.
 * @type {ReadonlyMap<Function, ReplaceFn<unknown, unknown>>}
 */
const callReplacement = new Map(
    [
        checkArgs(String.prototype.match, checkSafePattern),
        checkArgs(String.prototype.matchAll, checkSafePattern),
        checkArgs(String.prototype.replace, checkSafeSearchValue),
        checkArgs(String.prototype.replaceAll, checkSafeSearchValue),
        checkArgs(String.prototype.split, checkSafeSearchValue),
        // `matchAll` and `replaceAll` are newer additions to the language.
        // Skip any method the runtime does not provide so that `undefined`
        // never becomes a key of the map.
    ].filter(([fn]) => typeof fn === "function"),
)

/** @type {ReadonlyArray<readonly [Function, ReadonlySet<string>]>} */
const getterAllowed = [
Expand All @@ -190,6 +201,47 @@ const getterAllowed = [
[Set, new Set(["size"])],
]

/**
* @typedef {(thisArg: T, args: unknown[], original: (this: T, ...args: unknown[]) => R) => R} ReplaceFn
* @template T
* @template R
*/

/**
 * Builds a `[function, replacement]` pair for the given function. The
 * replacement first hands the call arguments to `checkFn` (which is expected
 * to throw in order to reject the call) and then invokes `fn` with the given
 * `this` value and arguments.
 * @param {T} fn The function to guard.
 * @param {(args: unknown[]) => void} checkFn The argument check to run before each call.
 * @returns {[T, ReplaceFn<unknown, ReturnType<T>>]}
 * @template {Function} T
 */
function checkArgs(fn, checkFn) {
    const guardedCall = (thisArg, args) => {
        // the check runs first; if it throws, `fn` is never invoked
        checkFn(args)
        return fn.apply(thisArg, args)
    }
    return [fn, guardedCall]
}

/**
 * Accepts the call only if its first argument is a string or a regex that
 * `isSafeRegex` reports as safe. Anything else is rejected.
 * @param {unknown[]} args
 * @throws {DangerousCallError} If the search value could not be verified.
 */
function checkSafeSearchValue(args) {
    const candidate = args[0]

    // a string search value is accepted unconditionally
    const isPlainString = typeof candidate === "string"
    // a regex is accepted only after it was verified
    const isVerifiedRegex =
        !isPlainString && candidate instanceof RegExp && isSafeRegex(candidate)

    if (!(isPlainString || isVerifiedRegex)) {
        // the search value could not be verified as safe
        throw new DangerousCallError()
    }
}

/**
* Get the property descriptor.
* @param {object} object The object to get.
Expand Down Expand Up @@ -249,6 +301,34 @@ function getElementValues(nodeList, initialScope) {
return valueList
}

/**
 * Evaluates a call to `func`, but only when `func` is known to this module.
 *
 * - Functions in `callAllowed` are invoked directly.
 * - Functions in `callPassThrough` are not invoked; the first argument is
 *   used as the result.
 * - Functions in `callReplacement` are invoked through their replacement. A
 *   `DangerousCallError` thrown by the replacement means the call was
 *   rejected; any other error is rethrown.
 * @param {Function} func The function to call.
 * @param {unknown} thisArg The `this` arg of the function. Use `undefined` when calling a free function.
 * @param {unknown[]} args The arguments of the call.
 * @returns {{ value: unknown } | null} The result wrapped in an object, or
 * `null` if the function is unknown or the call was rejected.
 */
function callFunction(func, thisArg, args) {
    if (callAllowed.has(func)) {
        const value = func.apply(thisArg, args)
        return { value }
    }
    if (callPassThrough.has(func)) {
        const value = args[0]
        return { value }
    }

    const guarded = callReplacement.get(func)
    if (!guarded) {
        return null
    }

    try {
        return { value: guarded(thisArg, args, func) }
    } catch (error) {
        if (error instanceof DangerousCallError) {
            // the argument check rejected the call
            return null
        }
        throw error
    }
}

/**
* Returns whether the given variable is never written to after initialization.
* @param {import("eslint").Scope.Variable} variable
Expand Down Expand Up @@ -363,12 +443,11 @@ const operations = Object.freeze({
if (property != null) {
const receiver = object.value
const methodName = property.value
if (callAllowed.has(receiver[methodName])) {
return { value: receiver[methodName](...args) }
}
if (callPassThrough.has(receiver[methodName])) {
return { value: args[0] }
}
return callFunction(
receiver[methodName],
receiver,
args,
)
}
}
} else {
Expand All @@ -378,12 +457,7 @@ const operations = Object.freeze({
return { value: undefined, optional: true }
}
const func = callee.value
if (callAllowed.has(func)) {
return { value: func(...args) }
}
if (callPassThrough.has(func)) {
return { value: args[0] }
}
return callFunction(func, undefined, args)
}
}
}
Expand Down
243 changes: 243 additions & 0 deletions src/safe-regex.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import { RegExpParser } from "@eslint-community/regexpp"

/**
 * Conservatively decides whether the given regex runs in O(n) (with a
 * decently small constant factor) on any input string. A result of `true`
 * guarantees that the regex is safe; `false` means that safety could not be
 * established.
 * @param {RegExp | string} regex The regex, or its literal source text (`/pattern/flags`).
 * @returns {boolean}
 */
export function isSafeRegex(regex) {
    let pathCount
    try {
        const literal = regex.toString()
        const { pattern } = new RegExpParser().parseLiteral(literal)
        pathCount = maxPossiblePaths(pattern, "ltr")
    } catch {
        // the regex could not be parsed, or it uses elements that are not
        // supported by the analysis
        return false
    }
    return pathCount < 100
}

/**
* @typedef {import("@eslint-community/regexpp").AST} AST
*/

/**
 * Returns the maximum number of possible paths through a given regex node.
 *
 * The result is `Infinity` for any quantifier without an upper bound (unless
 * its element has no paths at all) and `0` for nodes that have no path, e.g.
 * a group without alternatives.
 * @param {import("@eslint-community/regexpp/ast").Element
 * | import("@eslint-community/regexpp/ast").Alternative
 * | import("@eslint-community/regexpp/ast").Pattern
 * } n
 * @param {"ltr" | "rtl"} direction The matching direction.
 * @returns {number}
 */
// eslint-disable-next-line complexity
export function maxPossiblePaths(n, direction) {
    switch (n.type) {
        case "Alternative": {
            // Visit the elements in matching order. The order matters for
            // the early return below: whichever of `0` and `Infinity` is
            // reached first decides the result.
            let elements = n.elements
            if (direction === "rtl") {
                elements = [...elements].reverse()
            }
            let paths = 1
            for (const e of elements) {
                paths *= maxPossiblePaths(e, direction)
                if (paths === 0 || paths === Infinity) {
                    return paths
                }
            }
            return paths
        }

        case "Assertion": {
            if (n.kind === "lookahead" || n.kind === "lookbehind") {
                // lookarounds have their own matching direction
                const d = n.kind === "lookahead" ? "ltr" : "rtl"
                let paths = 0
                for (const e of n.alternatives) {
                    paths += maxPossiblePaths(e, d)
                }
                return paths
            }
            // built-in assertions are always constant
            return 1
        }

        case "Backreference":
            return 1

        case "Character":
        case "CharacterSet":
        case "CharacterClass":
        case "ExpressionCharacterClass":
            // one path per string element, plus one shared path for all
            // single-character elements (if there might be any)
            return getStringsInCharacters(n) + (hasNoCharacters(n) ? 0 : 1)

        case "Quantifier": {
            if (n.max === 0) {
                return 1
            }
            const inner = maxPossiblePaths(n.element, direction)
            if (inner === 0) {
                // the element can never match, so only zero iterations work
                return n.min === 0 ? 1 : 0
            }
            if (n.max === Infinity) {
                return Infinity
            }
            if (inner === Infinity) {
                return inner
            }
            const constant = inner ** n.min
            if (n.min === n.max) {
                return constant
            }
            // The {n,m} case (n!=m) is a bit harder.
            // Example: (a|b){2,4} is equivalent to (a|b){2}(a|b){0,2}
            // To get the maximum possible paths of any x{0,p}, we first note
            // that this is the same as x{0}|x|xx|xxx|...|x{p}. So the max
            // paths of x{0,p} is the sum of the max paths of x{0}, x{1}, ..., x{p}.
            // Let y=maxPossiblePaths(x). Then maxPossiblePaths(x{0,p}) =
            //   = 1 + y + y^2 + y^3 + ... + y^p
            //   = ceil(y^(p+1)/(y-1))-1  (if y>=2)
            //   = p+1                    (if y=1)
            //   = 1                      (if y=0)
            const p = n.max - n.min
            let e
            if (inner < 2) {
                e = p * inner + 1
            } else {
                e = Math.ceil(inner ** (p + 1) / (inner - 1)) - 1
            }
            return constant * e
        }

        case "CapturingGroup":
        case "Group":
        case "Pattern": {
            let paths = 0
            for (const e of n.alternatives) {
                paths += maxPossiblePaths(e, direction)
                if (paths === Infinity) {
                    return paths
                }
            }
            return paths
        }

        default:
            return assertNever(n)
    }
}

/**
 * Returns the worst-case (=maximum) number of string (length!=1) elements in
 * the given character element.
 * @param {import("@eslint-community/regexpp/ast").CharacterClassElement
 * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"]
 * | import("@eslint-community/regexpp/ast").CharacterSet
 * | import("@eslint-community/regexpp/ast").CharacterClass
 * } n
 * @returns {number}
 */
function getStringsInCharacters(n) {
    switch (n.type) {
        case "Character":
        case "CharacterClassRange":
            return 0

        case "CharacterSet":
            // since we can't know how many strings the set contains, we
            // just assume 1000
            return n.kind === "property" && n.strings ? 1000 : 0

        case "ClassStringDisjunction":
            // alternatives with exactly one element are single characters,
            // everything else counts as a string
            return n.alternatives.filter((a) => a.elements.length !== 1).length

        case "CharacterClass":
            if (n.negate || !n.unicodeSets) {
                return 0
            }
            return n.elements.reduce((a, b) => a + getStringsInCharacters(b), 0)

        case "ExpressionCharacterClass":
            if (n.negate) {
                return 0
            }
            return getStringsInCharacters(n.expression)

        case "ClassIntersection":
            // an intersection cannot contain more strings than either side
            return Math.min(
                getStringsInCharacters(n.left),
                getStringsInCharacters(n.right),
            )

        case "ClassSubtraction":
            // worst case: the right side removes none of the strings
            return getStringsInCharacters(n.left)

        default:
            return assertNever(n)
    }
}

/**
 * Returns `true` if the given element does not contain any single-character
 * elements. If `false` is returned, then the given element might still
 * contain single-character elements.
 * @param {import("@eslint-community/regexpp/ast").CharacterClassElement
 * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"]
 * | import("@eslint-community/regexpp/ast").CharacterSet
 * | import("@eslint-community/regexpp/ast").CharacterClass
 * } n
 * @returns {boolean}
 */
function hasNoCharacters(n) {
    switch (n.type) {
        case "Character":
        case "CharacterClassRange":
            return false

        case "CharacterSet":
            // while not exactly true, we'll just assume that character sets
            // always contain at least one character
            return false

        case "ClassStringDisjunction":
            // alternatives with exactly one element are single characters
            return n.alternatives.every((a) => a.elements.length !== 1)

        case "CharacterClass":
            if (n.negate) {
                // since we can't know whether the elements contain all
                // characters, we have to assume that [^not all] will
                // contain at least some
                return false
            }
            return n.elements.every(hasNoCharacters)

        case "ExpressionCharacterClass":
            if (n.negate) {
                // since we can't know whether the expression contains all
                // characters, we have to assume that [^not all] will
                // contain at least some
                return false
            }
            return hasNoCharacters(n.expression)

        case "ClassIntersection":
            // if either side has no characters, neither has the intersection
            return hasNoCharacters(n.left) || hasNoCharacters(n.right)

        case "ClassSubtraction":
            return hasNoCharacters(n.left)

        default:
            return assertNever(n)
    }
}

/**
 * Marks a code path that is expected to be unreachable. Calling it always
 * throws an error that mentions the unexpected value.
 * @param {never} value The value that should not exist.
 * @returns {never}
 */
function assertNever(value) {
    const message = `Unexpected value: ${value}`
    throw new Error(message)
}

Check warning on line 243 in src/safe-regex.mjs

View check run for this annotation

Codecov / codecov/patch

src/safe-regex.mjs#L241-L243

Added lines #L241 - L243 were not covered by tests
Loading