Skip to content

fix: support CSS selectors in targeted extract #797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 43 additions & 4 deletions lib/a11y/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,15 @@ export async function getAccessibilityTree(
let nodes = fullNodes;

if (selector) {
const objectId = await resolveObjectIdForXPath(page, selector);
const objectId = await resolveObjectIdForSelector(page, selector);

const { node } = await page.sendCDP<{
node: { backendNodeId: number };
}>("DOM.describeNode", { objectId: objectId });

if (!node?.backendNodeId) {
throw new StagehandDomProcessError(
`Unable to resolve backendNodeId for XPath "${selector}"`,
`Unable to resolve backendNodeId for selector "${selector}"`,
);
}

Expand All @@ -328,7 +328,7 @@ export async function getAccessibilityTree(
);
if (!target) {
throw new StagehandDomProcessError(
`No AX node found for backendNodeId ${node.backendNodeId} (XPath "${selector}")`,
`No AX node found for backendNodeId ${node.backendNodeId} (selector "${selector}")`,
);
}

Expand Down Expand Up @@ -424,7 +424,7 @@ export async function getAccessibilityTree(
* - During each iteration, we call `Runtime.evaluate` to run `document.evaluate(...)`
* with each XPath, obtaining a `RemoteObject` reference if it exists.
* - Then, for each valid object reference, we call `DOM.describeNode` to retrieve
* the elements `backendNodeId`.
* the element's `backendNodeId`.
* - Collects all resulting `backendNodeId`s in a Set and returns them.
*
* @param stagehandPage - A StagehandPage instance with built-in CDP helpers.
Expand Down Expand Up @@ -464,6 +464,45 @@ export async function findScrollableElementIds(
return scrollableBackendIds;
}

/**
 * Resolve a selector (XPath or CSS) to a Chrome-DevTools-Protocol (CDP) remote-object ID.
 *
 * The selector is evaluated inside the page through `Runtime.evaluate`:
 * - XPath selectors go through `document.evaluate` and resolve to the first
 *   matching node in document order. A selector is treated as XPath when it
 *   carries an `xpath=` prefix, or when it starts with `/` or `(/` (neither of
 *   which can begin a valid CSS selector).
 * - Everything else is handed to `document.querySelector`.
 *
 * @param page      A StagehandPage (or Playwright.Page with .sendCDP)
 * @param selector  An XPath (optionally prefixed with `xpath=`) or a CSS selector
 * @returns The remote objectId for the first matched node. `null` remains in the
 *          signature for compatibility, but a miss is reported by throwing, never
 *          by returning `null`.
 * @throws StagehandElementNotFoundError when nothing matches, or when the page
 *         raises while evaluating the selector (for example invalid CSS syntax).
 */
export async function resolveObjectIdForSelector(
  page: StagehandPage,
  selector: string,
): Promise<string | null> {
  const XPATH_PREFIX = "xpath=";
  const trimmed = selector.trim();
  const hasXPathPrefix = trimmed.startsWith(XPATH_PREFIX);
  const isXPath =
    hasXPathPrefix || trimmed.startsWith("/") || trimmed.startsWith("(/");

  let expression: string;
  if (isXPath) {
    const xpath = hasXPathPrefix
      ? trimmed.substring(XPATH_PREFIX.length)
      : trimmed;
    // JSON.stringify produces a safely quoted JS string literal for the page.
    expression = `
      (function () {
        const res = document.evaluate(
          ${JSON.stringify(xpath)},
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null
        );
        return res.singleNodeValue;
      })();
    `;
  } else {
    expression = `document.querySelector(${JSON.stringify(selector)})`;
  }

  const { result, exceptionDetails } = await page.sendCDP<{
    result?: { objectId?: string };
    exceptionDetails?: unknown;
  }>("Runtime.evaluate", {
    expression,
    returnByValue: false,
  });

  // When the evaluated expression throws, `result` describes the thrown Error
  // object and still carries an objectId. Checking `exceptionDetails` keeps that
  // Error from being mistaken for a DOM node.
  if (exceptionDetails || !result?.objectId) {
    throw new StagehandElementNotFoundError([selector]);
  }
  return result.objectId;
}

/**
* Resolve an XPath to a Chrome-DevTools-Protocol (CDP) remote-object ID.
*
Expand Down
3 changes: 1 addition & 2 deletions lib/handlers/extractHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,10 @@ export class StagehandExtractHandler {
});

await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs);
const targetXpath = selector?.replace(/^xpath=/, "") ?? "";
const tree = await getAccessibilityTree(
this.stagehandPage,
this.logger,
targetXpath,
selector,
);
this.logger({
category: "extraction",
Expand Down