Skip to content

Commit 6002ef0

Browse files
authored
feat: import xpath from nodriver (#141)
* feat: import xpath from nodriver * using disable_dom_agent directly in finally * merge two identical methods (less duplicate code)
1 parent 4d126f3 commit 6002ef0

File tree

3 files changed

+59
-62
lines changed

3 files changed

+59
-62
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
- Added `speed` in `Tab.scroll_down` and `Tab.scroll_up` methods to control the scroll speed @nathanfallet
1515
- Allow to wait for promise in `Element.apply` method @nathanfallet
1616
- Added `Element.clear_input_by_deleting` to handle inputs with custom delete behavior @nathanfallet
17+
- Added `Tab.xpath` from nodriver @nathanfallet
1718

1819
### Changed
1920

tests/core/test_tab.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ async def test_select(browser: zd.Browser):
6060
assert result.text == "Apples"
6161

6262

63+
async def test_xpath(browser: zd.Browser):
    """Check that ``Tab.xpath`` finds exactly the "Apples" list item on the groceries page."""
    page = await browser.get(sample_file("groceries.html"))

    # The expression targets one <li> by its aria-label, so a single match is expected.
    matches = await page.xpath('//li[@aria-label="Apples (42)"]')
    assert len(matches) == 1

    apples = matches[0]
    assert apples is not None
    assert apples.tag == "li"
    assert apples.text == "Apples"
74+
75+
6376
async def test_add_handler_type_event(browser: zd.Browser):
6477
tab = await browser.get(sample_file("groceries.html"))
6578

zendriver/core/tab.py

Lines changed: 45 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,47 @@ async def select_all(
342342
await self.sleep(0.5)
343343
return items
344344

345+
async def xpath(self, xpath: str, timeout: float = 2.5) -> List[Element]:  # noqa
    """
    Find elements by xpath string.

    If nothing is found immediately, the lookup is retried roughly every
    0.1 seconds until ``timeout`` seconds have elapsed (default 2.5 seconds).
    When nothing is found an empty list is returned; a failed lookup does not
    raise. The timeout mechanism helps when relying on some element to appear
    before continuing your script.

    .. code-block:: python

        # find all the inline scripts (script elements without src attribute)
        await tab.xpath('//script[not(@src)]')

        # or here, more complex, but my personal favorite to case-insensitive text search
        await tab.xpath('//text()[ contains( translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"test")]')

    :param xpath: the xpath expression to search for
    :type xpath: str
    :param timeout: maximum number of seconds to keep retrying
    :type timeout: float
    :return: the matching elements, or an empty list when none were found
    :rtype: List[Element]
    """
    items: List[Element] = []
    try:
        await self.send(cdp.dom.enable(), True)
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while True:
            # Each attempt passes timeout=0 to find_all; the retry pacing
            # and the overall deadline are handled by this loop.
            items = await self.find_all(xpath, timeout=0)
            # Stop as soon as something is found (no trailing sleep) or the
            # deadline has passed; at least one attempt is always made.
            if items or loop.time() >= deadline:
                break
            await self.sleep(0.1)
    finally:
        # Always undo the dom.enable() above, even if a lookup raised.
        await self.disable_dom_agent()
    return items
385+
345386
async def get(
346387
self, url="about:blank", new_tab: bool = False, new_window: bool = False
347388
):
@@ -511,6 +552,8 @@ async def find_elements_by_text(
511552

512553
await self.send(cdp.dom.discard_search_results(search_id))
513554

555+
if not node_ids:
556+
node_ids = []
514557
items = []
515558
for nid in node_ids:
516559
node = util.filter_recurse(doc, lambda n: n.node_id == nid)
@@ -589,67 +632,7 @@ async def find_element_by_text(
589632
:return:
590633
:rtype:
591634
"""
592-
doc = await self.send(cdp.dom.get_document(-1, True))
593-
text = text.strip()
594-
search_id, nresult = await self.send(cdp.dom.perform_search(text, True))
595-
596-
if nresult:
597-
node_ids = await self.send(
598-
cdp.dom.get_search_results(search_id, 0, nresult)
599-
)
600-
else:
601-
node_ids = []
602-
await self.send(cdp.dom.discard_search_results(search_id))
603-
604-
if not node_ids:
605-
node_ids = []
606-
items = []
607-
for nid in node_ids:
608-
node = util.filter_recurse(doc, lambda n: n.node_id == nid)
609-
if node is None:
610-
continue
611-
612-
try:
613-
elem = element.create(node, self, doc)
614-
except: # noqa
615-
continue
616-
if elem.node_type == 3:
617-
# if found element is a text node (which is plain text, and useless for our purpose),
618-
# we return the parent element of the node (which is often a tag which can have text between their
619-
# opening and closing tags (that is most tags, except for example "img" and "video", "br")
620-
621-
if not elem.parent:
622-
# check if parent actually has a parent and update it to be absolutely sure
623-
await elem.update()
624-
625-
items.append(
626-
elem.parent or elem
627-
) # when it really has no parent, use the text node itself
628-
continue
629-
else:
630-
# just add the element itself
631-
items.append(elem)
632-
633-
# since we already fetched the entire doc, including shadow and frames
634-
# let's also search through the iframes
635-
iframes = util.filter_recurse_all(doc, lambda node: node.node_name == "IFRAME")
636-
if iframes:
637-
iframes_elems = [
638-
element.create(iframe, self, iframe.content_document)
639-
for iframe in iframes
640-
]
641-
for iframe_elem in iframes_elems:
642-
iframe_text_nodes = util.filter_recurse_all(
643-
iframe_elem,
644-
lambda node: node.node_type == 3 # noqa
645-
and text.lower() in node.node_value.lower(),
646-
)
647-
if iframe_text_nodes:
648-
iframe_text_elems = [
649-
element.create(text_node, self, iframe_elem.tree)
650-
for text_node in iframe_text_nodes
651-
]
652-
items.extend(text_node.parent for text_node in iframe_text_elems)
635+
items = await self.find_elements_by_text(text)
653636
try:
654637
if not items:
655638
return None
@@ -666,7 +649,7 @@ async def find_element_by_text(
666649
if elem:
667650
return elem
668651
finally:
669-
await self.disable_dom_agent()
652+
pass
670653

671654
return None
672655

0 commit comments

Comments
 (0)