@@ -342,6 +342,47 @@ async def select_all(
342
342
await self .sleep (0.5 )
343
343
return items
344
344
345
async def xpath(self, xpath: str, timeout: float = 2.5) -> List[Element]:  # noqa
    """
    Find elements by xpath string.

    The DOM agent is enabled for the duration of the call and disabled again
    before returning, also when an error occurs. The lookup itself is delegated
    to :meth:`find_all`. When nothing is found on the first attempt, the lookup
    is retried every 0.1 seconds until ``timeout`` seconds have passed.
    This method does not raise when nothing matches; it returns whatever the
    last lookup produced (normally an empty list).

    This timeout mechanism helps when relying on some element to appear
    before continuing your script.

    .. code-block:: python

         # find all the inline scripts (script elements without src attribute)
         await tab.xpath('//script[not(@src)]')

         # or here, more complex, but my personal favorite to case-insensitive text search
         await tab.xpath('//text()[ contains( translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"test")]')

    :param xpath: the xpath expression to search for.
    :type xpath: str
    :param timeout: maximum number of seconds to keep retrying (default 2.5).
        At least one lookup is always performed, even when this is 0.
    :type timeout: float
    :return: the matching elements, or an empty result when nothing was found in time.
    :rtype: List[Element]
    """
    items: List[Element] = []
    try:
        await self.send(cdp.dom.enable(), True)
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while True:
            items = await self.find_all(xpath, timeout=0)
            # return as soon as there is a match; only pause between attempts
            # when another attempt is actually going to happen.
            if items or loop.time() > deadline:
                break
            await self.sleep(0.1)
    finally:
        # always switch the DOM agent off again, also when a lookup raised
        await self.disable_dom_agent()
    return items
385
+
345
386
async def get (
346
387
self , url = "about:blank" , new_tab : bool = False , new_window : bool = False
347
388
):
@@ -511,6 +552,8 @@ async def find_elements_by_text(
511
552
512
553
await self .send (cdp .dom .discard_search_results (search_id ))
513
554
555
+ if not node_ids :
556
+ node_ids = []
514
557
items = []
515
558
for nid in node_ids :
516
559
node = util .filter_recurse (doc , lambda n : n .node_id == nid )
@@ -589,67 +632,7 @@ async def find_element_by_text(
589
632
:return:
590
633
:rtype:
591
634
"""
592
- doc = await self .send (cdp .dom .get_document (- 1 , True ))
593
- text = text .strip ()
594
- search_id , nresult = await self .send (cdp .dom .perform_search (text , True ))
595
-
596
- if nresult :
597
- node_ids = await self .send (
598
- cdp .dom .get_search_results (search_id , 0 , nresult )
599
- )
600
- else :
601
- node_ids = []
602
- await self .send (cdp .dom .discard_search_results (search_id ))
603
-
604
- if not node_ids :
605
- node_ids = []
606
- items = []
607
- for nid in node_ids :
608
- node = util .filter_recurse (doc , lambda n : n .node_id == nid )
609
- if node is None :
610
- continue
611
-
612
- try :
613
- elem = element .create (node , self , doc )
614
- except : # noqa
615
- continue
616
- if elem .node_type == 3 :
617
- # if found element is a text node (which is plain text, and useless for our purpose),
618
- # we return the parent element of the node (which is often a tag which can have text between their
619
- # opening and closing tags (that is most tags, except for example "img" and "video", "br")
620
-
621
- if not elem .parent :
622
- # check if parent actually has a parent and update it to be absolutely sure
623
- await elem .update ()
624
-
625
- items .append (
626
- elem .parent or elem
627
- ) # when it really has no parent, use the text node itself
628
- continue
629
- else :
630
- # just add the element itself
631
- items .append (elem )
632
-
633
- # since we already fetched the entire doc, including shadow and frames
634
- # let's also search through the iframes
635
- iframes = util .filter_recurse_all (doc , lambda node : node .node_name == "IFRAME" )
636
- if iframes :
637
- iframes_elems = [
638
- element .create (iframe , self , iframe .content_document )
639
- for iframe in iframes
640
- ]
641
- for iframe_elem in iframes_elems :
642
- iframe_text_nodes = util .filter_recurse_all (
643
- iframe_elem ,
644
- lambda node : node .node_type == 3 # noqa
645
- and text .lower () in node .node_value .lower (),
646
- )
647
- if iframe_text_nodes :
648
- iframe_text_elems = [
649
- element .create (text_node , self , iframe_elem .tree )
650
- for text_node in iframe_text_nodes
651
- ]
652
- items .extend (text_node .parent for text_node in iframe_text_elems )
635
+ items = await self .find_elements_by_text (text )
653
636
try :
654
637
if not items :
655
638
return None
@@ -666,7 +649,7 @@ async def find_element_by_text(
666
649
if elem :
667
650
return elem
668
651
finally :
669
- await self . disable_dom_agent ()
652
+ pass
670
653
671
654
return None
672
655
0 commit comments