11import collections
22import string
3- from typing import Dict , List
3+ from typing import Dict , List , Optional
44from selenium .webdriver .remote .webelement import WebElement
55
66
def data_from_row(row: "WebElement", cell_tag: str = "td",
                  cell_xpath: Optional[str] = None) -> List[str]:
    """Extract the text of each cell in a row and return it as a list.

    Args:
        row (WebElement): The row element.
        cell_tag (str, optional): The HTML tag associated with the row cells. Defaults to "td".
        cell_xpath (str, optional): The XPath expression associated with the row cells.
            Defaults to None. If provided (non-empty), it overrides the `cell_tag` definition.
            The expression is handed to Selenium unchanged.

    Returns:
        list: List of strings with the text of each matched cell.
    """
    # Use the generic `find_elements(by, value)` locator API: the
    # `find_elements_by_*` helpers were removed in Selenium 4.3, while
    # `find_elements` is available in both Selenium 3 and Selenium 4.
    # "xpath" and "tag name" are the string values of `By.XPATH` and
    # `By.TAG_NAME`, so no extra import is needed here.
    if cell_xpath:
        cells = row.find_elements("xpath", cell_xpath)
    else:
        cells = row.find_elements("tag name", cell_tag)

    return [cell.text for cell in cells]
@@ -48,14 +55,17 @@ def sanitize_header(labels: List[str]):
4855
4956
5057def table_to_dict (table : WebElement , has_header : bool = True ,
51- skip_rows : int = 0 , header_tag : str = "th" ) -> List [Dict ]:
58+ skip_rows : int = 0 , header_tag : str = "th" ,
59+ cell_xpath : Optional [str ] = None ) -> List [Dict ]:
5260 """Convert a table WebElement to a list of dicts.
5361
5462 Args:
5563 table (WebElement): The table element.
5664 has_header (bool, optional): Whether or not to parse a header. Defaults to True.
5765 skip_rows (int, optional): Number of rows to skip from the top. Defaults to 0.
5866 header_tag (str, optional): The HTML tag associated with the header cell. Defaults to "th".
67+ cell_xpath (str, optional): Optional cell XPath selector for complex row constructions.
68+ If `cell_xpath` is not provided, the row data will come from `<td>` elements.
5969
6070 Returns:
6171 list: List with dict for each row.
@@ -68,6 +78,10 @@ def table_to_dict(table: WebElement, has_header: bool = True,
6878 if skip_rows :
6979 rows = rows [skip_rows :]
7080
81+ if cell_xpath and not cell_xpath .startswith ('.' ):
82+ # Convert into relative xpath
83+ cell_xpath = f'.{ cell_xpath } '
84+
7185 # Parse header labels
7286 if has_header :
7387 # Read header labels
@@ -78,13 +92,18 @@ def table_to_dict(table: WebElement, has_header: bool = True,
7892 rows = rows [1 :]
7993 else :
8094 # Make up header labels
81- num_cols = len (rows [0 ].find_elements_by_tag_name ("td" ))
95+ if cell_xpath :
96+ cols = rows [0 ].find_elements_by_xpath (cell_xpath )
97+ else :
98+ cols = rows [0 ].find_elements_by_tag_name ("td" )
99+
100+ num_cols = len (cols )
82101 labels = [f"col_{ i } " for i in range (num_cols )]
83102
84103 # Assemble output dictionary
85104 out_list = []
86105 for row in rows :
87- row_data = data_from_row (row )
106+ row_data = data_from_row (row , cell_xpath = cell_xpath )
88107 out_list .append (dict (zip (labels , row_data )))
89108
90109 return out_list
0 commit comments