Skip to content

Commit 51984b2

Browse files
authored
Merge pull request #24 from joao-voltarelli/fix/master/print_pdf_fix
BUG: fix related to saving files in print_pdf method - Issue #20
2 parents 389c9b3 + cad5f41 commit 51984b2

File tree

3 files changed

+79
-5
lines changed

3 files changed

+79
-5
lines changed

botcity/web/bot.py

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import base64
22
import functools
3+
import glob
34
import io
45
import json
56
import logging
@@ -973,16 +974,27 @@ def print_pdf(self, path=None, print_options=None):
973974
"""
974975
title = self.page_title() or "document"
975976
timeout = 60000
976-
if not self.page_title():
977-
timeout = 1000
978977
default_path = os.path.expanduser(os.path.join(self.download_folder_path, f"{title}.pdf"))
979978

980979
if self.browser in [Browser.CHROME, Browser.EDGE] and not self.headless:
980+
pdf_current_count = self.get_file_count(file_extension=".pdf")
981981
# Chrome still does not support headless webdriver print
982982
# but Firefox does.
983983
self.execute_javascript("window.print();")
984+
984985
# We need to wait for the file to be available in this case.
985-
self.wait_for_file(default_path, timeout=timeout)
986+
if self.page_title():
987+
self.wait_for_file(default_path, timeout=timeout)
988+
else:
989+
# Wait for a new file when the page has no title to build the file path from
990+
self.wait_for_new_file(file_extension=".pdf", current_count=pdf_current_count)
991+
992+
# Move the downloaded pdf file if the path is not None
993+
if path:
994+
last_downloaded_pdf = self.get_last_created_file(self.download_folder_path, ".pdf")
995+
os.rename(last_downloaded_pdf, path)
996+
return path
997+
self.wait(2000)
986998
return default_path
987999

9881000
if print_options is None:
@@ -1858,3 +1870,63 @@ def wait_for_file(self, path, timeout=60000):
18581870
continue
18591871
return True
18601872
self.sleep(config.DEFAULT_SLEEP_AFTER_ACTION)
1873+
1874+
def get_last_created_file(self, path=None, file_extension=""):
1875+
"""Returns the last created file in a specific folder path.
1876+
1877+
Args:
1878+
path (str, optional): The path of the folder where the file is expected. Defaults to None.
1879+
file_extension (str, optional): The extension of the file to be searched for (e.g., .pdf, .txt).
1880+
1881+
Returns:
1882+
str: the path of the last created file
1883+
"""
1884+
if not path:
1885+
path = self.download_folder_path
1886+
1887+
files_path = glob.glob(os.path.expanduser(os.path.join(path, f"*{file_extension}")))
1888+
last_created_file = max(files_path, key=os.path.getctime)
1889+
return last_created_file
1890+
1891+
def get_file_count(self, path=None, file_extension=""):
1892+
"""Get the total number of files of the same type.
1893+
1894+
Args:
1895+
path (str, optional): The path of the folder where the files are saved.
1896+
file_extension (str, optional): The extension of the files to be searched for (e.g., .pdf, .txt).
1897+
1898+
Returns:
1899+
int: the number of files of the given type
1900+
"""
1901+
if not path:
1902+
path = self.download_folder_path
1903+
1904+
files_path = glob.glob(os.path.expanduser(os.path.join(path, f"*{file_extension}")))
1905+
return len(files_path)
1906+
1907+
def wait_for_new_file(self, path=None, file_extension="", current_count=0, timeout=60000):
1908+
"""
1909+
Wait for a new file to be available on disk without the file path.
1910+
1911+
Args:
1912+
path (str, optional): The path of the folder where the file is expected. Defaults to None.
1913+
file_extension (str, optional): The extension of the file to be searched for (e.g., .pdf, .txt).
1914+
current_count (int): The current number of files in the folder of the given type. Defaults to 0 files
1915+
timeout (int, optional): Maximum wait time (ms) to search for a hit.
1916+
Defaults to 60000ms (60s).
1917+
1918+
Returns:
1919+
str: the path of the last created file of the given type, or None if the timeout is reached
1920+
"""
1921+
if not path:
1922+
path = self.download_folder_path
1923+
1924+
start_time = time.time()
1925+
while True:
1926+
elapsed_time = (time.time() - start_time) * 1000
1927+
if elapsed_time > timeout:
1928+
return None
1929+
pdf_count = self.get_file_count(path, f"*{file_extension}")
1930+
if pdf_count == current_count + 1:
1931+
return self.get_last_created_file(path, f"*{file_extension}")
1932+
self.sleep(config.DEFAULT_SLEEP_AFTER_ACTION)

botcity/web/browsers/chrome.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ def default_options(headless=False, download_folder_path=None, user_data_dir=Non
8383
},
8484
"safebrowsing.enabled": True,
8585
"credentials_enable_service": False,
86-
"profile.password_manager_enabled": False
86+
"profile.password_manager_enabled": False,
87+
"plugins.always_open_pdf_externally": True
8788
}
8889

8990
chrome_options.add_experimental_option("prefs", prefs)

botcity/web/browsers/edge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def default_options(headless=False, download_folder_path=None, user_data_dir=Non
8686
},
8787
"safebrowsing.enabled": True,
8888
"credentials_enable_service": False,
89-
"profile.password_manager_enabled": False
89+
"profile.password_manager_enabled": False,
90+
"plugins.always_open_pdf_externally": True
9091
}
9192

9293
edge_options.add_experimental_option("prefs", prefs)

0 commit comments

Comments
 (0)