Skip to content

Commit ce2e19e

Browse files
BUG: fix related to saving files in print_pdf method - Issue #20
1 parent 007455b commit ce2e19e

File tree

3 files changed

+72
-6
lines changed

3 files changed

+72
-6
lines changed

botcity/web/bot.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import base64
22
import functools
3+
import glob
34
import io
45
import json
56
import logging
@@ -972,18 +973,29 @@ def print_pdf(self, path=None, print_options=None):
972973
"""
973974
title = self.page_title() or "document"
974975
timeout = 60000
975-
if not self.page_title():
976-
timeout = 1000
977976
default_path = os.path.expanduser(os.path.join(self.download_folder_path, f"{title}.pdf"))
978977

979978
if self.browser in [Browser.CHROME, Browser.EDGE] and not self.headless:
979+
pdf_current_count = self.check_file_count(file_extension=".pdf")
980980
# Chrome still does not support headless webdriver print
981981
# but Firefox does.
982982
self.execute_javascript("window.print();")
983+
983984
# We need to wait for the file to be available in this case.
984-
self.wait_for_file(default_path, timeout=timeout)
985+
if self.page_title():
986+
self.wait_for_file(default_path, timeout=timeout)
987+
else:
988+
# Without a page title the file name is unknown, so wait for any new PDF in the download folder
989+
self.wait_for_new_pdf(pdf_current_count, timeout=timeout)
990+
991+
# Move the downloaded pdf file if the path is not None
992+
if path:
993+
last_downloaded_pdf = self.return_last_created_file(self.download_folder_path, ".pdf")
994+
os.rename(last_downloaded_pdf, path)
995+
return path
996+
self.wait(2000)
985997
return default_path
986-
998+
987999
if print_options is None:
9881000
print_options = {
9891001
'landscape': False,
@@ -1853,3 +1865,55 @@ def wait_for_file(self, path, timeout=60000):
18531865
if os.path.isfile(path) and os.access(path, os.R_OK):
18541866
return True
18551867
self.sleep(config.DEFAULT_SLEEP_AFTER_ACTION)
1868+
1869+
def return_last_created_file(self, path=None, file_extension=""):
    """Returns the last created file in a specific folder path.

    Only regular files are considered; sub-folders whose names happen to
    match the pattern are ignored.

    Args:
        path (str, optional): The path of the folder where the file is expected.
            Defaults to None, in which case `self.download_folder_path` is used.
        file_extension (str, optional): The extension of the file to be searched for (e.g., .pdf, .txt).
            Defaults to "" (no extension filter).

    Returns:
        str: the path of the last created file

    Raises:
        ValueError: If the folder contains no matching file.
    """
    if not path:
        path = self.download_folder_path

    # Escape the folder part so characters such as "[" or "*" in its name are
    # matched literally instead of being interpreted as glob wildcards.
    folder = glob.escape(os.path.expanduser(path))
    pattern = os.path.join(folder, f"*{file_extension}")
    candidates = [entry for entry in glob.glob(pattern) if os.path.isfile(entry)]

    if not candidates:
        # Same exception type max() would raise on an empty sequence, but with
        # a message that tells the caller what was actually missing.
        raise ValueError(f"No file matching '*{file_extension}' found in '{path}'")

    # NOTE: getctime is the creation time on Windows and the time of the last
    # metadata change on Unix.
    return max(candidates, key=os.path.getctime)
1885+
1886+
def check_file_count(self, path=None, file_extension=""):
    """Get the total number of files of the same type.

    Only regular files are counted; sub-folders whose names happen to match
    the pattern are ignored.

    Args:
        path (str, optional): The path of the folder where the files are saved.
            Defaults to None, in which case `self.download_folder_path` is used.
        file_extension (str, optional): The extension of the files to be searched for (e.g., .pdf, .txt).
            Defaults to "" (no extension filter).

    Returns:
        int: the number of files of the given type
    """
    if not path:
        path = self.download_folder_path

    # Escape the folder part so characters such as "[" or "*" in its name are
    # matched literally instead of being interpreted as glob wildcards.
    folder = glob.escape(os.path.expanduser(path))
    pattern = os.path.join(folder, f"*{file_extension}")
    return sum(1 for entry in glob.iglob(pattern) if os.path.isfile(entry))
1901+
1902+
def wait_for_new_pdf(self, current_count=0, timeout=60000):
    """
    Wait for a new pdf file to be available on download folder path.

    The folder is polled every 2000ms through `self.check_file_count` and the
    wait ends as soon as the number of pdf files exceeds `current_count`.
    At least one poll is always performed, even when `timeout` is smaller
    than the polling interval.

    Args:
        current_count (int): The number of pdf files in the folder before the
            download was triggered. Defaults to 0 files
        timeout (int, optional): Maximum wait time (ms) to search for a hit.
            Defaults to 60000ms (60s).

    Returns:
        bool: True if a new pdf file showed up, False if the timeout expired first.
    """
    poll_interval = 2000
    waited = 0
    while True:
        self.wait(poll_interval)
        waited += poll_interval

        # Compare with ">" rather than "== current_count + 1": if more than one
        # pdf lands between two polls the exact-match test would never succeed.
        if self.check_file_count(file_extension=".pdf") > current_count:
            return True

        if waited >= timeout:
            return False

botcity/web/browsers/chrome.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ def default_options(headless=False, download_folder_path=None, user_data_dir=Non
8383
},
8484
"safebrowsing.enabled": True,
8585
"credentials_enable_service": False,
86-
"profile.password_manager_enabled": False
86+
"profile.password_manager_enabled": False,
87+
"plugins.always_open_pdf_externally": True
8788
}
8889

8990
chrome_options.add_experimental_option("prefs", prefs)

botcity/web/browsers/edge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def default_options(headless=False, download_folder_path=None, user_data_dir=Non
8686
},
8787
"safebrowsing.enabled": True,
8888
"credentials_enable_service": False,
89-
"profile.password_manager_enabled": False
89+
"profile.password_manager_enabled": False,
90+
"plugins.always_open_pdf_externally": True
9091
}
9192

9293
edge_options.add_experimental_option("prefs", prefs)

0 commit comments

Comments
 (0)