@@ -17802,6 +17802,14 @@ def get_tessdata(tessdata=None):
1780217802 # Try to locate the tesseract-ocr installation.
1780317803
1780417804 import subprocess
17805+
17806+ cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True)
17807+ if cp.returncode == 0:
17808+ m = re.search('List of available languages in "(.+)"', cp.stdout)
17809+ if m:
17810+ tessdata = m.group(1)
17811+ return tessdata
17812+
1780517813 # Windows systems:
1780617814 if sys.platform == "win32":
1780717815 cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True)
@@ -17816,20 +17824,27 @@ def get_tessdata(tessdata=None):
1781617824 raise RuntimeError("No tessdata specified and Tesseract installation has no {tessdata} folder")
1781717825
1781817826 # Unix-like systems:
17819- cp = subprocess.run("whereis tesseract-ocr", shell=1, capture_output=1, check=0, text=True)
17820- response = cp.stdout.strip().split()
17821- if cp.returncode or len(response) != 2: # if not 2 tokens: no tesseract-ocr
17822- raise RuntimeError("No tessdata specified and Tesseract is not installed")
17823-
17824- # search tessdata in folder structure
17825- dirname = response[1] # contains tesseract-ocr installation folder
17826- pattern = f"{dirname}/*/tessdata"
17827- tessdatas = glob.glob(pattern)
17828- tessdatas.sort()
17829- if tessdatas:
17830- return tessdatas[-1]
17827+ attempts = list()
17828+ for path in 'tesseract-ocr', 'tesseract':
17829+ cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True)
17830+ if cp.returncode == 0:
17831+ response = cp.stdout.strip().split()
17832+ if len(response) == 2:
17833+ # search tessdata in folder structure
17834+ dirname = response[1] # contains tesseract-ocr installation folder
17835+ pattern = f"{dirname}/*/tessdata"
17836+ attempts.append(pattern)
17837+ tessdatas = glob.glob(pattern)
17838+ tessdatas.sort()
17839+ if tessdatas:
17840+ return tessdatas[-1]
17841+ if attempts:
17842+ text = 'No tessdata specified and no match for:\n'
17843+ for attempt in attempts:
17844+ text += f' {attempt}'
17845+ raise RuntimeError(text)
1783117846 else:
17832- raise RuntimeError(" No tessdata specified and Tesseract installation has no {pattern} folder." )
17847+ raise RuntimeError(' No tessdata specified and Tesseract is not installed' )
1783317848
1783417849
1783517850def css_for_pymupdf_font(
0 commit comments