Katalon Studio 7.4: Tesseract (OCR) does not work the way it does in IntelliJ

This works fine in IntelliJ:

package com.read.image;

import java.io.File;
import net.sourceforge.tess4j.*;

/**
 * Minimal Tess4J demo: runs OCR over one fixed sample image and prints the
 * recognized text to standard output.
 */
public class ReadImageText {

    /**
     * Program entry point; simply delegates to {@link #ReadText()}.
     *
     * @param args command-line arguments (not used)
     * @throws TesseractException propagated from {@link #ReadText()}
     */
    public static void main(String[] args) throws TesseractException {
        ReadText();
    }

    /**
     * Builds a {@code Tesseract} instance pointed at the project's tessdata
     * directory, runs {@code doOCR} on the sample image and prints the result.
     *
     * @throws TesseractException propagated from the {@code doOCR} call
     */
    public static void ReadText() throws TesseractException {
        Tesseract ocr = new Tesseract();
        ocr.setDatapath("C:/Users/fitim/IdeaProjects/JideaProjects/src/main/tessdata");
        ocr.setLanguage("eng");
        // NOTE(review): both modes are passed as the raw value 1; confirm the
        // intended meaning against the Tess4J/Tesseract mode constants.
        ocr.setPageSegMode(1);
        ocr.setOcrEngineMode(1);

        File sampleImage = new File("C:\\Users\\fitim\\IdeaProjects\\JideaProjects\\src\\main\\java\\com\\read.image\\eurotext.png");
        System.out.println(ocr.doOCR(sampleImage));
    }
}

“C:\Program Files\Java\jdk1.8.0_241\bin\java.exe” “-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2019.3.1\lib\idea_rt.jar=52817:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2019.3.1\bin” -Dfile.encoding=UTF-8 -classpath “C:\Program Files\Java\jdk1.8.0_241\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_241\jre\lib\resources.jar;C:\Program 
Files\Java\jdk1.8.0_241\jre\lib\rt.jar;C:\Users\fitim\IdeaProjects\JideaProjects\target\classes;C:\Users\fitim.m2\repository\org\codehaus\groovy\groovy-json\2.4.4\groovy-json-2.4.4.jar;C:\Users\fitim.m2\repository\org\codehaus\groovy\groovy\2.4.4\groovy-2.4.4.jar;C:\Users\fitim.m2\repository\net\sourceforge\tess4j\tess4j\4.5.1\tess4j-4.5.1.jar;C:\Users\fitim.m2\repository\net\java\dev\jna\jna\5.5.0\jna-5.5.0.jar;C:\Users\fitim.m2\repository\com\github\jai-imageio\jai-imageio-core\1.4.0\jai-imageio-core-1.4.0.jar;C:\Users\fitim.m2\repository\org\ghost4j\ghost4j\1.0.1\ghost4j-1.0.1.jar;C:\Users\fitim.m2\repository\log4j\log4j\1.2.17\log4j-1.2.17.jar;C:\Users\fitim.m2\repository\commons-beanutils\commons-beanutils\1.9.2\commons-beanutils-1.9.2.jar;C:\Users\fitim.m2\repository\org\apache\xmlgraphics\xmlgraphics-commons\1.4\xmlgraphics-commons-1.4.jar;C:\Users\fitim.m2\repository\com\lowagie\itext\2.1.7\itext-2.1.7.jar;C:\Users\fitim.m2\repository\org\apache\pdfbox\pdfbox-tools\2.0.18\pdfbox-tools-2.0.18.jar;C:\Users\fitim.m2\repository\org\apache\pdfbox\pdfbox-debugger\2.0.18\pdfbox-debugger-2.0.18.jar;C:\Users\fitim.m2\repository\org\apache\pdfbox\jbig2-imageio\3.0.3\jbig2-imageio-3.0.3.jar;C:\Users\fitim.m2\repository\commons-io\commons-io\2.6\commons-io-2.6.jar;C:\Users\fitim.m2\repository\net\sourceforge\lept4j\lept4j\1.13.0\lept4j-1.13.0.jar;C:\Users\fitim.m2\repository\org\jboss\jboss-vfs\3.2.15.Final\jboss-vfs-3.2.15.Final.jar;C:\Users\fitim.m2\repository\org\jboss\logging\jboss-logging\3.1.4.GA\jboss-logging-3.1.4.GA.jar;C:\Users\fitim.m2\repository\ch\qos\logback\logback-classic\1.2.3\logback-classic-1.2.3.jar;C:\Users\fitim.m2\repository\ch\qos\logback\logback-core\1.2.3\logback-core-1.2.3.jar;C:\Users\fitim.m2\repository\org\slf4j\slf4j-api\1.7.25\slf4j-api-1.7.25.jar;C:\Users\fitim.m2\repository\org\slf4j\jul-to-slf4j\1.7.30\jul-to-slf4j-1.7.30.jar;C:\Users\fitim.m2\repository\org\slf4j\jcl-over-slf4j\1.7.30\jcl-over-slf4j-1.7.30.jar;C:\Users\fitim.m2\reposi
tory\org\slf4j\log4j-over-slf4j\1.7.30\log4j-over-slf4j-1.7.30.jar;C:\Users\fitim.m2\repository\com\googlecode\json-simple\json-simple\1.1.1\json-simple-1.1.1.jar;C:\Users\fitim.m2\repository\junit\junit\4.10\junit-4.10.jar;C:\Users\fitim.m2\repository\org\hamcrest\hamcrest-core\1.1\hamcrest-core-1.1.jar;C:\Users\fitim.m2\repository\org\apache\pdfbox\pdfbox\2.0.18\pdfbox-2.0.18.jar;C:\Users\fitim.m2\repository\org\apache\pdfbox\fontbox\2.0.18\fontbox-2.0.18.jar;C:\Users\fitim.m2\repository\commons-logging\commons-logging\1.2\commons-logging-1.2.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-chrome-driver\3.141.59\selenium-chrome-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-api\3.141.59\selenium-api-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-remote-driver\3.141.59\selenium-remote-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\apache\commons\commons-exec\1.3\commons-exec-1.3.jar;C:\Users\fitim.m2\repository\com\google\guava\guava\25.0-jre\guava-25.0-jre.jar;C:\Users\fitim.m2\repository\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;C:\Users\fitim.m2\repository\org\checkerframework\checker-compat-qual\2.0.0\checker-compat-qual-2.0.0.jar;C:\Users\fitim.m2\repository\com\google\errorprone\error_prone_annotations\2.1.3\error_prone_annotations-2.1.3.jar;C:\Users\fitim.m2\repository\com\google\j2objc\j2objc-annotations\1.1\j2objc-annotations-1.1.jar;C:\Users\fitim.m2\repository\org\codehaus\mojo\animal-sniffer-annotations\1.14\animal-sniffer-annotations-1.14.jar;C:\Users\fitim.m2\repository\com\squareup\okhttp3\okhttp\3.11.0\okhttp-3.11.0.jar;C:\Users\fitim.m2\repository\com\squareup\okio\okio\1.14.0\okio-1.14.0.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-java\3.141.59\selenium-java-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-edge-driver\3.141.59\selenium-edge-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumh
q\selenium\selenium-firefox-driver\3.141.59\selenium-firefox-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-ie-driver\3.141.59\selenium-ie-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-opera-driver\3.141.59\selenium-opera-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-safari-driver\3.141.59\selenium-safari-driver-3.141.59.jar;C:\Users\fitim.m2\repository\org\seleniumhq\selenium\selenium-support\3.141.59\selenium-support-3.141.59.jar;C:\Users\fitim.m2\repository\com\profesorfalken\jPowerShell\3.1.1\jPowerShell-3.1.1.jar;C:\Users\fitim.m2\repository\org\robotframework\javalib-core\1.2.1\javalib-core-1.2.1.jar;C:\Users\fitim.m2\repository\commons-collections\commons-collections\3.2\commons-collections-3.2.jar;C:\Users\fitim.m2\repository\com\thoughtworks\paranamer\paranamer\1.1.2\paranamer-1.1.2.jar;C:\Users\fitim.m2\repository\org\hamcrest\hamcrest-all\1.3\hamcrest-all-1.3.jar;C:\Users\fitim.m2\repository\net\bytebuddy\byte-buddy\1.10.7\byte-buddy-1.10.7.jar” com.read.image.ReadImageText
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 377
The (quick) [brown] {fox} jumps!
Over the $43,456.78 #90 dog
& duck/goose, as 12.5% of E-mail
from aspammer@website.com is spam.
Der ,schnelle” braune Fuchs springt
iiber den faulen Hund. Le renard brun
«rapide» saute par-dessus le chien
paresseux. La volpe marrone rapida
salta sopra il cane pigro. El zorro
marron rapido salta sobre el perro
perezoso. A raposa marrom ripida
salta sobre o cdo preguigoso.

Process finished with exit code 0

But when I try the same thing in Katalon Studio, I get this error:

2020-05-14 21:59:54.119 ERROR c.k.katalon.core.main.TestCaseExecutor - :x: Test Cases/ReadImageTxt/readImageText FAILED.
Reason:
java.lang.Error: Invalid memory access
at com.sun.jna.Native.invokePointer(Native Method)
at com.sun.jna.Function.invokePointer(Function.java:470)
at com.sun.jna.Function.invoke(Function.java:404)
at com.sun.jna.Function.invoke(Function.java:315)
at com.sun.jna.Library$Handler.invoke(Library.java:212)
at com.sun.proxy.$Proxy10.TessBaseAPIGetUTF8Text(Unknown Source)
at net.sourceforge.tess4j.Tesseract.getOCRText(Tesseract.java:437)
at net.sourceforge.tess4j.Tesseract.doOCR(Tesseract.java:292)
at net.sourceforge.tess4j.Tesseract.doOCR(Tesseract.java:213)
at net.sourceforge.tess4j.Tesseract.doOCR(Tesseract.java:197)
at net.sourceforge.tess4j.ITesseract$doOCR$3.call(Unknown Source)
at ReadImageText.ReadText(Script1589480965982.groovy:41)
at ReadImageText$ReadText.call(Unknown Source)
at readImageText.run(readImageText:23)
at com.kms.katalon.core.main.ScriptEngine.run(ScriptEngine.java:194)
at com.kms.katalon.core.main.ScriptEngine.runScriptAsRawText(ScriptEngine.java:119)
at com.kms.katalon.core.main.TestCaseExecutor.runScript(TestCaseExecutor.java:337)
at com.kms.katalon.core.main.TestCaseExecutor.doExecute(TestCaseExecutor.java:328)
at com.kms.katalon.core.main.TestCaseExecutor.processExecutionPhase(TestCaseExecutor.java:307)
at com.kms.katalon.core.main.TestCaseExecutor.accessMainPhase(TestCaseExecutor.java:299)
at com.kms.katalon.core.main.TestCaseExecutor.execute(TestCaseExecutor.java:233)
at com.kms.katalon.core.main.TestCaseMain.runTestCase(TestCaseMain.java:114)
at com.kms.katalon.core.main.TestCaseMain$runTestCase$0.call(Unknown Source)
at TempTestCase1589482771562.run(TempTestCase1589482771562.groovy:25)

@kazurayam, hi do you have any tips for that issue?

I do not know Tesseract at all.

But I read the message and found that the program tried to call an OS-native binary module and failed.

I looked at the project site

http://tess4j.sourceforge.net/usage.html

It says

The Windows native libraries were built with VS2019 and therefore depend on the Visual C++ 2019 Redistributable Packages. Use Visual C++ 2017 Redistributable for Tess4J 4.1.x as they are built using VS2017.
The Linux shared object library ( libtesseract.so ) equivalent to the DLL can be installed or built from the source with the instructions given in Tesseract Wiki.

This description suggests that some configuration effort is required. I have no further ideas.

Hi, thanks for your quick response.
I wonder why there are no issues with IntelliJ on the same Windows machine.
This seems to be somehow related to KS.

Similar to this?

https://groups.google.com/forum/#!topic/tesseract-ocr/5VaXcfo8F0Q

Thanks,
I need to check the image size.
But why does IntelliJ accept the image as it is (at the same size)?
This is still weird to me.

Could the JDK version be different?

hi,

These are my Java paths:
C:\Users\fitim>for %i in (javac.exe) do @echo. %~$PATH:i
C:\Program Files\Java\jdk-14.0.1\bin\javac.exe

C:\Users\fitim>dir /b /s java.exe
C:\Users\fitim\.IdeaIC2019.3\system\tmp\patch-update\jre\bin\java.exe
C:\Users\fitim\.katalon\7.4.2\Katalon_Studio_Engine_Windows_64-7.4.2\jre\bin\java.exe
C:\Users\fitim\.PyCharmCE2019.3\system\tmp\patch-update\jre\bin\java.exe
C:\Users\fitim\Google Drive\Katalon\KatalonStudio\jre\bin\java.exe
C:\Users\fitim\katalon\Katalon_Studio_Engine_Windows_64-7.2.5\jre\bin\java.exe
C:\Users\fitim\katalon\Katalon_Studio_Windows_64-7.2.5\jre\bin\java.exe

Does Katalon Studio ship with its own JDK?

@ThanhTo could you give some support on why this does not work in KS but works in IntelliJ?

Your IntelliJ IDEA project and your Katalon Studio project — both would be difficult for other people to reproduce. If you want to ask about those projects, you should create distributable copies of them and share them.

ok,

I got it to work with Python in Katalon Studio:

2020-05-15 16:55:24.611 INFO com.kms.katalon.core.util.KeywordUtil - {“keyword”:“keywords.get_image_text”,“keywordArgs”:[“C:\Users\fitim\KatalonProjectFromGit\KatalonProject\Include\tessdata\eurotext.png”],“outputPath”:“C:\Users\fitim\AppData\Local\Temp\ks_py_output_3401877788599434501.tmp”}
2020-05-15 16:55:24.668 INFO com.kms.katalon.core.util.KeywordUtil - Project directory C:/Users/fitim/KatalonProjectFromGit/KatalonProject
2020-05-15 16:55:34.634 INFO com.kms.katalon.core.util.KeywordUtil - DEBUGThe (quick) [brown] {fox} jumps!
2020-05-15 16:55:34.636 INFO com.kms.katalon.core.util.KeywordUtil - Over the $43,456.78 #90 dog
2020-05-15 16:55:34.638 INFO com.kms.katalon.core.util.KeywordUtil - & duck/goose, as 12.5% of E-mail
2020-05-15 16:55:34.638 INFO com.kms.katalon.core.util.KeywordUtil - from aspammer@website.com is spam.
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - Der ,schnelle� braune Fuchs springt
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - iiber den faulen Hund. Le renard brun
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - �rapide� saute par-dessus le chien
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - paresseux. La volpe marrone rapida
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - salta sopra il cane pigro. El zorro
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - marron rapido salta sobre el perro
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - perezoso. A raposa marrom ripida
2020-05-15 16:55:34.654 INFO com.kms.katalon.core.util.KeywordUtil - salta sobre o cdo preguigoso.
2020-05-15 16:55:34.669 INFO com.kms.katalon.core.util.KeywordUtil - Finish Python keyword execution

Thanks for your support :slight_smile:

And to make it clearer what was done, here is
the image whose text is read

and the result KS produced when reading it:
DEBUGThe (quick) [brown] {fox} jumps!
2020-05-15 17:17:44.707 DEBUG testcase.passToPython - 1: println(a)
Over the $43,456.78 #90 dog
2020-05-15 17:17:44.707 DEBUG testcase.passToPython - 1: println(a)
& duck/goose, as 12.5% of E-mail
2020-05-15 17:17:44.723 DEBUG testcase.passToPython - 1: println(a)
from aspammer@website.com is spam.
2020-05-15 17:17:44.733 DEBUG testcase.passToPython - 1: println(a)
Der ,schnelle� braune Fuchs springt
2020-05-15 17:17:44.733 DEBUG testcase.passToPython - 1: println(a)
iiber den faulen Hund. Le renard brun
2020-05-15 17:17:44.733 DEBUG testcase.passToPython - 1: println(a)
�rapide� saute par-dessus le chien
2020-05-15 17:17:44.749 DEBUG testcase.passToPython - 1: println(a)
paresseux. La volpe marrone rapida
2020-05-15 17:17:44.749 DEBUG testcase.passToPython - 1: println(a)
salta sopra il cane pigro. El zorro
2020-05-15 17:17:44.764 DEBUG testcase.passToPython - 1: println(a)
marron rapido salta sobre el perro
2020-05-15 17:17:44.764 DEBUG testcase.passToPython - 1: println(a)
perezoso. A raposa marrom ripida
2020-05-15 17:17:44.764 DEBUG testcase.passToPython - 1: println(a)
salta sobre o cdo preguigoso.

Hi @kazurayam,
here is more info about the KS Java issue.

TESTCASE
// Call the readImageText keyword via CustomKeywords, addressing it by its
// fully qualified name, and keep whatever it returns.
def ret = CustomKeywords.'com.tesseract.image.TesseractUtil.readImageText'()
// Print the value returned by the keyword.
print ret


When debugging this line:
def ret = CustomKeywords.'com.tesseract.image.TesseractUtil.readImageText'()
the cursor jumps to line 64
in the class
public class CustomKeywordDelegatingMetaClass extends DelegatingMetaClass {
        } catch (Throwable throwable) {
          errorCollector.addError(throwable);

And here is a simple example keyword:

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import static org.junit.Assert.*;


/**
 * Katalon custom keyword class that runs Tess4J OCR on a fixed sample image.
 */
public class TesseractUtil {

	/**
	 * Runs OCR on the hard-coded sample image, prints the recognized text and
	 * asserts that it begins with the expected leading text.
	 *
	 * @return the full text returned by {@code doOCR}
	 */
	@Keyword
	public String readImageText() {

		String expectedPrefix = "The (quick) [brown] {fox} jumps!\nOver the";

		def datapath = "C:\\Users\\fitim\\KatalonProjectFromGit\\KatalonProject\\Include\\tessdata"
		def filepath = "C:\\Users\\fitim\\KatalonProjectFromGit\\KatalonProject\\Include\\tessdata\\eurotext.png"

		ITesseract instance = new Tesseract();
		instance.setDatapath(datapath);
		// NOTE(review): engine mode is passed as the raw value 1; confirm the
		// intended meaning against the Tess4J/Tesseract mode constants.
		instance.setOcrEngineMode(1);

		File image = new File(filepath);

		// When debugging in Katalon Studio, execution jumps from this call to
		// line 64 of class CustomKeywordDelegatingMetaClass.
		String result = instance.doOCR(image);
		print(result);

		// Clamp the compared length: if OCR returns less text than expected, the
		// assertion fails with a readable comparison instead of substring()
		// throwing StringIndexOutOfBoundsException.
		int prefixLength = Math.min(expectedPrefix.length(), result.length());
		assertEquals(expectedPrefix, result.substring(0, prefixLength));
		return result
	}
}

To test this, you need to download the files eng.traineddata and osd.traineddata and place them in the project's tessdata folder.

Here is all the documentation:


https://github.com/tesseract-ocr/tessdata/blob/master/osd.traineddata
https://github.com/tesseract-ocr/tessdata/blob/master/eng.traineddata

and the test image could be

and
the IntelliJ project is here

1 Like

I think you are done and satisfied with running Tesseract with Python in KS. Do you still want me to do anything? I am not so much …

Hi,
I am not sure whether I can add the Java version to a WebUI block;
at least the Python keyword does not work inside a WebUI block.

My goal is to take a screenshot after each test step, read the text in the image, and verify it against the expected word(s).

Hello, thank you.
Could you please share the KS project too? Thanks in advance.