From b7d6d959f5ac66996728740cf6d5114a630bb4a2 Mon Sep 17 00:00:00 2001 From: xulihang Date: Wed, 5 Jun 2019 19:30:50 +0800 Subject: [PATCH] use thread for PDFBOX to make the app responsive --- BasicCAT/BasicCAT.b4j | 48 +++++++++++++++--------------- BasicCAT/BasicCAT.b4j.meta | 2 +- BasicCAT/PDF2TXT.bas | 40 +++++++++++++------------ BasicCAT/Project.bas | 5 ++-- BasicCAT/languageChooser.bas | 19 ++++++++++-- BasicCAT/pdfbox.bas | 57 +++++++++++++++++++++++++----------- 6 files changed, 106 insertions(+), 65 deletions(-) diff --git a/BasicCAT/BasicCAT.b4j b/BasicCAT/BasicCAT.b4j index cce6201..86a10f8 100644 --- a/BasicCAT/BasicCAT.b4j +++ b/BasicCAT/BasicCAT.b4j @@ -207,32 +207,32 @@ Module38=Map2Xml Module39=SRX Module4=CustomListView Module40=POIWord -Module41=pdfbox -Module42=git -Module43=dictWebView -Module44=FontPicker -Module45=TermEditor -Module46=TermManager -Module47=HistoryViewer -Module48=ClientKVS -Module49=statistics +Module41=git +Module42=dictWebView +Module43=FontPicker +Module44=TermEditor +Module45=TermManager +Module46=HistoryViewer +Module47=ClientKVS +Module48=statistics +Module49=LanguagePairSelector Module5=txtFilter -Module50=LanguagePairSelector -Module51=idmlUtils -Module52=editDistance -Module53=InputBox -Module54=MTParamsFiller -Module55=filterGenericUtils -Module56=TBX -Module57=CallSubUtils -Module58=viewSegment -Module59=serverLauncher +Module50=idmlUtils +Module51=editDistance +Module52=InputBox +Module53=MTParamsFiller +Module54=filterGenericUtils +Module55=TBX +Module56=CallSubUtils +Module57=viewSegment +Module58=serverLauncher +Module59=languageChooser Module6=TM -Module60=languageChooser -Module61=TMEditor -Module62=TextFlow -Module63=opennlp -Module64=coordinate +Module60=TMEditor +Module61=TextFlow +Module62=opennlp +Module63=coordinate +Module64=pdfbox Module7=Term Module8=KeyValueStore Module9=ProjectSettings diff --git a/BasicCAT/BasicCAT.b4j.meta b/BasicCAT/BasicCAT.b4j.meta index a61b074..d0eb689 100644 --- a/BasicCAT/BasicCAT.b4j.meta +++ b/BasicCAT/BasicCAT.b4j.meta @@ -194,4 +194,4 @@ ModuleClosedNodes7= ModuleClosedNodes8= ModuleClosedNodes9= SelectedBuild=0 -VisibleModules=1,2,3,4,5,6,7,8,9,10,42,15,12,21 +VisibleModules=1,2,3,4,5,6,7,8,9,10,41,15,12,21,64,36,59 diff --git a/BasicCAT/PDF2TXT.bas b/BasicCAT/PDF2TXT.bas index b6e9978..f66e2eb 100644 --- a/BasicCAT/PDF2TXT.bas +++ b/BasicCAT/PDF2TXT.bas @@ -31,14 +31,16 @@ Sub StripButton_MouseClicked (EventData As MouseEvent) fx.Msgbox(frm,"Please choose a pdf file first.","") Return End If + Dim pdfbox1 As pdfbox + pdfbox1.Initialize(label1.Text) If IncludePageNumCheckBox.Checked Then If offsetTextField.Text="" Or PageAffixTextField.Text="" Then fx.Msgbox(frm,"Please fill affix and offset first.","") Return End If - TextArea1.text=pdfbox.stripPDFText(label1.Text,True,FacingPageCheckBox.Checked,PageAffixTextField.Text,offsetTextField.Text) + TextArea1.text=pdfbox1.stripPDFText(True,FacingPageCheckBox.Checked,PageAffixTextField.Text,offsetTextField.Text) Else - TextArea1.text=pdfbox.stripPDFText(label1.Text,False,False,"",0) + TextArea1.text=pdfbox1.stripPDFText(False,False,"",0) End If End Sub @@ -117,26 +119,23 @@ Sub ocrButton_MouseClicked (EventData As MouseEvent) Dim lc As languageChooser lc.Initialize - Dim langs As List - langs=lc.ShowAndWait - Dim langsParam As String - For Each chkBox As CheckBox In langs - If chkBox.Checked Then - langsParam=langsParam&chkBox.Text&"+" - End If - Next - If langsParam.EndsWith("+") Then - langsParam=langsParam.SubString2(0,langsParam.Length-1) - End If - Log(langsParam) + Dim langsParam As String=lc.ShowAndWait If langsParam="" Then Return End If Label2.Text="Converting pdf to images..." Dim files As List - wait for (pdfbox.getImage(File.GetFileParent(label1.Text),File.GetName(label1.Text))) complete (result As List) - files=result + files.Initialize + Dim dir As String=File.GetFileParent(label1.Text) + Dim filename As String=File.GetName(label1.Text) + Dim pdfbox1 As pdfbox + pdfbox1.Initialize(File.Combine(dir,filename)) + wait for (pdfbox1.getImageAsync()) complete (result As Object) + For i=0 To pdfbox1.PageNum-1 + files.Add(File.Combine(dir,i&".jpg")) + Next + Label2.Text="OCRing..." If IncludePageNumCheckBox.Checked Then wait for (ocrWithPagenum(files,langsParam,PageAffixTextField.Text,offsetTextField.Text)) complete (text As String) @@ -200,7 +199,8 @@ Sub ocrWithPagenum(files As List,langsParam As String,affix As String,offset As Else path="tesseract" End If - Dim content As String + Dim contentSB As StringBuilder + contentSB.Initialize Dim pdfnum As Int=0 For i=0 To files.Size-1 pdfnum=pdfnum+1 @@ -228,7 +228,9 @@ Sub ocrWithPagenum(files As List,langsParam As String,affix As String,offset As If Success And ExitCode = 0 Then Log("Success") Log(StdOut) - content=content&pageStart&CRLF&CRLF&removeLines(File.ReadString(dir,i&".txt")) + contentSB.Append(pageStart).Append(CRLF) + contentSB.Append(CRLF).Append(File.ReadString(dir,i&".txt")) + 'removeLines Else Log("Error: " & StdErr) End If @@ -236,7 +238,7 @@ Sub ocrWithPagenum(files As List,langsParam As String,affix As String,offset As Next - Return content + Return contentSB.ToString End Sub Sub ocr(files As List,langsParam As String) As ResumableSub diff --git a/BasicCAT/Project.bas b/BasicCAT/Project.bas index f2d800f..bb6f850 100644 --- a/BasicCAT/Project.bas +++ b/BasicCAT/Project.bas @@ -515,7 +515,7 @@ Public Sub commitAndPush(commitMessage As String) End If End If Else - wait for (updateLocalFileBasedonFetch(username,password,email)) Complete (success as Object) + wait for (updateLocalFileBasedonFetch(username,password,email)) Complete (success As Object) Dim diffList As List diffList=projectGit.diffList Log(diffList) @@ -575,7 +575,7 @@ Sub samelocalHeadAndRemoteHead(username As String,password As String,fetch As Bo Return result End Sub -Sub updateLocalFileBasedonFetch(username As String,password As String,email As String) as ResumableSub +Sub updateLocalFileBasedonFetch(username As String,password As String,email As String) As ResumableSub wait for (samelocalHeadAndRemoteHead(username,password,True)) Complete (isSame As Boolean) If isSame = False Then Dim localHead,remoteHead As String @@ -637,6 +637,7 @@ Sub updateLocalFileBasedonFetch(username As String,password As String,email As S End If Log("worddir,after: "&projectGit.getWorkdirPath) + return True End Sub Sub updateWorkFile(filename As String) As Boolean diff --git a/BasicCAT/languageChooser.bas b/BasicCAT/languageChooser.bas index b99a1ff..4d7c029 100644 --- a/BasicCAT/languageChooser.bas +++ b/BasicCAT/languageChooser.bas @@ -24,11 +24,26 @@ Public Sub Initialize Next End Sub -Public Sub ShowAndWait As List +Public Sub ShowAndWait As String frm.ShowAndWait - Return ListView1.Items + Dim langsParam As String + For Each chkBox As CheckBox In ListView1.Items + If chkBox.Checked Then + langsParam=langsParam&chkBox.Text&"+" + End If + Next + If langsParam.EndsWith("+") Then + langsParam=langsParam.SubString2(0,langsParam.Length-1) + End If + Log(langsParam) + Return langsParam End Sub Sub OKButton_MouseClicked (EventData As MouseEvent) frm.Close +End Sub + +Sub frm_CloseRequest (EventData As Event) + ListView1.Items.Clear + frm.Close End Sub \ No newline at end of file diff --git a/BasicCAT/pdfbox.bas b/BasicCAT/pdfbox.bas index 3dd5c1f..73bdd72 100644 --- a/BasicCAT/pdfbox.bas +++ b/BasicCAT/pdfbox.bas @@ -1,19 +1,27 @@ B4J=true Group=Default Group ModulesStructureVersion=1 -Type=StaticCode -Version=6.51 +Type=Class +Version=7.32 @EndOfDesignText@ -'Static code module -Sub Process_Globals +Sub Class_Globals Private fx As JFX + Private th As Thread + Private doc As JavaObject + Private path As String End Sub -Sub stripPDFText(filepath As String, includePageNum As Boolean,isFacingPage As Boolean,affix As String,offset As Int) As String +'Initializes the object. You can add parameters to this method if needed. +Public Sub Initialize(filePath As String) + th.Initialise("th") + path=filePath Dim PDDocument As JavaObject PDDocument.InitializeStatic("org.apache.pdfbox.pdmodel.PDDocument") - Dim doc As JavaObject - doc=PDDocument.RunMethodJO("load",Array(getFile(filepath))) + doc=PDDocument.RunMethodJO("load",Array(getFile(filePath))) +End Sub + +Public Sub stripPDFText(includePageNum As Boolean,isFacingPage As Boolean,affix As String,offset As Int) As String + Dim PDFTextStripper As JavaObject PDFTextStripper.InitializeNewInstance("org.apache.pdfbox.text.PDFTextStripper",Null) Dim pageNum As Int @@ -45,27 +53,43 @@ Sub stripPDFText(filepath As String, includePageNum As Boolean,isFacingPage As B Return text End Sub -Sub getImage(dir As String,filename As String) As ResumableSub + +Public Sub getPageNum As Int + Dim files As List + files.Initialize + SetSystemProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider") + Dim pageNum As Int + pageNum=doc.RunMethod("getNumberOfPages",Null) + Return pageNum +End Sub + +Public Sub getImageAsync As ResumableSub + th.Start(Me,"getImage",Array As Object("placeholder")) + wait for th_Ended(endedOK As Boolean, error As String) + Log(endedOK) + Return endedOK +End Sub + +Public Sub getImage(param As String) Dim files As List files.Initialize SetSystemProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider") Dim PDDocument As JavaObject PDDocument.InitializeStatic("org.apache.pdfbox.pdmodel.PDDocument") - Dim doc As JavaObject - doc=PDDocument.RunMethodJO("load",Array(getFile(File.Combine(dir,filename)))) Dim pageNum As Int pageNum=doc.RunMethod("getNumberOfPages",Null) Dim PDFRenderer As JavaObject PDFRenderer.InitializeNewInstance("org.apache.pdfbox.rendering.PDFRenderer",Array(doc)) For i=0 To pageNum-1 Log(i) - Sleep(0) - renderImageToFile(PDFRenderer,files,dir,i) + 'Sleep(0) + 'files.Add(File.Combine(dir,i&".jpg")) + renderImageToFile(PDFRenderer,File.GetFileParent(path),i) Next - Return files + 'Return files End Sub -Sub renderImageToFile(PDFRenderer As JavaObject,files As List,dir As String,i As Int) +Sub renderImageToFile(PDFRenderer As JavaObject,dir As String,i As Int) Dim bi As JavaObject Dim dpi As Float dpi=150 @@ -76,11 +100,10 @@ Sub renderImageToFile(PDFRenderer As JavaObject,files As List,dir As String,i As imageIO.InitializeStatic("javax.imageio.ImageIO") imageIO.RunMethod("write",Array(bi,"jpg",out)) out.Close - files.Add(File.Combine(dir,i&".jpg")) End Sub -Sub getFile(path As String) As JavaObject +Sub getFile(filepath As String) As JavaObject Dim fileJO As JavaObject - fileJO.InitializeNewInstance("java.io.File",Array(path)) + fileJO.InitializeNewInstance("java.io.File",Array(filepath)) Return fileJO End Sub \ No newline at end of file