diff --git a/HocrConverter.py b/HocrConverter.py index 7cf9206..33e2451 100755 --- a/HocrConverter.py +++ b/HocrConverter.py @@ -421,6 +421,8 @@ def to_pdf(self, imageFileNames, outFileName, fontname="Courier", fontsize=12, w textContent = unicodedata.normalize("NFC",unicode(" ".join([elem for elem in map((lambda text: text.strip()),line.itertext()) if len(elem) > 0]))) else: textContent = line.text + if ( textContent == None ): + textContent = line.findtext("%sstrong"%(self.xmlns)) if ( textContent == None ): textContent = u"" textContent = textContent.rstrip()