// Patch note (HtmlCleanerParser.getDomHtmlNode): after
// DomSerializer.createDOM(rootTagNode) builds the DOM Document, the three added
// lines read the "lang" attribute from rootTagNode and, when it is non-null,
// set it on document.getDocumentElement() before the Document is wrapped in a
// DomHtmlNode and returned.
// NOTE(review): presumably createDOM does not carry the root tag's attributes
// over to the document element on its own - confirm against the HtmlCleaner
// version in use.
// NOTE(review): this diff appears to be collapsed onto a single line; the
// original line breaks would need restoring before it can be applied as a patch.
diff --git a/src/main/java/com/jaeksoft/searchlib/parser/htmlParser/HtmlCleanerParser.java b/src/main/java/com/jaeksoft/searchlib/parser/htmlParser/HtmlCleanerParser.java index 2afacd09e..cecd47e19 100644 --- a/src/main/java/com/jaeksoft/searchlib/parser/htmlParser/HtmlCleanerParser.java +++ b/src/main/java/com/jaeksoft/searchlib/parser/htmlParser/HtmlCleanerParser.java @@ -80,6 +80,9 @@ protected HtmlNodeAbstract getDocument(String pageSource) private DomHtmlNode getDomHtmlNode() throws ParserConfigurationException { Document document = new DomSerializer(cleaner.getProperties(), true) .createDOM(rootTagNode); + String lang = rootTagNode.getAttributeByName("lang"); + if (lang != null) + document.getDocumentElement().setAttribute("lang", lang); return new DomHtmlNode(document); }