Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
<version>1.3</version>
</dependency>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.16</version>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
Expand Down
82 changes: 41 additions & 41 deletions src/main/java/org/opengraph/OpenGraph.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package org.opengraph;

import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;

import java.io.BufferedReader;
import java.io.InputStreamReader;
Expand All @@ -13,6 +11,13 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;



/**
* A Java object representation of an Open Graph enabled webpage.
* A simplified layer over a Hashtable.
Expand Down Expand Up @@ -88,51 +93,46 @@ public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOExceptio
}

String headContentsStr = headContents.toString();
HtmlCleaner cleaner = new HtmlCleaner();
// parse the string HTML
TagNode pageData = cleaner.clean(headContentsStr);

// read in the declared namespaces
boolean hasOGspec = false;
TagNode headElement = pageData.findElementByName("head", true);
if (headElement.hasAttribute("prefix"))
{
String namespaceData = headElement.getAttributeByName("prefix");
Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
Matcher matcher = pattern.matcher(namespaceData);
while (matcher.find())
{
// parse the string HTML
Document parsedDocument = Jsoup.parse(headContentsStr);
// read in the declared namespaces
Elements headElement = parsedDocument.getElementsByTag("head");

boolean hasOGspec = false;
if (headElement.hasAttr("prefix")) {
String namespaceData = headElement.attr("prefix");
Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
Matcher matcher = pattern.matcher(namespaceData);
while (matcher.find()) {
String prefix = matcher.group(2);
String documentURI = matcher.group(3);
pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
if (prefix.equals("og"))
hasOGspec = true;
String documentURI = matcher.group(3);
pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
if (prefix.equals("og"))
hasOGspec = true;
}
}
}

// some pages do not include the new OG spec
// this fixes compatibility
if (!hasOGspec)
pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));
// some pages do not include the new OG spec
// this fixes compatibility
if (!hasOGspec)
pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));

// open only the meta tags
TagNode[] metaData = pageData.getElementsByName("meta", true);
for (TagNode metaElement : metaData)
{
for (OpenGraphNamespace namespace : pageNamespaces)
{
String target = null;
if (metaElement.hasAttribute("property"))
target = "property";
else if (metaElement.hasAttribute("name"))
target = "name";

if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":"))
{
setProperty(namespace, metaElement.getAttributeByName(target), metaElement.getAttributeByName("content"));
break;
}
}
Elements metaData = parsedDocument.getElementsByTag("meta");
for (Element metaElement : metaData) {
for (OpenGraphNamespace namespace : pageNamespaces) {
String target = null;
if (metaElement.hasAttr("property"))
target = "property";
else if (metaElement.hasAttr("name"))
target = "name";

if (target != null && metaElement.attr(target).startsWith(namespace.getPrefix() + ":")) {
setProperty(namespace, metaElement.attr(target), metaElement.attr("content"));
break;
}
}
}

/**
Expand Down