Skip to content

Commit

Permalink
nokogumbo-import: restructure as a mini_portile recipe
Browse files Browse the repository at this point in the history
- move nokogumbo.c to ext/nokogiri/gumbo.c
- remove the nokogumbo extension
- treat the gumbo parser as a mini_portile recipe

I'm not 100% sure this is the right thing to do, but let's see how far
I get on cross-compiling before I give up.
  • Loading branch information
flavorjones committed Apr 26, 2021
1 parent fc5ae1c commit 0f22de1
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 26 deletions.
45 changes: 42 additions & 3 deletions ext/nokogiri/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def process_recipe(name, version, static_p, cross_p)
require 'mini_portile2'
message("Using mini_portile version #{MiniPortile::VERSION}\n")

if name != "libxml2" && name != "libxslt"
unless ["libxml2", "libxslt"].include?(name)
OTHER_LIBRARY_VERSIONS[name] = version
end

Expand Down Expand Up @@ -486,7 +486,7 @@ def process_recipe(name, version, static_p, cross_p)
end
end

message(<<~EOM)
message(<<~EOM) if name != "libgumbo"
The Nokogiri maintainers intend to provide timely security updates, but if
this is a concern for you and want to use your OS/distro system library
Expand All @@ -498,7 +498,7 @@ def process_recipe(name, version, static_p, cross_p)
EOM

message(<<~EOM) if name == 'libxml2'
Note, however, that nokogiri cannot guarantee compatiblity with every
Note, however, that nokogiri cannot guarantee compatibility with every
version of libxml2 that may be provided by OS/package vendors.
EOM
Expand Down Expand Up @@ -865,6 +865,45 @@ def compile
ensure_func("exsltFuncRegister", "libexslt/exslt.h")
end

# Build the vendored gumbo parser as a mini_portile recipe. The sources are
# copied out of this repository (see #extract) rather than downloaded, and
# the recipe reports itself as already downloaded and configured.
libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p) do |recipe|
  recipe.configure_options = []

  class << recipe
    # Report the recipe as already downloaded.
    def downloaded?
      true
    end

    # Report the recipe as already configured.
    def configured?
      true
    end

    # Copy everything under gumbo-parser/src into "gumbo-parser" in tmp_path.
    def extract
      destination = File.join(tmp_path, "gumbo-parser")
      output("Copying gumbo-parser files into #{destination}...")
      FileUtils.mkdir_p(destination)
      sources = Dir.glob(File.join(PACKAGE_ROOT_DIR, "gumbo-parser/src/*"))
      FileUtils.cp(sources, destination)
    end

    # Run make, passing the recipe's compiler and the environment's CFLAGS
    # extended with -fPIC and -g.
    def compile
      flags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
      execute("compile", [make_cmd, "CC=#{gcc_cmd}", "CFLAGS=#{flags}"])
    end

    # Copy libgumbo.a into lib/ and the headers into include/ under port_path.
    def install
      library_dir = File.join(port_path, "lib")
      header_dir = File.join(port_path, "include")
      FileUtils.mkdir_p([library_dir, header_dir])
      FileUtils.cp(File.join(work_path, "libgumbo.a"), library_dir)
      FileUtils.cp(Dir.glob(File.join(work_path, "*.h")), header_dir)
    end
  end
end

# Point the extension build at the recipe's headers and static archive, then
# check that the parser entry point can be found.
libgumbo_prefix = libgumbo_recipe.path
append_cppflags("-I#{File.join(libgumbo_prefix, "include")}")
$libs += " " + File.join(libgumbo_prefix, "lib", "libgumbo.a")
$LIBPATH |= [File.join(libgumbo_prefix, "lib")]
ensure_func("gumbo_parse_with_options", "gumbo.h")

have_func('xmlHasFeature') || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
have_func('xmlFirstElementChild') # introduced in libxml 2.7.3
have_func('xmlRelaxNGSetParserStructuredErrors') # introduced in libxml 2.6.24
Expand Down
21 changes: 8 additions & 13 deletions ext/nokogumbo/nokogumbo.c → ext/nokogiri/gumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,11 @@
// requirements as Ruby objects are only built when necessary.
//

#include <assert.h>
#include <ruby.h>
#include <ruby/version.h>
#include <nokogiri.h>

#include "gumbo.h"

// class constants
static VALUE Document;
VALUE cNokogiriHtml5Document;

// Interned symbols
static ID internal_subset;
Expand Down Expand Up @@ -384,7 +381,7 @@ parse_continue(VALUE parse_args)
}
args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
build_tree(doc, (xmlNodePtr)doc, output->document);
VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
add_errors(output, rdoc, args->input, args->url_or_frag);
return rdoc;
Expand Down Expand Up @@ -591,21 +588,19 @@ fragment_continue(VALUE parse_args)

// Initialize the Gumbo bindings: set up the HTML5 Document class and the parse/fragment entry points.
void
Init_nokogumbo()
noko_init_gumbo()
{
// Class constants.
VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5"));
Document = rb_const_get(HTML5, rb_intern_const("Document"));
rb_gc_register_mark_object(Document);
cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtmlDocument);
rb_gc_register_mark_object(cNokogiriHtml5Document);

// Interned symbols.
internal_subset = rb_intern_const("internal_subset");
parent = rb_intern_const("parent");

// Attach the parse and fragment singleton methods to the Gumbo module.
VALUE Gumbo = rb_define_module_under(mNokogiri, "Gumbo");
rb_define_singleton_method(Gumbo, "parse", parse, 5);
rb_define_singleton_method(Gumbo, "fragment", fragment, 6);
rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5);
rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
}

// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
14 changes: 10 additions & 4 deletions ext/nokogiri/nokogiri.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#include <nokogiri.h>

VALUE mNokogiri ;
VALUE mNokogiriGumbo ;
VALUE mNokogiriHtml ;
VALUE mNokogiriHtmlSax ;
VALUE mNokogiriHtml5 ;
VALUE mNokogiriXml ;
VALUE mNokogiriXmlSax ;
VALUE mNokogiriXmlXpath ;
Expand Down Expand Up @@ -44,6 +46,7 @@ void noko_init_html_element_description();
void noko_init_html_entity_lookup();
void noko_init_html_sax_parser_context();
void noko_init_html_sax_push_parser();
void noko_init_gumbo();
void noko_init_test_global_handlers();

static ID id_read, id_write;
Expand Down Expand Up @@ -152,12 +155,14 @@ void
Init_nokogiri()
{
mNokogiri = rb_define_module("Nokogiri");
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");

rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
Expand Down Expand Up @@ -238,6 +243,7 @@ Init_nokogiri()
noko_init_xml_document_fragment();
noko_init_xml_document();
noko_init_html_document();
noko_init_gumbo();

noko_init_test_global_handlers();

Expand Down
5 changes: 5 additions & 0 deletions ext/nokogiri/nokogiri.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
#include <ruby/st.h>
#include <ruby/encoding.h>
#include <ruby/util.h>
#include <ruby/version.h>

#define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
Expand All @@ -92,10 +93,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);


NOKOPUBVAR VALUE mNokogiri ;
NOKOPUBVAR VALUE mNokogiriGumbo ;
NOKOPUBVAR VALUE mNokogiriHtml ;
NOKOPUBVAR VALUE mNokogiriHtmlSax ;
NOKOPUBVAR VALUE mNokogiriHtml5 ;
NOKOPUBVAR VALUE mNokogiriXml ;
NOKOPUBVAR VALUE mNokogiriXmlSax ;
NOKOPUBVAR VALUE mNokogiriXmlXpath ;
NOKOPUBVAR VALUE mNokogiriXslt ;

NOKOPUBVAR VALUE cNokogiriSyntaxError;
Expand Down Expand Up @@ -129,6 +133,7 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;

NOKOPUBVAR VALUE cNokogiriHtmlDocument ;
NOKOPUBVAR VALUE cNokogiriHtml5Document ;
NOKOPUBVAR VALUE cNokogiriHtmlSaxPushParser ;
NOKOPUBVAR VALUE cNokogiriHtmlElementDescription ;
NOKOPUBVAR VALUE cNokogiriHtmlSaxParserContext;
Expand Down
13 changes: 13 additions & 0 deletions gumbo-parser/src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This Makefile is used by ext/nokogiri/extconf.rb
# to enable a mini_portile2 recipe to build the gumbo parser.
.PHONY: clean

# Appended even when CFLAGS is given on the make command line.
override CFLAGS += -std=c99 -Wall

# One object file per C source in this directory.
gumbo_objs := $(patsubst %.c,%.o,$(wildcard *.c))

# Archive through $(AR), which make defaults to `ar`, so that a
# cross-compiling toolchain can substitute its own archiver.
libgumbo.a: $(gumbo_objs)
	$(AR) rcs $@ $^

clean:
	rm -f $(gumbo_objs) libgumbo.a
1 change: 0 additions & 1 deletion lib/nokogiri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

if Nokogiri.uses_gumbo?
require 'nokogiri/html5'
require 'nokogumbo/nokogumbo'
end

# Nokogiri parses and searches XML/HTML very quickly, and also has
Expand Down
5 changes: 0 additions & 5 deletions rakelib/extensions.rake
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,4 @@ else
end
end
end

Rake::ExtensionTask.new 'nokogumbo' do |e|
e.lib_dir = 'lib/nokogumbo'
e.source_pattern = '{,../../gumbo-parser/src/}*.[hc]'
end
end

0 comments on commit 0f22de1

Please sign in to comment.