Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a fast memory-only XML parser (9.0) #299

Merged
merged 4 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added
- Add option to set finished hosts in OSP targets [#298](https://github.com/greenbone/gvm-libs/pull/298)
- Add a fast memory-only XML parser [#299](https://github.com/greenbone/gvm-libs/pull/299)

### Fixed
- Fix sigsegv when no plugin_feed_info.inc file present. [#278](https://github.com/greenbone/gvm-libs/pull/278)
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ configure_file (VERSION.in ${CMAKE_BINARY_DIR}/VERSION @ONLY)

enable_testing ()

# Convenience target: "make tests" builds the test programs named here.
add_custom_target (
  tests
  DEPENDS
    array-test
    xmlutils-test)

## Program

if (NOT SKIP_SRC)
Expand Down
3 changes: 0 additions & 3 deletions base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ add_test (array-test array-test)

target_link_libraries (array-test cgreen ${GLIB_LDFLAGS} ${LINKER_HARDENING_FLAGS})

add_custom_target (tests
DEPENDS array-test)


## Install

Expand Down
29 changes: 27 additions & 2 deletions util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ pkg_check_modules (LIBSSH REQUIRED libssh>=0.6.0)
# for kb we need libhiredis
pkg_check_modules (REDIS REQUIRED hiredis>=0.10.1)

# for fast XML we need libxml2
pkg_check_modules (LIBXML2 REQUIRED libxml-2.0>=2.0)

# Set NVTICACHE name with the version
set (NVTICACHE_STR "nvticache${PROJECT_VERSION}")
add_definitions (-DNVTICACHE_STR="${NVTICACHE_STR}")
Expand Down Expand Up @@ -131,7 +134,8 @@ if (BUILD_WITH_LDAP)
endif (NOT LIBLDAP)
endif (BUILD_WITH_LDAP)

include_directories (${GLIB_INCLUDE_DIRS} ${GPGME_INCLUDE_DIRS} ${GCRYPT_INCLUDE_DIRS})
include_directories (${GLIB_INCLUDE_DIRS} ${GPGME_INCLUDE_DIRS} ${GCRYPT_INCLUDE_DIRS}
${LIBXML2_INCLUDE_DIRS})

set (FILES authutils.c compressutils.c fileutils.c gpgmeutils.c kb.c ldaputils.c
nvticache.c radiusutils.c serverutils.c sshutils.c uuidutils.c
Expand Down Expand Up @@ -160,9 +164,30 @@ if (BUILD_SHARED)
${GIO_LDFLAGS} ${GPGME_LDFLAGS} ${ZLIB_LDFLAGS}
${RADIUS_LDFLAGS} ${LIBSSH_LDFLAGS} ${GNUTLS_LDFLAGS}
${GCRYPT_LDFLAGS} ${LDAP_LDFLAGS} ${REDIS_LDFLAGS}
${UUID_LDFLAGS} ${LINKER_HARDENING_FLAGS})
${LIBXML2_LDFLAGS} ${UUID_LDFLAGS}
${LINKER_HARDENING_FLAGS})
endif (BUILD_SHARED)


## Tests

# Test program for xmlutils, linked against cgreen.  It is excluded from the
# default "all" build; build it through the "tests-xmlutils" target below.
add_executable (xmlutils-test
                EXCLUDE_FROM_ALL
                xmlutils_tests.c)

# Use the NAME/COMMAND signature so that CTest resolves the command to the
# location of the built xmlutils-test executable target, instead of relying
# on a lookup of a bare program name at test time.
add_test (NAME xmlutils-test COMMAND xmlutils-test)

target_link_libraries (xmlutils-test cgreen
                       ${GLIB_LDFLAGS} ${GIO_LDFLAGS} ${GPGME_LDFLAGS} ${ZLIB_LDFLAGS}
                       ${RADIUS_LDFLAGS} ${LIBSSH_LDFLAGS} ${GNUTLS_LDFLAGS}
                       ${GCRYPT_LDFLAGS} ${LDAP_LDFLAGS} ${REDIS_LDFLAGS}
                       ${LIBXML2_LDFLAGS} ${UUID_LDFLAGS}
                       ${LINKER_HARDENING_FLAGS})

# Build only the xmlutils test program.
add_custom_target (tests-xmlutils
                   DEPENDS xmlutils-test)


## Install
configure_file (libgvm_util.pc.in ${CMAKE_BINARY_DIR}/libgvm_util.pc @ONLY)

Expand Down
246 changes: 246 additions & 0 deletions util/xmlutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <fcntl.h> /* for fcntl, F_SETFL, O_NONBLOCK */
#include <glib.h> /* for g_free, GSList, g_markup_parse_context_free */
#include <glib/gtypes.h> /* for GPOINTER_TO_INT, GINT_TO_POINTER, gsize */
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <string.h> /* for strcmp, strerror, strlen */
#include <time.h> /* for time, time_t */
#include <unistd.h> /* for ssize_t */
Expand Down Expand Up @@ -1566,3 +1568,247 @@ find_element_in_xml_file (gchar *file_path, gchar *find_element,
return search_data.found;
}
#undef XML_FILE_BUFFER_SIZE


/* The new faster parser that uses libxml2. */

/**
 * @brief Read an XML element tree from a string.
 *
 * Caller must not free string until caller is finished using element.
 *
 * @param[in]   string   Input string, NUL-terminated.
 * @param[out]  element  Location for the root of the parsed element tree, or
 *                       NULL if not required.  If given, set to NULL on
 *                       failure.  Free with element_free.
 *
 * @return 0 success, -1 read error (NULL or oversized input), -2 parse error,
 *         -3 XML ended prematurely (reserved, not returned by this
 *         implementation), -4 setup error.
 */
int
parse_element (const gchar *string, element_t *element)
{
  xmlDocPtr doc;
  gsize length;

  LIBXML_TEST_VERSION

  if (element)
    *element = NULL;

  if (string == NULL)
    return -1;

  /* xmlReadMemory takes the buffer size as an int, so refuse anything that
   * would not fit instead of silently truncating the length. */
  length = strlen (string);
  if (length > (gsize) G_MAXINT)
    return -1;

  /* Route libxml2 allocations through glib, so that strings handed out by
   * the element_* accessors can be released with g_free. */
  if (xmlMemSetup (g_free, g_malloc, g_realloc, g_strdup))
    return -4;

  doc = xmlReadMemory (string, (int) length, "noname.xml", NULL, 0);
  if (doc == NULL)
    return -2;

  if (element == NULL)
    {
      /* Caller only wanted to know whether the string parses.  Nobody holds
       * a reference to the document, so release it here. */
      xmlFreeDoc (doc);
      return 0;
    }

  *element = xmlDocGetRootElement (doc);
  if (*element == NULL)
    {
      /* Without a root node the caller has no handle to pass to
       * element_free, so the document must be freed now. */
      xmlFreeDoc (doc);
      return -2;
    }

  return 0;
}

/**
 * @brief Release the whole document that an element belongs to.
 *
 * This frees the complete tree containing the element, ancestors and
 * siblings included, so every element from the same tree becomes invalid.
 * Passing NULL is a no-op.
 *
 * @param[in]  element  Any element of the tree to free.
 */
void
element_free (element_t element)
{
  if (element == NULL)
    return;

  /* The owning document is what actually gets freed. */
  assert (element->doc);
  xmlFreeDoc (element->doc);
}

/**
 * @brief Get the name of an element.
 *
 * @param[in]  element  Element.
 *
 * @return Name of the element.  The empty string if element is NULL or the
 *         node is not an XML element node.
 */
const gchar *
element_name (element_t element)
{
  if (element == NULL || element->type != XML_ELEMENT_NODE)
    return "";

  return (const gchar *) element->name;
}

/**
 * @brief Find the first direct child of an element with a given name.
 *
 * @param[in]  element  Element.  Must not be NULL.
 * @param[in]  name     Name of child.
 *
 * @return First child whose node name equals name, else NULL.
 */
static element_t
find_child (element_t element, const gchar *name)
{
  const xmlChar *wanted;
  xmlNode *candidate;

  wanted = (const xmlChar *) name;
  candidate = element->children;
  while (candidate)
    {
      if (!xmlStrcmp (candidate->name, wanted))
        return candidate;
      candidate = candidate->next;
    }

  return NULL;
}

/**
 * @brief Get a child of an element.
 *
 * If name contains a namespace prefix ("ns:child"), the part after the
 * first colon is searched for first, and the full name is used as a
 * fallback.
 *
 * @param[in]  element  Element.
 * @param[in]  name     Name of the child.
 *
 * @return Element if found, else NULL.
 */
element_t
element_child (element_t element, const gchar *name)
{
  const gchar *colon;
  element_t match;

  if (element == NULL)
    return NULL;

  colon = strchr (name, ':');

  /* Either there is no namespace at all, or nothing follows the colon.  In
   * the latter case an empty name must not be searched for, because that
   * would match text nodes; the full name is used instead for glib
   * compatibility. */
  if (colon == NULL || colon[1] == '\0')
    return find_child (element, name);

  /* Try the bare name first, because libxml2 leaves the namespace out of
   * the node name when the namespace is defined.  Fall back to the full
   * name otherwise. */
  match = find_child (element, colon + 1);
  if (match == NULL)
    match = find_child (element, name);

  return match;
}

/**
 * @brief Get text of an element.
 *
 * For a non-NULL element the result is always a string, so a caller that
 * has already NULL checked the element does not need to NULL check the
 * return.
 *
 * @param[in]  element  Element.
 *
 * @return NULL if element is NULL, else the text (the empty string when the
 *         element has no text).  Caller must g_free.
 */
gchar *
element_text (element_t element)
{
  xmlChar *text;

  if (element == NULL)
    return NULL;

  text = xmlNodeListGetString (element->doc, element->xmlChildrenNode, 1);
  if (text == NULL)
    {
      /* No text content: hand back a freshly allocated empty string so the
       * caller can free the result the same way in every case. */
      text = xmlMalloc (1);
      text[0] = '\0';
    }

  return (gchar *) text;
}

/**
 * @brief Get an attribute of an element.
 *
 * If name contains a namespace prefix ("ns:attr"), the part after the first
 * colon is looked up first, and the full name is used as a fallback.
 *
 * @param[in]  element  Element.
 * @param[in]  name     Name of the attribute.
 *
 * @return Attribute value if found, else NULL.  Caller must g_free.
 */
gchar *
element_attribute (element_t element, const gchar *name)
{
  const gchar *colon;
  xmlChar *value;

  if (element == NULL)
    return NULL;

  colon = strchr (name, ':');

  /* No namespace, or nothing after the colon: look up the name as given.
   * An empty stripped name is never used for the lookup. */
  if (colon == NULL || colon[1] == '\0')
    return (gchar *) xmlGetProp (element, (const xmlChar *) name);

  /* Try without the namespace first, because libxml2 doesn't consider the
   * namespace in the name when the namespace is defined.  Fall back to the
   * full name otherwise. */
  value = xmlGetProp (element, (const xmlChar *) (colon + 1));
  if (value == NULL)
    value = xmlGetProp (element, (const xmlChar *) name);

  return (gchar *) value;
}

/**
 * @brief Get the first child node of an element.
 *
 * @param[in]  element  Element.
 *
 * @return First child if there is one, else NULL.  NULL if element is NULL.
 */
element_t
element_first_child (element_t element)
{
  return element ? element->children : NULL;
}

/**
 * @brief Get the next sibling of an element.
 *
 * @param[in]  element  Element.
 *
 * @return Next sibling if there is one, else NULL.  NULL if element is NULL.
 */
element_t
element_next (element_t element)
{
  return element ? element->next : NULL;
}
30 changes: 30 additions & 0 deletions util/xmlutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,39 @@ int xml_count_entities (entities_t);
void
xml_string_append (GString *, const char *, ...);


/* XML file utilities */

int
find_element_in_xml_file (gchar *, gchar *, GHashTable *);


/* The new faster parser that uses libxml2. */

/* Handle to a node of a parsed XML tree (pointer to libxml2's
 * struct _xmlNode). */
typedef struct _xmlNode *element_t;

/* Parse a string into an element tree.  Returns 0 on success, negative on
 * error. */
int
parse_element (const gchar *, element_t *);

/* Free the entire document that the element belongs to. */
void
element_free (element_t);

/* Name of the element, or "" if it is NULL or not an element node. */
const gchar *
element_name (element_t);

/* Value of the named attribute, or NULL.  Caller must g_free. */
gchar *
element_attribute (element_t, const gchar *);

/* Text of the element, or NULL if the element is NULL.  Caller must
 * g_free. */
gchar *
element_text (element_t);

/* First child with the given name, or NULL. */
element_t
element_child (element_t, const gchar *);

/* First child node, or NULL. */
element_t
element_first_child (element_t);

/* Next sibling node, or NULL. */
element_t
element_next (element_t);

#endif /* not _GVM_XMLUTILS_H */
Loading