diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a023a1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,97 @@ +# JetBrains.gitignore +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +*.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + + +# Go.gitignore +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + + +# Other files +release/ +*.def +*.a +.rustc_info.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..cf1da86 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "lol-html"] + path = lol-html + url = https://github.com/cloudflare/lol-html/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..1adc30a --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/../../../../../../../:\Users\cools\Documents\Projects\hello-cgo\.idea/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml new file mode 100644 index 0000000..645f4a0 --- /dev/null +++ b/.idea/codeStyles/Project.xml @@ -0,0 +1,22 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml 
b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..639900d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..ef78a97 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..18c7ecd --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2020, CoolSpring8 +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1a1ab23 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# lolhtml + +Go bindings for [cloudflare/lol-html](https://github.com/cloudflare/lol-html/), the *Low Output Latency streaming HTML rewriter/parser with CSS-selector based API.* + +**Status:** All abilities provided by C-API implemented, except for customized user data in handlers. The code is at its early stage and the API is therefore subject to change. If you have any ideas on how API can be better structured, feel free to open a PR or an issue. + +## Installation + +Rust is required to build the lol-html library. + +For Linux: + +```bash +git clone --recursive https://github.com/coolspring8/lolhtml.git +cargo build --release --manifest-path ./lol-html/c-api/Cargo.toml --target-dir ./ +go install +``` + +For Windows users, as Rust relies on MSVC toolchain by default, one more step is needed between `cargo build` and `go install`: create a `.a` file from compiled artifacts. 
This snippet works for me: + +```powershell +gendef ./release/lolhtml.dll +dlltool --as-flags=--64 -m i386:x86-64 -k --output-lib ./lolhtml.a --input-def lolhtml.def +cp ./release/lolhtml.dll ./ +``` + +Now let's initialize a project and create `main.go`: + +```go +package main + +import ( + "fmt" + "github.com/coolspring8/lolhtml" +) + +func main() { + rb := lolhtml.NewRewriterBuilder() + defer rb.Free() + s, _ := lolhtml.NewSelector("span") + defer s.Free() + rb.AddElementContentHandlers( + s, + func(e *lolhtml.Element) lolhtml.RewriterDirective { + e.SetInnerContentAsRaw("World") + return lolhtml.Continue + }, + nil, + func(*lolhtml.TextChunk) lolhtml.RewriterDirective { + return lolhtml.Continue + }, + ) + r, _ := rb.Build( + lolhtml.Config{ + Encoding: "utf-8", + Memory: &lolhtml.MemorySettings{ + PreallocatedParsingBufferSize: 1024, + MaxAllowedMemoryUsage: 1<<63 - 1, + }, + Sink: func(s string) { fmt.Print(s) }, + Strict: true, + }, + ) + defer r.Free() + r.WriteString("

Hello ") + r.WriteString("LOL-HTML!

") + r.End() +} +``` + +This program takes chunked input `

Hello LOL-HTML!

` and rewrites texts in `span` tag to "World". The output is ``

Hello World!

`` . + +## Documentation + +Available at [pkg.go.dev](https://pkg.go.dev/github.com/coolspring8/lolhtml). (WIP) + +## Known Issue + +- For now, to use `Rewriter.End()` without causing panic, you will probably need to assign a stub `DocEndHandler` function when calling `AddDocumentContentHandlers()`. + +## Other Bindings + +- Rust (native), C, JavaScript - [cloudflare/lol-html](https://github.com/cloudflare/lol-html/) +- Lua - [jdesgats/lua-lolhtml](https://github.com/jdesgats/lua-lolhtml/) + +## License + +BSD 3-Clause "New" or "Revised" License \ No newline at end of file diff --git a/callback.go b/callback.go new file mode 100644 index 0000000..152b568 --- /dev/null +++ b/callback.go @@ -0,0 +1,43 @@ +package lolhtml + +/* +#include +#include "lol_html.h" + +extern void callbackSink(const char *chunk, size_t chunk_len, void *); + +extern lol_html_rewriter_directive_t callbackDoctype(lol_html_doctype_t *doctype, void *user_data); + +extern lol_html_rewriter_directive_t callbackComment(lol_html_comment_t *comment, void *user_data); + +extern lol_html_rewriter_directive_t callbackTextChunk(lol_html_text_chunk_t *text_chunk, void *user_data); + +extern lol_html_rewriter_directive_t callbackElement(lol_html_element_t *element, void *user_data); + +extern lol_html_rewriter_directive_t callbackDocEnd(lol_html_doc_end_t *doc_end, void *user_data); + +void callback_sink(const char *chunk, size_t chunk_len, void *user_data) { + return callbackSink(chunk, chunk_len, user_data); +} + +lol_html_rewriter_directive_t callback_doctype(lol_html_doctype_t *doctype, void *user_data) { + return callbackDoctype(doctype, user_data); +} + +lol_html_rewriter_directive_t callback_comment(lol_html_comment_t *comment, void *user_data) { + return callbackComment(comment, user_data); +} + +lol_html_rewriter_directive_t callback_text_chunk(lol_html_text_chunk_t *text_chunk, void *user_data) { + return callbackTextChunk(text_chunk, user_data); +} + +lol_html_rewriter_directive_t 
callback_element(lol_html_element_t *element, void *user_data){ + return callbackElement(element, user_data); +} + +lol_html_rewriter_directive_t callback_doc_end(lol_html_doc_end_t *doc_end, void *user_data) { + return callbackDocEnd(doc_end, user_data); +} +*/ +import "C" diff --git a/export_test.go b/export_test.go new file mode 100644 index 0000000..5268ba6 --- /dev/null +++ b/export_test.go @@ -0,0 +1,5 @@ +package lolhtml + +// export some internal functions for test + +var GetError = getError diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..44c2cb5 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/coolspring8/lolhtml + +go 1.15 + +require github.com/mattn/go-pointer v0.0.1 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1fdefea --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/mattn/go-pointer v0.0.1 h1:n+XhsuGeVO6MEAp7xyEukFINEa+Quek5psIR/ylA6o0= +github.com/mattn/go-pointer v0.0.1/go.mod h1:2zXcozF6qYGgmsG+SeTZz3oAbFLdD3OWqnUbNvJZAlc= diff --git a/lol-html b/lol-html new file mode 160000 index 0000000..e675570 --- /dev/null +++ b/lol-html @@ -0,0 +1 @@ +Subproject commit e67557065f6c5ecdff186b74b7361f25cbc8b56b diff --git a/lolhtml.go b/lolhtml.go new file mode 100644 index 0000000..de9a301 --- /dev/null +++ b/lolhtml.go @@ -0,0 +1,783 @@ +package lolhtml + +/* +#cgo CFLAGS: -I${SRCDIR}/lol-html/c-api/include +#cgo LDFLAGS: ${SRCDIR}/lolhtml.a +#include +#include "lol_html.h" +extern void callback_sink(const char *chunk, size_t chunk_len, void *user_data); +extern lol_html_rewriter_directive_t callback_doctype(lol_html_doctype_t *doctype, void *user_data); +extern lol_html_rewriter_directive_t callback_comment(lol_html_comment_t *comment, void *user_data); +extern lol_html_rewriter_directive_t callback_text_chunk(lol_html_text_chunk_t *text_chunk, void *user_data); +extern lol_html_rewriter_directive_t callback_element(lol_html_element_t *element, void *user_data); +extern lol_html_rewriter_directive_t 
callback_doc_end(lol_html_doc_end_t *doc_end, void *user_data); +*/ +import "C" +import ( + "errors" + "github.com/mattn/go-pointer" + "unsafe" +) + +var ErrCannotGetErrorMessage = errors.New("cannot get error message from underlying lol-html lib") + +// RewriterDirective as declared in include/lol_html.h:84 +type RewriterDirective int + +// RewriterDirective enumeration from include/lol_html.h:84 +const ( + Continue RewriterDirective = iota + Stop +) + +// RewriterBuilder as declared in include/lol_html.h:22 +type RewriterBuilder C.lol_html_rewriter_builder_t + +// Rewriter as declared in include/lol_html.h:23 +type Rewriter C.lol_html_rewriter_t + +// Doctype as declared in include/lol_html.h:24 +type Doctype C.lol_html_doctype_t + +// DocEnd as declared in include/lol_html.h:25 +type DocEnd C.lol_html_doc_end_t + +// Comment as declared in include/lol_html.h:26 +type Comment C.lol_html_comment_t + +// TextChunk as declared in include/lol_html.h:27 +type TextChunk C.lol_html_text_chunk_t + +// Element as declared in include/lol_html.h:28 +type Element C.lol_html_element_t + +// AttributeIterator as declared in include/lol_html.h:29 +type AttributeIterator C.lol_html_attributes_iterator_t + +// Attribute as declared in include/lol_html.h:30 +type Attribute C.lol_html_attribute_t + +// Selector as declared in include/lol_html.h:31 +type Selector C.lol_html_selector_t + +// str as declared in include/lol_html.h:45 +type str C.lol_html_str_t + +// TextChunkContent as declared in include/lol_html.h:60 +type TextChunkContent C.lol_html_text_chunk_content_t + +type OutputSink func(string) + +// DoctypeHandler type as declared in include/lol_html.h:86 +type DoctypeHandler func(*Doctype) RewriterDirective + +// CommentHandler type as declared in include/lol_html.h:91 +type CommentHandler func(*Comment) RewriterDirective + +// TextChunkHandler type as declared in include/lol_html.h:96 +type TextChunkHandler func(*TextChunk) RewriterDirective + +// ElementHandler type as 
declared in include/lol_html.h:101 +type ElementHandler func(*Element) RewriterDirective + +// DocEndHandler type as declared in include/lol_html.h:106 +type DocEndHandler func(*DocEnd) RewriterDirective + +type Config struct { + Encoding string + Memory *MemorySettings + Sink OutputSink + //UserData interface{} + Strict bool +} + +func NewDefaultConfig() Config { + return Config{ + Encoding: "utf-8", + Memory: &MemorySettings{ + PreallocatedParsingBufferSize: 1024, + MaxAllowedMemoryUsage: 1<<63 - 1, + }, + Sink: func(string) {}, + Strict: true, + } +} + +type MemorySettings struct { + PreallocatedParsingBufferSize int + MaxAllowedMemoryUsage int +} + +func NewRewriterBuilder() *RewriterBuilder { + return (*RewriterBuilder)(C.lol_html_rewriter_builder_new()) +} + +func (rb *RewriterBuilder) Free() { + if rb != nil { + C.lol_html_rewriter_builder_free((*C.lol_html_rewriter_builder_t)(rb)) + } +} + +// TODO: BUG? For now, to use *Rewriter.End() without causing panic, you will probably need to assign +// a stub handler function to it. 
+func (rb *RewriterBuilder) AddDocumentContentHandlers( + doctypeHandler DoctypeHandler, + commentHandler CommentHandler, + textChunkHandler TextChunkHandler, + docEndHandler DocEndHandler, +) { + doctypeHandlerPointer := pointer.Save(doctypeHandler) + commentHandlerPointer := pointer.Save(commentHandler) + textChunkHandlerPointer := pointer.Save(textChunkHandler) + docEndHandlerPointer := pointer.Save(docEndHandler) + C.lol_html_rewriter_builder_add_document_content_handlers( + (*C.lol_html_rewriter_builder_t)(rb), + (*[0]byte)(C.callback_doctype), + doctypeHandlerPointer, + (*[0]byte)(C.callback_comment), + commentHandlerPointer, + (*[0]byte)(C.callback_text_chunk), + textChunkHandlerPointer, + (*[0]byte)(C.callback_doc_end), + docEndHandlerPointer, + ) +} + +func (rb *RewriterBuilder) AddElementContentHandlers( + selector *Selector, + elementHandler ElementHandler, + commentHandler CommentHandler, + textChunkHandler TextChunkHandler, +) { + commentHandlerPointer := pointer.Save(commentHandler) + elementHandlerPointer := pointer.Save(elementHandler) + textChunkHandlerPointer := pointer.Save(textChunkHandler) + C.lol_html_rewriter_builder_add_element_content_handlers( + (*C.lol_html_rewriter_builder_t)(rb), + (*C.lol_html_selector_t)(selector), + (*[0]byte)(C.callback_element), + elementHandlerPointer, + (*[0]byte)(C.callback_comment), + commentHandlerPointer, + (*[0]byte)(C.callback_text_chunk), + textChunkHandlerPointer, + ) +} + +func (rb *RewriterBuilder) Build(config Config) (*Rewriter, error) { + encodingC := C.CString(config.Encoding) + defer C.free(unsafe.Pointer(encodingC)) + encodingLen := len(config.Encoding) + memorySettingsC := C.lol_html_memory_settings_t{ + preallocated_parsing_buffer_size: C.size_t(config.Memory.PreallocatedParsingBufferSize), + max_allowed_memory_usage: C.size_t(config.Memory.MaxAllowedMemoryUsage), + } + p := pointer.Save(config.Sink) + r := (*Rewriter)(C.lol_html_rewriter_build( + (*C.lol_html_rewriter_builder_t)(rb), + 
encodingC, + C.size_t(encodingLen), + memorySettingsC, + (*[0]byte)(C.callback_sink), + p, + C.bool(config.Strict), + )) + if r != nil { + return r, nil + } + return nil, getError() +} + +//func (r *Rewriter) Write(b [] byte) error {} + +func (r *Rewriter) WriteString(chunk string) error { + chunkC := C.CString(chunk) + defer C.free(unsafe.Pointer(chunkC)) + chunkLen := len(chunk) + errCode := C.lol_html_rewriter_write((*C.lol_html_rewriter_t)(r), chunkC, C.size_t(chunkLen)) + if errCode == 0 { + return nil + } + return getError() +} + +func (r *Rewriter) End() error { + errCode := C.lol_html_rewriter_end((*C.lol_html_rewriter_t)(r)) + if errCode == 0 { + return nil + } + return getError() +} + +func (r *Rewriter) Free() { + if r != nil { + C.lol_html_rewriter_free((*C.lol_html_rewriter_t)(r)) + } +} + +func (d *Doctype) GetName() string { + nameC := (*str)(C.lol_html_doctype_name_get((*C.lol_html_doctype_t)(d))) + defer nameC.Free() + return strToGoString(nameC) +} + +func (d *Doctype) GetPublicId() string { + nameC := (*str)(C.lol_html_doctype_public_id_get((*C.lol_html_doctype_t)(d))) + defer nameC.Free() + return strToGoString(nameC) +} + +func (d *Doctype) GetSystemId() string { + nameC := (*str)(C.lol_html_doctype_system_id_get((*C.lol_html_doctype_t)(d))) + defer nameC.Free() + return strToGoString(nameC) +} + +//func (d* Doctype) SetUserData(){} + +//func (d* Doctype) GetUserData(){} + +func (c *Comment) GetText() string { + textC := (str)(C.lol_html_comment_text_get((*C.lol_html_comment_t)(c))) + defer textC.Free() + return strToGoString2(textC) +} + +func (c *Comment) SetText(text string) error { + textC := C.CString(text) + defer C.free(unsafe.Pointer(textC)) + textLen := len(text) + errCode := C.lol_html_comment_text_set((*C.lol_html_comment_t)(c), textC, C.size_t(textLen)) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) InsertBeforeAsRaw(content string) error { + contentC := C.CString(content) + defer 
C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_before((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) InsertBeforeAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_before((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) InsertAfterAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_after((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) InsertAfterAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_after((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) ReplaceAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_replace((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) ReplaceAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_comment_replace((*C.lol_html_comment_t)(c), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (c *Comment) Remove() { + C.lol_html_comment_remove((*C.lol_html_comment_t)(c)) +} + +func (c *Comment) IsRemoved() 
bool { + return (bool)(C.lol_html_comment_is_removed((*C.lol_html_comment_t)(c))) +} + +func (t *TextChunk) GetContent() string { + text := (TextChunkContent)(C.lol_html_text_chunk_content_get((*C.lol_html_text_chunk_t)(t))) + return textChunkContentToGoString(text) +} + +func (t *TextChunk) IsLastInTextNode() bool { + return (bool)(C.lol_html_text_chunk_is_last_in_text_node((*C.lol_html_text_chunk_t)(t))) +} + +func (t *TextChunk) InsertBeforeAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_text_chunk_before((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) InsertBeforeAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_text_chunk_before((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) InsertAfterAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_text_chunk_after((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) InsertAfterAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_text_chunk_after((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) ReplaceAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := 
C.lol_html_text_chunk_replace((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) ReplaceAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_text_chunk_replace((*C.lol_html_text_chunk_t)(t), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (t *TextChunk) Remove() { + C.lol_html_text_chunk_remove((*C.lol_html_text_chunk_t)(t)) +} + +func (t *TextChunk) IsRemoved() bool { + return (bool)(C.lol_html_text_chunk_is_removed((*C.lol_html_text_chunk_t)(t))) +} + +func (e *Element) GetTagName() string { + tagNameC := (str)(C.lol_html_element_tag_name_get((*C.lol_html_element_t)(e))) + defer tagNameC.Free() + return strToGoString2(tagNameC) +} + +func (e *Element) SetTagName(name string) error { + nameC := C.CString(name) + defer C.free(unsafe.Pointer(nameC)) + nameLen := len(name) + errCode := C.lol_html_element_tag_name_set((*C.lol_html_element_t)(e), nameC, C.size_t(nameLen)) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) GetNamespaceUri() string { + namespaceUriC := C.lol_html_element_namespace_uri_get((*C.lol_html_element_t)(e)) + return C.GoString(namespaceUriC) +} + +func (e *Element) GetAttributeIterator() *AttributeIterator { + return (*AttributeIterator)(C.lol_html_attributes_iterator_get((*C.lol_html_element_t)(e))) +} + +func (e *Element) GetAttributeValue(name string) (string, error) { + nameC := C.CString(name) + defer C.free(unsafe.Pointer(nameC)) + nameLen := len(name) + valueC := (*str)(C.lol_html_element_get_attribute((*C.lol_html_element_t)(e), nameC, C.size_t(nameLen))) + defer valueC.Free() + errC := (*str)(C.lol_html_take_last_error()) + defer errC.Free() + errMsg := strToGoString(errC) + if errMsg != "" { + return "", errors.New(errMsg) + } + return 
strToGoString(valueC), nil +} + +func (e *Element) HasAttribute(name string) (bool, error) { + nameC := C.CString(name) + defer C.free(unsafe.Pointer(nameC)) + nameLen := len(name) + codeC := C.lol_html_element_has_attribute((*C.lol_html_element_t)(e), nameC, C.size_t(nameLen)) + if codeC == 1 { + return true, nil + } else if codeC == 0 { + return false, nil + } + return false, getError() +} + +func (e *Element) SetAttribute(name string, value string) error { + nameC := C.CString(name) + defer C.free(unsafe.Pointer(nameC)) + nameLen := len(name) + valueC := C.CString(value) + defer C.free(unsafe.Pointer(valueC)) + valueLen := len(value) + errCode := C.lol_html_element_set_attribute( + (*C.lol_html_element_t)(e), + nameC, + C.size_t(nameLen), + valueC, + C.size_t(valueLen), + ) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) RemoveAttribute(name string) error { + nameC := C.CString(name) + defer C.free(unsafe.Pointer(nameC)) + nameLen := len(name) + errCode := C.lol_html_element_remove_attribute((*C.lol_html_element_t)(e), nameC, C.size_t(nameLen)) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertBeforeStartTagAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_before((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertBeforeStartTagAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_before((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertAfterStartTagAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := 
len(content) + errCode := C.lol_html_element_prepend((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertAfterStartTagAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_prepend((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertBeforeEndTagAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_append((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertBeforeEndTagAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_append((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertAfterEndTagAsRaw(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_after((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) InsertAfterEndTagAsHtml(content string) error { + contentC := C.CString(content) + defer C.free(unsafe.Pointer(contentC)) + contentLen := len(content) + errCode := C.lol_html_element_after((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true) + if errCode == 0 { + return nil + } + return getError() +} + +func (e *Element) SetInnerContentAsRaw(content string) error { + contentC := C.CString(content) + defer 
C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_element_set_inner_content((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// SetInnerContentAsHtml passes content and its byte length to
+// lol_html_element_set_inner_content with the final argument set to true.
+// It returns nil when the C call returns 0, and the result of getError otherwise.
+// NOTE(review): presumably the final flag tells lol-html that content is HTML
+// markup; confirm against lol_html.h.
+func (e *Element) SetInnerContentAsHtml(content string) error {
+	// C.CString copies content into C-allocated memory; the deferred C.free releases it.
+	contentC := C.CString(content)
+	defer C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_element_set_inner_content((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// ReplaceAsRaw passes content and its byte length to lol_html_element_replace
+// with the final argument set to false.
+// It returns nil when the C call returns 0, and the result of getError otherwise.
+// NOTE(review): presumably false means content is not treated as HTML markup;
+// confirm against lol_html.h.
+func (e *Element) ReplaceAsRaw(content string) error {
+	// C.CString copies content into C-allocated memory; the deferred C.free releases it.
+	contentC := C.CString(content)
+	defer C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_element_replace((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), false)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// ReplaceAsHtml passes content and its byte length to lol_html_element_replace
+// with the final argument set to true.
+// It returns nil when the C call returns 0, and the result of getError otherwise.
+// NOTE(review): presumably true means content is treated as HTML markup;
+// confirm against lol_html.h.
+func (e *Element) ReplaceAsHtml(content string) error {
+	// C.CString copies content into C-allocated memory; the deferred C.free releases it.
+	contentC := C.CString(content)
+	defer C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_element_replace((*C.lol_html_element_t)(e), contentC, C.size_t(contentLen), true)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// Remove calls lol_html_element_remove on the element.
+func (e *Element) Remove() {
+	C.lol_html_element_remove((*C.lol_html_element_t)(e))
+}
+
+// RemoveAndKeepContent calls lol_html_element_remove_and_keep_content on the element.
+func (e *Element) RemoveAndKeepContent() {
+	C.lol_html_element_remove_and_keep_content((*C.lol_html_element_t)(e))
+}
+
+// IsRemoved returns the result of lol_html_element_is_removed converted to a Go bool.
+func (e *Element) IsRemoved() bool {
+	return (bool)(C.lol_html_element_is_removed((*C.lol_html_element_t)(e)))
+}
+
+// Free calls lol_html_attributes_iterator_free on the iterator.
+// Unlike Selector.Free and str.Free, it performs no nil check on the receiver.
+func (ai *AttributeIterator) Free() {
+	C.lol_html_attributes_iterator_free((*C.lol_html_attributes_iterator_t)(ai))
+}
+
+// Next returns the pointer produced by lol_html_attributes_iterator_next,
+// converted to *Attribute.
+// NOTE(review): presumably nil once the iterator is exhausted; confirm against lol_html.h.
+func (ai *AttributeIterator) Next() *Attribute {
+	return (*Attribute)(C.lol_html_attributes_iterator_next((*C.lol_html_attributes_iterator_t)(ai)))
+}
+
+// GetName converts the str returned by lol_html_attribute_name_get to a Go
+// string with strToGoString2. The str is released by the deferred Free call.
+func (a *Attribute) GetName() string {
+	nameC := (str)(C.lol_html_attribute_name_get((*C.lol_html_attribute_t)(a)))
+	defer nameC.Free()
+	return strToGoString2(nameC)
+}
+
+// GetValue converts the str returned by lol_html_attribute_value_get to a Go
+// string with strToGoString2. The str is released by the deferred Free call.
+func (a *Attribute) GetValue() string {
+	valueC := (str)(C.lol_html_attribute_value_get((*C.lol_html_attribute_t)(a)))
+	defer valueC.Free()
+	return strToGoString2(valueC)
+}
+
+// AppendAsRaw passes content and its byte length to lol_html_doc_end_append
+// with the final argument set to false.
+// It returns nil when the C call returns 0, and the result of getError otherwise.
+// NOTE(review): presumably false means content is not treated as HTML markup;
+// confirm against lol_html.h.
+func (d *DocEnd) AppendAsRaw(content string) error {
+	// C.CString copies content into C-allocated memory; the deferred C.free releases it.
+	contentC := C.CString(content)
+	defer C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_doc_end_append((*C.lol_html_doc_end_t)(d), contentC, C.size_t(contentLen), false)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// AppendAsHtml passes content and its byte length to lol_html_doc_end_append
+// with the final argument set to true.
+// It returns nil when the C call returns 0, and the result of getError otherwise.
+// NOTE(review): presumably true means content is treated as HTML markup;
+// confirm against lol_html.h.
+func (d *DocEnd) AppendAsHtml(content string) error {
+	// C.CString copies content into C-allocated memory; the deferred C.free releases it.
+	contentC := C.CString(content)
+	defer C.free(unsafe.Pointer(contentC))
+	contentLen := len(content)
+	errCode := C.lol_html_doc_end_append((*C.lol_html_doc_end_t)(d), contentC, C.size_t(contentLen), true)
+	if errCode == 0 {
+		return nil
+	}
+	return getError()
+}
+
+// NewSelector passes selector and its byte length to lol_html_selector_parse.
+// It returns the resulting *Selector with a nil error when the C call yields a
+// non-nil pointer, and nil together with the result of getError otherwise.
+func NewSelector(selector string) (*Selector, error) {
+	// C.CString copies selector into C-allocated memory; the deferred C.free releases it.
+	selectorC := C.CString(selector)
+	defer C.free(unsafe.Pointer(selectorC))
+	selectorLen := len(selector)
+	s := (*Selector)(C.lol_html_selector_parse(selectorC, C.size_t(selectorLen)))
+	if s != nil {
+		return s, nil
+	}
+	return nil, getError()
+}
+
+// Free calls lol_html_selector_free on the selector. It does nothing when s is nil.
+func (s *Selector) Free() {
+	if s != nil {
+		C.lol_html_selector_free((*C.lol_html_selector_t)(s))
+	}
+}
+
+// Free passes the pointed-to lol_html_str_t, by value, to lol_html_str_free.
+// It does nothing when s is nil.
+func (s *str) Free() {
+	if s != nil {
+		C.lol_html_str_free(*(*C.lol_html_str_t)(s))
+	}
+}
+
+// callbackSink is exported to C by the export directive below. It copies
+// chunkLen bytes starting at chunk into a Go string, restores the OutputSink
+// stored behind userData with pointer.Restore, and calls it with that string.
+// The type assertion panics if the restored value is not an OutputSink.
+// chunkLen is narrowed from C.size_t to C.int for C.GoStringN.
+//export callbackSink
+func callbackSink(chunk *C.char, chunkLen C.size_t, userData unsafe.Pointer) {
+	c := C.GoStringN(chunk, C.int(chunkLen))
+	cb := pointer.Restore(userData).(OutputSink)
+	cb(c)
+}
+
+// callbackDoctype is exported to C by the export directive below. It restores
+// the DoctypeHandler stored behind userData with pointer.Restore and returns
+// the handler's result for doctype.
+// The type assertion panics if the restored value is not a DoctypeHandler.
+//export callbackDoctype
+func callbackDoctype(doctype *Doctype, userData unsafe.Pointer) RewriterDirective {
+	cb := pointer.Restore(userData).(DoctypeHandler)
+	return cb(doctype)
+}
+
+// callbackComment is exported to C by the export directive below. It restores
+// the CommentHandler stored behind userData with pointer.Restore and returns
+// the handler's result for comment.
+// The type assertion panics if the restored value is not a CommentHandler.
+//export callbackComment
+func callbackComment(comment *Comment, userData unsafe.Pointer) RewriterDirective {
+	cb := pointer.Restore(userData).(CommentHandler)
+	return cb(comment)
+}
+
+// callbackTextChunk is exported to C by the export directive below. It restores
+// the TextChunkHandler stored behind userData with pointer.Restore and returns
+// the handler's result for textChunk.
+// The type assertion panics if the restored value is not a TextChunkHandler.
+//export callbackTextChunk
+func callbackTextChunk(textChunk *TextChunk, userData unsafe.Pointer) RewriterDirective {
+	cb := pointer.Restore(userData).(TextChunkHandler)
+	return cb(textChunk)
+}
+
+// callbackElement is exported to C by the export directive below. It restores
+// the ElementHandler stored behind userData with pointer.Restore and returns
+// the handler's result for element.
+// The type assertion panics if the restored value is not an ElementHandler.
+//export callbackElement
+func callbackElement(element *Element, userData unsafe.Pointer) RewriterDirective {
+	cb := pointer.Restore(userData).(ElementHandler)
+	return cb(element)
+}
+
+// callbackDocEnd is exported to C by the export directive below. It restores
+// the DocEndHandler stored behind userData with pointer.Restore and returns
+// the handler's result for docEnd.
+// The type assertion panics if the restored value is not a DocEndHandler.
+//export callbackDocEnd
+func callbackDocEnd(docEnd *DocEnd, userData unsafe.Pointer) RewriterDirective {
+	cb := pointer.Restore(userData).(DocEndHandler)
+	return cb(docEnd)
+}
+
+// strToGoString is a helper function that translates the underlying-library-defined lol_html_str_t data to a Go string.
+// A nil s yields the empty string.
+// It is the caller's responsibility to arrange for lol_html_str_t to be freed,
+// by calling str.Free() or lol_html_str_free().
+// Potential issue: s.len is narrowed from size_t to C.int because C.GoStringN takes a C.int length,
+// so a length that does not fit in C.int is not converted faithfully.
+// NOTE(review): C int is commonly 32 bits on 64-bit targets as well, so this is
+// probably not limited to 32-bit machines; confirm for the supported platforms.
+func strToGoString(s *str) string {
+	if s == nil {
+		return ""
+	}
+	return C.GoStringN(s.data, C.int(s.len))
+}
+
+// strToGoString2 is similar to strToGoString, except that it takes the str by value.
+// A str equal to the zero value yields the empty string. The same size_t to
+// C.int narrowing of s.len applies.
+func strToGoString2(s str) string {
+	var nullStr str
+	if s == nullStr {
+		return ""
+	}
+	return C.GoStringN(s.data, C.int(s.len))
+}
+
+// textChunkContentToGoString converts a TextChunkContent to a Go string by
+// copying s.len bytes from s.data. A value equal to the zero TextChunkContent
+// yields the empty string. s.len is narrowed to C.int for C.GoStringN.
+func textChunkContentToGoString(s TextChunkContent) string {
+	var nullTextChunkContent TextChunkContent
+	if s == nullTextChunkContent {
+		return ""
+	}
+	return C.GoStringN(s.data, C.int(s.len))
+}
+
+// getError is a helper function that gets error message for the last function call.
+// You should make sure there is an error when calling this, or the function interprets
+// the NULL error message obtained as ErrCannotGetErrorMessage.
+func getError() error {
+	// Take the last error as a *str; the deferred Free is a no-op when it is nil.
+	errC := (*str)(C.lol_html_take_last_error())
+	defer errC.Free()
+	// strToGoString returns "" for a nil str, so a nil or empty message falls through
+	// to the sentinel error below.
+	if errMsg := strToGoString(errC); errMsg != "" {
+		return errors.New(errMsg)
+	}
+	return ErrCannotGetErrorMessage
+}
diff --git a/lolhtml_test.go b/lolhtml_test.go
new file mode 100644
index 0000000..6dad4de
--- /dev/null
+++ b/lolhtml_test.go
@@ -0,0 +1,368 @@
+package lolhtml_test
+
+import (
+	"errors"
+	"fmt"
+	"github.com/coolspring8/lolhtml"
+	"testing"
+)
+
+// TestRewriterBuilder builds a rewriter from a new rewriter builder with the
+// default config, writes two string chunks to it and ends it, reporting any
+// error returned along the way.
+// NOTE(review): t.Error does not stop the test, so a nil rb or a failed Build
+// would still reach the calls below; t.Fatal may be what is intended. Both
+// defers are also registered before the corresponding nil/err checks.
+func TestRewriterBuilder(t *testing.T) {
+	rb := lolhtml.NewRewriterBuilder()
+	defer rb.Free()
+	if rb == nil {
+		t.Error("cannot get new rewriter-builder\n")
+	}
+	r, err := rb.Build(lolhtml.NewDefaultConfig())
+	defer r.Free()
+	if err != nil {
+		t.Errorf("cannot build rewriter %s\n", err)
+	}
+	// NOTE(review): the string literal below is split across two lines and its
+	// continuation has no diff '+' marker; part of the literal (likely markup)
+	// appears to have been lost. Restore it from the original lolhtml_test.go.
+	err = r.WriteString("
a<")
+	if err != nil {
+		t.Error(err)
+	}
+	err = r.WriteString("/div>")
+	if err != nil {
+		t.Error(err)
+	}
+	err = r.End()
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+// TestRewriterBuilderNonAsciiEncoding expects Build to fail when the config
+// Encoding is "UTF-16", and checks that the error text is exactly
+// "Expected ASCII-compatible encoding.".
+func TestRewriterBuilderNonAsciiEncoding(t *testing.T) {
+	rb := lolhtml.NewRewriterBuilder()
+	defer rb.Free()
+	if rb == nil {
+		t.FailNow()
+	}
+	r, err := rb.Build(lolhtml.Config{
+		Encoding: "UTF-16",
+		Memory: &lolhtml.MemorySettings{
+			PreallocatedParsingBufferSize: 0,
+			MaxAllowedMemoryUsage:         16,
+		},
+		Sink:   func(string) {},
+		Strict: true,
+	})
+	// NOTE(review): r is the result of a Build that is expected to fail here, so
+	// this relies on Free tolerating whatever Build returns on error; confirm.
+	defer r.Free()
+	if err == nil {
+		t.FailNow()
+	}
+	if err.Error() != "Expected ASCII-compatible encoding." {
+		t.Error(err)
+	}
+}
+
+// TestRewriterBuilderMemoryLimiting registers element content handlers for the
+// "span" selector and builds a rewriter whose MaxAllowedMemoryUsage is 5.
+// NOTE(review): the visible body looks incomplete. WriteString is called with
+// an empty literal, finalText is used without any visible declaration, the
+// imported fmt package is unused in the visible tests, and the hunk header
+// declares 368 added lines. Text between the Build call and the finalText
+// check (possibly several whole tests) appears to have been lost; restore it
+// from the original lolhtml_test.go before relying on this test.
+func TestRewriterBuilderMemoryLimiting(t *testing.T) {
+	rb := lolhtml.NewRewriterBuilder()
+	defer rb.Free()
+	if rb == nil {
+		t.Error("cannot get new rewriter-builder\n")
+	}
+	s, err := lolhtml.NewSelector("span")
+	defer s.Free()
+	if err != nil {
+		t.Error(err)
+	}
+	rb.AddElementContentHandlers(s, nil, nil, nil)
+	r, err := rb.Build(lolhtml.Config{
+		Encoding: "utf-8",
+		Memory: &lolhtml.MemorySettings{
+			PreallocatedParsingBufferSize: 0,
+			MaxAllowedMemoryUsage:         5,
+		},
+		Sink:   func(string) {},
+		Strict: true,
+	})
+	defer r.Free()
+	if err != nil {
+		t.Error(err)
+	}
+	err = r.WriteString("")
+	if err != nil {
+		t.Error(err)
+	}
+	err = r.End()
+	if err != nil {
+		t.Error(err)
+	}
+	if finalText != "Hi " {
+		t.Errorf("wrong output %s", finalText)
+	}
+}
+
+// TestNullErrorStr tests internal functions for handling a null lol_html_str_t, by calling lol_html_take_last_error()
+// when there is no error. It expects lolhtml.GetError to return an error that
+// matches lolhtml.ErrCannotGetErrorMessage under errors.Is.
+func TestNullErrorStr(t *testing.T) {
+	err := lolhtml.GetError()
+	if !errors.Is(err, lolhtml.ErrCannotGetErrorMessage) {
+		t.Error(err)
+	}
+}