diff --git a/.autotest b/.autotest deleted file mode 100644 index 878238aca7..0000000000 --- a/.autotest +++ /dev/null @@ -1,22 +0,0 @@ -# -*- ruby -*- - -begin - require 'autotest/fsevent' -rescue LoadError -end - -Autotest.add_hook :initialize do |at| - at.add_exception /bundle$/ - at.add_exception /\.git/ -end - -Autotest.add_hook :run_command do |at| - at.unit_diff = 'cat' - system "rake compile" -end - -Autotest.add_hook :ran_command do |at| - File.open('/tmp/autotest.txt', 'wb') { |f| - f.write(at.results.join) - } -end diff --git a/.cross_rubies b/.cross_rubies index 9810448bbe..6f52404a8d 100644 --- a/.cross_rubies +++ b/.cross_rubies @@ -1,8 +1,32 @@ -2.6.0:i686-w64-mingw32 -2.6.0:x86_64-w64-mingw32 -2.5.0:i686-w64-mingw32 -2.5.0:x86_64-w64-mingw32 -2.4.0:i686-w64-mingw32 -2.4.0:x86_64-w64-mingw32 -2.3.0:i686-w64-mingw32 -2.3.0:x86_64-w64-mingw32 +2.7.0:aarch64-linux +2.7.0:arm-linux +2.7.0:arm64-darwin +2.7.0:x64-mingw32 +2.7.0:x86-linux +2.7.0:x86-mingw32 +2.7.0:x86_64-darwin +2.7.0:x86_64-linux +3.0.0:aarch64-linux +3.0.0:arm-linux +3.0.0:arm64-darwin +3.0.0:x64-mingw32 +3.0.0:x86-linux +3.0.0:x86-mingw32 +3.0.0:x86_64-darwin +3.0.0:x86_64-linux +3.1.0:aarch64-linux +3.1.0:arm-linux +3.1.0:arm64-darwin +3.1.0:x64-mingw-ucrt +3.1.0:x86-linux +3.1.0:x86-mingw32 +3.1.0:x86_64-darwin +3.1.0:x86_64-linux +3.2.0:aarch64-linux +3.2.0:arm-linux +3.2.0:arm64-darwin +3.2.0:x64-mingw-ucrt +3.2.0:x86-linux +3.2.0:x86-mingw32 +3.2.0:x86_64-darwin +3.2.0:x86_64-linux diff --git a/.editorconfig b/.editorconfig index e5dd4f83fd..fb6e656d50 100644 --- a/.editorconfig +++ b/.editorconfig @@ -14,4 +14,4 @@ indent_size = 2 [**.java] indent_style = space -indent_size = 4 +indent_size = 2 diff --git a/.gemtest b/.gemtest deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000..0716b3a33d --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +github: flavorjones +tidelift: 
rubygems/nokogiri +open_collective: nokogiri diff --git a/.github/ISSUE_TEMPLATE/bug-report-or-help-request.md b/.github/ISSUE_TEMPLATE/1-bug-report.md similarity index 77% rename from .github/ISSUE_TEMPLATE/bug-report-or-help-request.md rename to .github/ISSUE_TEMPLATE/1-bug-report.md index b5077193ba..297cd4de42 100644 --- a/.github/ISSUE_TEMPLATE/bug-report-or-help-request.md +++ b/.github/ISSUE_TEMPLATE/1-bug-report.md @@ -1,24 +1,27 @@ --- -name: Bug Report or Help Request -about: Create a report to help us improve -title: '' -labels: '' -assignees: '' +name: "Bug Report" +about: "Open an issue to help us improve!" +title: "[bug]" +labels: "state/needs-triage" +assignees: "" --- + +**Please describe the bug** -**Describe the bug** + -A clear and concise description of what the bug is. Please include as much context as you can about what you are trying to do. - - -**To Reproduce** +**Help us reproduce what you're seeing** + **Expected behavior** + **Environment** + **Additional context** + diff --git a/.github/ISSUE_TEMPLATE/2-installation-difficulties.md b/.github/ISSUE_TEMPLATE/2-installation-difficulties.md new file mode 100644 index 0000000000..17929f6c70 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-installation-difficulties.md @@ -0,0 +1,76 @@ +--- +name: "Installation Difficulties" +about: "If you're having trouble installing Nokogiri ..." +title: "[install]" +labels: "topic/installation" +assignees: "" + +--- + +**Have you read and followed the installation tutorial at http://www.nokogiri.org/tutorials/installing_nokogiri.html?** + +- [ ] Yes! 
+ + +**What is the complete output of `gem install` or `bundle install`?** + + + +``` +the output goes here +``` + + +**If installation completed but is broken, what is the complete output from `nokogiri -v`?** + + + +``` +the output of "nokogiri -v" goes here +``` + + + +**If installation failed during compilation, what are the complete contents of the `mkmf.log` file generated during the failed installation?** + + + +``` +the mkmf.log file contents go here +``` + + +**Tell us about your system!** + +What is the output from `ruby -v`? + +What is the output from `gem -v`? + +What is the output from `gem env`? + +``` +the output of "gem env" goes here +``` + + +If you're using Bundler: +- what is the output from `bundle version`? +- what is the output from `bundle config`? (Take care to redact any credentials) + +``` +the output of "bundle config" goes here +``` + +If you're on MacOS, please note: +- the version of XCode you have installed (if you know) +- the output of `gcc -v` or `clang -v` + +If Linux or a BSD variant, please note: +- the distro you're using +- the output of `uname -a` +- the contents of `/etc/lsb-release`. + +If Windows, please note: +- whether you're installing the precompiled gems, or compiling yourself with DevKit +- the version of RubyInstaller you've installed +- or if you're not using RubyInstaller, how did you install Ruby? diff --git a/.github/ISSUE_TEMPLATE/3-help-request.md b/.github/ISSUE_TEMPLATE/3-help-request.md new file mode 100644 index 0000000000..fec33dfaaf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3-help-request.md @@ -0,0 +1,58 @@ +--- +name: "Help Request" +about: "If something is confusing or you need a helping hand ..." 
+title: "[help]" +labels: "meta/user-help" +assignees: "" + +--- + +**What problem are you trying to solve?** + + + +**Please show your code!** + + + + +**Environment** + + + +**Additional context** + + diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000..138b21e00a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Nokogiri Discussion Board + url: https://github.com/sparklemotion/nokogiri/discussions + about: Start a thread about a new feature idea, read the RFCs, ask and answer general questions. diff --git a/.github/ISSUE_TEMPLATE/installation-difficulties.md b/.github/ISSUE_TEMPLATE/installation-difficulties.md deleted file mode 100644 index fee38a0689..0000000000 --- a/.github/ISSUE_TEMPLATE/installation-difficulties.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -name: Installation Difficulties -about: If you're having trouble installing Nokogiri ... -title: Installation difficulties -labels: '' -assignees: '' - ---- - -**Have you read and followed the advice in the installation tutorial at http://www.nokogiri.org/tutorials/installing_nokogiri.html?** - -**What is the complete output of `gem install`?** - -Please make sure the escape the file contents with triple-backticks. - - -**What are the complete contents of the `mkmf.log` file generated during the failed installation?** - -Please make sure the escape the file contents with triple-backticks. - - -**What operating system are you using?** - -Are you using Linux, MacOS, Windows, a BSD variant, or something else entirely? - -If MacOS, please note the version of XCode you have installed. - -If Linux, please include the output of `uname -a` and the contents of `/etc/lsb-release`. - -If Windows, please note whether you're installing the precompiled gems, or compiling yourself with DevKit. Also please note the version of RubyInstaller you've installed. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 55ce82bede..d17a637918 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,24 +1,33 @@ ---- - -Thank you for contributing to Nokogiri! To help us prioritize, please take care to answer the questions below when you submit this pull request. - + **What problem is this PR intended to solve?** -If there is an existing issue that describes this, feel free to simply link to that issue. - -Otherwise, please provide enough context for the Nokogiri maintainers to understand your intent. - + **Have you included adequate test coverage?** -We have a thorough test suite that allows us to create releases confidently and prevent accidental regressions. Any proposed change in behavior __must__ be accompanied by tests. - -If possible, please try to write the tests so that they communicate intent. - - -**Does this change affect the C or the Java implementations?** - -If you're proposing a change to the C implementation, has the behavior change been made to the Java code as well? And vice versa? - -If not, that may be OK, just please note it here. + + +**Does this change affect the behavior of either the C or the Java implementations?** + + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..8076dd7a64 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "bundler" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..ccb967f4c9 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,728 @@ +name: ci +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + # + # SECTION pre-checks for fast feedback loops, and to gate the rest of the suite + # + rubocop: + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - run: bundle install --local || bundle install + - run: bundle exec rake rubocop + + basic: + needs: ["rubocop"] + strategy: + fail-fast: false + matrix: + image: ["ubuntu", "ubuntu32"] + sys: ["enable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:${{matrix.image}} + env: + NOKOGIRI_TEST_GC_LEVEL: normal # for fast feedback + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + - run: bundle exec rake test:bench + + gumbo: + needs: ["rubocop"] + strategy: + fail-fast: false + matrix: + plat: ["ubuntu", "windows", "macos"] + runs-on: ${{matrix.plat}}-latest + steps: + - name: configure 
git crlf + if: matrix.plat == 'windows' + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + bundler-cache: true + bundler: latest + - run: bundle exec rake gumbo:test + + # + # SECTION run the test suite across a broad matrix of rubies, configs, and systems + # + linux: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + - run: bundle exec rake test:bench + + valgrind: + needs: ["linux"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + musl: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true 
+ # skip cache because of https://github.com/actions/cache/issues/675 + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + musl-valgrind: + needs: ["musl"] + strategy: + fail-fast: false + matrix: + sys: ["disable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true + # skip cache because of https://github.com/actions/cache/issues/675 + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + libxmlruby: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable"] + ruby: ["3.1"] + env: + BUNDLE_GEMFILE: "Gemfile-libxml-ruby" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + libxmlruby-valgrind: + needs: ["libxmlruby"] + strategy: + fail-fast: false + matrix: + sys: ["disable"] + ruby: ["3.1"] + env: + BUNDLE_GEMFILE: "Gemfile-libxml-ruby" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- 
--${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + osx: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + bundler: latest + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-macos-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + windows: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2", "mingw"] + runs-on: windows-2022 + steps: + - name: configure git crlf + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + mingw: "libxml2 libxslt" + bundler-cache: true + bundler: latest + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-windows-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + jruby: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + ruby: ["jruby-9.4"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + bundler: latest + - run: bundle exec rake compile + - run: bundle exec rake test + - run: bundle exec rake test:bench + + truffleruby-head: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + flags: + - "--disable-system-libraries --disable-static" + - "--disable-system-libraries 
--enable-static" + - "--enable-system-libraries" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:truffle-nightly + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- ${{matrix.flags}} + - run: bundle exec rake test + + bsd: + continue-on-error: true # we're seeing VMs hang and fail the whole workflow + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: macos-12 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: vmactions/freebsd-vm@v0 + with: + usesh: true + prepare: pkg install -y ruby devel/ruby-gems pkgconf libxml2 libxslt + run: | + gem install bundler + bundle install --local || bundle install + bundle exec rake compile -- --${{matrix.sys}}-system-libraries + bundle exec rake test + + # + # SECTION let's look for memory leaks + # + memcheck: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["disable"] + ruby: ["3.1"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:memcheck + + # + # SECTION the end-to-end gem installation tests + # + rcd_image_version: + needs: ["basic"] + runs-on: ubuntu-latest + outputs: + rcd_image_version: ${{steps.rcd_image_version.outputs.rcd_image_version}} + steps: + - uses: actions/checkout@v3 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + 
bundler-cache: true + bundler: latest + - id: rcd_image_version + run: bundle exec ruby -e 'require "rake_compiler_dock"; puts "rcd_image_version=#{RakeCompilerDock::IMAGE_VERSION}"' >> $GITHUB_OUTPUT + + generic-package: + needs: ["rcd_image_version"] + name: "generic-package" + runs-on: ubuntu-latest + container: + image: "ghcr.io/rake-compiler/rake-compiler-dock-image:${{needs.rcd_image_version.outputs.rcd_image_version}}-mri-x86_64-linux" + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - run: git config --global --add safe.directory /__w/nokogiri/nokogiri # shrug + - uses: actions/cache@v3 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: ./scripts/test-gem-build gems ruby + - uses: actions/upload-artifact@v3 + with: + name: generic-gem + path: gems + retention-days: 1 + + generic-linux-install: + needs: ["generic-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + apt-get: "libxml2-dev libxslt1-dev pkg-config" + - uses: actions/download-artifact@v3 + with: + name: generic-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + + generic-darwin-install: + needs: ["generic-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: generic-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + + generic-windows-install: + needs: ["generic-package"] + strategy: + fail-fast: false + matrix: + sys: 
["enable", "disable"] + ruby: ["2.7", "3.0"] + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + mingw: "libxml2 libxslt" + - uses: actions/download-artifact@v3 + with: + name: generic-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + shell: bash + + generic-windows-install-ucrt: + needs: ["generic-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["3.1", "3.2"] + runs-on: windows-2022 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + mingw: "libxml2 libxslt" + - uses: actions/download-artifact@v3 + with: + name: generic-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + shell: bash + + cruby-package: + needs: ["rcd_image_version"] + name: "cruby-package" + strategy: + fail-fast: false + matrix: + plat: + - "aarch64-linux" + - "arm-linux" + - "arm64-darwin" # github actions does not support this runtime as of 2022-12, but let's build anyway + - "x64-mingw-ucrt" + - "x64-mingw32" + - "x86-linux" + - "x86-mingw32" # github actions does not support this runtime as of 2022-12, but let's build anyway + - "x86_64-darwin" + - "x86_64-linux" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - env: + DOCKER_IMAGE: "ghcr.io/rake-compiler/rake-compiler-dock-image:${{needs.rcd_image_version.outputs.rcd_image_version}}-mri-${{matrix.plat}}" + run: | + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + ${DOCKER_IMAGE} \ + ./scripts/test-gem-build gems ${{matrix.plat}} + - uses: actions/upload-artifact@v3 + with: + name: "cruby-${{matrix.plat}}-gem" + path: gems + 
retention-days: 1 + + cruby-x86-linux-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/download-artifact@v3 + with: + name: cruby-x86-linux-gem + path: gems + - run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + --platform=linux/386 \ + ruby:${{matrix.ruby}} \ + ./scripts/test-gem-install gems + + cruby-aarch64-linux-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/download-artifact@v3 + with: + name: cruby-aarch64-linux-gem + path: gems + - run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + --platform=linux/arm64/v8 \ + ruby:${{matrix.ruby}} \ + ./scripts/test-gem-install gems + + cruby-arm-linux-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/download-artifact@v3 + with: + name: cruby-arm-linux-gem + path: gems + - run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + --platform=linux/arm/v7 \ + ruby:${{matrix.ruby}} \ + ./scripts/test-gem-install gems + + cruby-x86_64-linux-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: 
cruby-x86_64-linux-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x86_64-musl-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/download-artifact@v3 + with: + name: cruby-x86_64-linux-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x86_64-darwin-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0", "3.1", "3.2"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: cruby-x86_64-darwin-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x64-mingw32-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.7", "3.0"] + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: cruby-x64-mingw32-gem + path: gems + - run: ./scripts/test-gem-install gems + shell: bash + + cruby-x64-mingw-ucrt-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["3.1", "3.2"] + runs-on: windows-2022 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: cruby-x64-mingw-ucrt-gem + path: gems + - run: ./scripts/test-gem-install gems + shell: bash + + jruby-package: + needs: ["rcd_image_version"] + name: "jruby-package" + runs-on: ubuntu-latest + container: + image: "ghcr.io/rake-compiler/rake-compiler-dock-image:${{needs.rcd_image_version.outputs.rcd_image_version}}-jruby" + steps: + - 
uses: actions/checkout@v3 + with: + submodules: true + - run: ./scripts/test-gem-build gems java + - uses: actions/upload-artifact@v3 + with: + name: jruby-gem + path: gems + retention-days: 1 + + jruby-install: + needs: ["jruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["jruby-9.4"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v3 + with: + name: jruby-gem + path: gems + - run: ./scripts/test-gem-install gems diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml new file mode 100644 index 0000000000..bc1f8705c7 --- /dev/null +++ b/.github/workflows/downstream.yml @@ -0,0 +1,78 @@ +name: downstream +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + downstream: + name: downstream-${{matrix.name}} + strategy: + fail-fast: false + matrix: + include: + - url: https://github.com/flavorjones/loofah + name: loofah + command: "bundle exec rake test" + - url: https://github.com/rails/rails-html-sanitizer + name: rails-html-sanitizer + command: "bundle exec rake test" + - url: https://github.com/rgrove/sanitize + name: sanitize + command: "bundle exec rake test" + - url: https://github.com/ebeigarts/signer + name: signer + command: "bundle exec rake spec" + - url: https://github.com/WinRb/Viewpoint + name: viewpoint + command: "bundle exec rspec spec" + - url: https://github.com/rails/rails + name: xmlmini + command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + - url: https://github.com/pythonicrubyist/creek + name: creek + command: "bundle exec 
rake spec" + # - url: https://github.com/instructure/nokogiri-xmlsec-instructure + # name: nokogiri-xmlsec-instructure + # precommand: "apt install -y libxmlsec1-dev" + # command: "bundle exec rake compile rspec" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - if: matrix.precommand + run: ${{matrix.precommand}} + - run: gem install bundler -v ">= 2.3.22" # for "add --path" + - run: bundle install --local || bundle install + - run: bundle exec rake compile + - run: git clone --depth=1 ${{matrix.url}} ${{matrix.name}} + - name: ${{matrix.name}} test suite + working-directory: ${{matrix.name}} + run: | + bundle remove nokogiri || true + bundle add nokogiri --path=".." + if grep "add_development_dependency.*\bbundler\b" *gemspec ; then + sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec + fi + bundle install --local || bundle install + ${{matrix.command}} diff --git a/.github/workflows/generate-ci-images.yml b/.github/workflows/generate-ci-images.yml new file mode 100644 index 0000000000..58cf93aa1f --- /dev/null +++ b/.github/workflows/generate-ci-images.yml @@ -0,0 +1,37 @@ +# DO NOT EDIT +# this file is automatically generated by the "docker:pipeline" rake task +name: Generate CI Images +on: + workflow_dispatch: {} + schedule: + - cron: "0 5 * * 3" # At 05:00 on Wednesday # https://crontab.guru/#0_5_*_*_3 +# reference: https://github.com/marketplace/actions/build-and-push-docker-images +jobs: + build_images: + strategy: + fail-fast: false + matrix: + tag: ["alpine", "mri-2.7", "mri-3.0", "mri-3.1", "mri-3.2", "truffle-nightly", "ubuntu", "ubuntu32"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + bundler-cache: 
true + bundler: latest + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{github.actor}} + password: ${{secrets.GITHUB_TOKEN}} + - name: ${{matrix.tag}} + uses: docker/build-push-action@v3 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:${{matrix.tag}} + file: oci-images/nokogiri-test/${{matrix.tag}}.dockerfile diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml new file mode 100644 index 0000000000..1c63c8078a --- /dev/null +++ b/.github/workflows/upstream.yml @@ -0,0 +1,156 @@ +name: upstream +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + pull_request: + types: [opened, synchronize] + branches: + - '*' + paths: + - .github/workflows/upstream.yml # this file + +jobs: + xmlsoft-head: + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Setup libxml2 + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2 + cd libxml2 + env NOCONFIGURE=t ./autogen.sh + - name: Setup libxslt + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxslt + cd libxslt + env NOCONFIGURE=t ./autogen.sh + - name: "Run bundle install" + run: "bundle install --local || bundle install" + - name: "Compile against libxml2 and libxslt source directories" + run: "bundle exec rake compile -- --with-xml2-source-dir=${GITHUB_WORKSPACE}/libxml2 --with-xslt-source-dir=${GITHUB_WORKSPACE}/libxslt" + - run: "bundle exec rake test" + + xmlsoft-head-valgrind: + needs: ["xmlsoft-head"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Setup libxml2 + 
run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2 + cd libxml2 + env NOCONFIGURE=t ./autogen.sh + - name: Setup libxslt + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxslt + cd libxslt + env NOCONFIGURE=t ./autogen.sh + - name: "Run bundle install" + run: "bundle install --local || bundle install" + - name: "Compile against libxml2 and libxslt source directories" + run: "bundle exec rake compile -- --with-xml2-source-dir=${GITHUB_WORKSPACE}/libxml2 --with-xslt-source-dir=${GITHUB_WORKSPACE}/libxslt" + - run: "bundle exec rake test:valgrind" + + ruby-head: + strategy: + fail-fast: false + matrix: + plat: ["ubuntu", "windows", "macos"] + sys: ["enable", "disable"] + runs-on: ${{matrix.plat}}-latest + steps: + - name: configure git crlf + if: matrix.plat == 'windows' + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "head" + apt-get: "libxml2-dev libxslt1-dev pkg-config" + mingw: "_upgrade_ libxml2 libxslt pkgconf" + bundler-cache: true + bundler: latest + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-${{matrix.plat}}-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + ruby-head-valgrind: + needs: ["ruby-head"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: ubuntu-20.04 # warning that 22.04 binary has dwarf5 debug info that valgrind can't read + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby-pkgs@v1 + with: + ruby-version: "head" + apt-get: "libxml2-dev libxslt1-dev pkg-config valgrind" + bundler-cache: true + bundler: latest + - uses: actions/cache@v3 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 
'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + jruby-head: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "jruby-head" + bundler-cache: true + bundler: latest + - run: bundle exec rake compile + - run: bundle exec rake test + + html5lib-tests: + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - uses: actions/cache@v3 + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - name: Update html5lib-tests + run: | + cd test/html5lib-tests + git remote update origin + git checkout origin/master + git log --pretty=oneline -n1 + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --disable-system-libraries + - run: bundle exec rake test diff --git a/.gitignore b/.gitignore index 8746a53f36..ade1b6109b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,42 @@ -*.gemspec -*.tmproj -*~ -.*.swp -.bundle +# bundler +/.bundle/ +/Gemfile*lock +/vendor/ + +# building and packaging +/ext/nokogiri/**/nokogiri.dll +/ext/nokogiri/include +/gems/ +/lib/nokogiri/**/nokogiri.bundle +/lib/nokogiri/**/nokogiri.so +/lib/nokogiri/nokogiri.jar +/pkg/ +/ports/ +/tmp/ + +# code coverage +/coverage/ + +# documentation +/html/ + +# editors and tags .classpath .project -.rake_tasks -.ruby-gemset -.ruby-version -.rvmrc .settings -.yardoc -Gemfile*lock -TAGS build -concourse/private.yml -concourse/images/*.generated -coverage -ext/java/Canna -ext/java/nokogiri/**/*.class -ext/nokogiri/*.dll -gems -lib/nokogiri/**/nokogiri.so -lib/nokogiri/nokogiri.bundle -lib/nokogiri/nokogiri.jar -lib/nokogiri/nokogiri.rb -pkg -ports -stash +TAGS tags -test/*/*_mini.rb -test/*_mini.rb -test/test_jruby_footer.rb -test/test_jruby_header.rb -tmp 
-vendor + +# auto-formatting +/ext/**/*.orig + +# mac +.DS_Store/ + +# Vagrant +/.vagrant/ + +# directories or files named after issues, and debugging +/[0-9][0-9][0-9]* +/*.log diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..57c9b57f2f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "test/html5lib-tests"] + path = test/html5lib-tests + url = https://github.com/html5lib/html5lib-tests.git + branch = master diff --git a/.hoerc b/.hoerc deleted file mode 100644 index 49e6572a18..0000000000 --- a/.hoerc +++ /dev/null @@ -1,41 +0,0 @@ -# -*- yaml -*- ---- -# this regex is what `rake check_manifest` should consider excluded from the gem -exclude: !ruby/regexp '/ -(^\.\/ - ((\.git - |.yardoc - |concourse - |gems - |ports - |suppressions - |tasks - |test - |tmp - )\/) - |\.(autotest - |cross_rubies - |editorconfig - |gemtest - |github - |gitignore - |hoerc - |travis\.yml - ) - |Gemfile.* - |Manifest.txt - |Rakefile - |appveyor\.yml - |build_all - |CHANGELOG.md - |CODE_OF_CONDUCT.md - |CONTRIBUTING.md - |ROADMAP.md - |SECURITY.md - |STANDARD_RESPONSES.md - |Y_U_NO_GEMSPEC.md - |C_CODING_STYLE.* - |patches/sort-patches-by-date -) -|\.gitkeep -/x' diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000000..8c5b9bbe6d --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,23 @@ +require: + - rubocop-minitest + - rubocop-performance + - rubocop-rake +inherit_gem: + rubocop-shopify: rubocop.yml +inherit_from: .rubocop_todo.yml + +AllCops: + NewCops: enable + Exclude: + - 'lib/nokogiri/css/parser.rb' # generated by racc + - 'lib/nokogiri/css/tokenizer.rb' # generated by rex + - 'lib/nokogiri/jruby/nokogiri_jars.rb' # generated by jar-dependencies + - 'test/_test_pattern_matching.rb' # until TargetRubyVersion >= 3.0 + TargetRubyVersion: "2.7" +Naming/MethodName: + Enabled: false +Naming/FileName: + Exclude: + - rakelib/** +Minitest/EmptyLineBeforeAssertionMethods: + Enabled: false diff --git a/.rubocop_todo.yml 
b/.rubocop_todo.yml new file mode 100644 index 0000000000..4f657f9b9b --- /dev/null +++ b/.rubocop_todo.yml @@ -0,0 +1,69 @@ +# This configuration was generated by +# `rubocop --auto-gen-config --exclude-limit 50` +# on 2022-08-23 18:18:31 UTC using RuboCop version 1.35.1. +# The point is for the user to remove these configuration records +# one by one as the offenses are removed from the code base. +# Note that changes in the inspected code, or installation of new +# versions of RuboCop, may require this file to be generated again. + +# Offense count: 3 +Lint/MissingSuper: + Exclude: + - 'lib/nokogiri/html4/document_fragment.rb' + - 'lib/nokogiri/html5/document_fragment.rb' + - 'lib/nokogiri/xml/document_fragment.rb' + +# Offense count: 4 +# Configuration parameters: CountBlocks. +Metrics/BlockNesting: + Max: 4 + +# Offense count: 2 +# Configuration parameters: Max, CountKeywordArgs. +Metrics/ParameterLists: + MaxOptionalParameters: 4 + +# Offense count: 3 +# Configuration parameters: MinSize. +Performance/CollectionLiteralInLoop: + Exclude: + - 'test/html5/test_tree_construction.rb' + - 'test/xml/test_dtd_encoding.rb' + - 'test/xml/test_node_reparenting.rb' + +# Offense count: 23 +# This cop supports safe autocorrection (--autocorrect). +# Configuration parameters: EnforcedStyle. 
+# SupportedStyles: def_self, self_class +Style/ClassMethodsDefinitions: + Exclude: + - 'lib/nokogiri/css/xpath_visitor.rb' + - 'lib/nokogiri/html4/document_fragment.rb' + - 'lib/nokogiri/html4/encoding_reader.rb' + - 'lib/nokogiri/html4/sax/parser_context.rb' + - 'lib/nokogiri/html5.rb' + - 'lib/nokogiri/html5/document_fragment.rb' + - 'lib/nokogiri/version/info.rb' + - 'lib/nokogiri/xml/builder.rb' + - 'lib/nokogiri/xml/document_fragment.rb' + - 'lib/nokogiri/xml/entity_decl.rb' + - 'lib/nokogiri/xml/sax/parser_context.rb' + - 'lib/nokogiri/xml/schema.rb' + - 'test/helper.rb' + - 'test/html5/test_serialize.rb' + - 'test/html5/test_tree_construction.rb' + - 'test/test_memory_leak.rb' + - 'test/test_soap4r_sax.rb' + - 'test/xml/test_entity_reference.rb' + +# Offense count: 2 +Style/MissingRespondToMissing: + Exclude: + - 'lib/nokogiri/xml/builder.rb' + +# Offense count: 73 +# This cop supports safe autocorrection (--autocorrect). +# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns. +# URISchemes: http, https +Layout/LineLength: + Max: 250 diff --git a/CHANGELOG.md b/CHANGELOG.md index c613e9554e..2981f55e3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,797 @@ # Nokogiri Changelog +Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [README.md](README.md) for details. + +--- + +## 1.14.2 / 2023-02-13 + +### Fixed + +* Calling `NodeSet#to_html` on an empty node set no longer raises an encoding-related exception. This bug was introduced in v1.14.0 while fixing [#2649](https://github.com/sparklemotion/nokogiri/issues/2649). [[#2784](https://github.com/sparklemotion/nokogiri/issues/2784)] + + +## 1.14.1 / 2023-01-30 + +### Fixed + +* Serializing documents now works again with pseudo-IO objects that don't support IO's encoding API (like rubyzip's `Zip::OutputStream`). 
This was a regression in v1.14.0 due to the fix for [#752](https://github.com/sparklemotion/nokogiri/issues/752) in [#2434](https://github.com/sparklemotion/nokogiri/issues/2434), and was not completely fixed by [#2753](https://github.com/sparklemotion/nokogiri/issues/2753). [[#2773](https://github.com/sparklemotion/nokogiri/issues/2773)] +* [CRuby] Address compiler warnings about `void*` casting and old-style C function definitions. + + +## 1.14.0 / 2023-01-12 + +### Notable Changes + +#### Ruby + +This release introduces native gem support for Ruby 3.2. (Also see "Technical note" under "Changed" below.) + +This release ends support for: + +* Ruby 2.6, for which [upstream support ended 2022-04-12](https://www.ruby-lang.org/en/downloads/branches/). +* JRuby 9.3, which is not fully compatible with Ruby 2.7+ + + +#### Faster, more reliable installation: Native Gem for `aarch64-linux` (aka `linux/arm64/v8`) + +This version of Nokogiri ships _official_ native gem support for the `aarch64-linux` platform, which should support AWS Graviton and other ARM64 Linux platforms. Please note that glibc >= 2.29 is required for aarch64-linux systems, see [Supported Platforms](https://nokogiri.org/#supported-platforms) for more information. + + +#### Faster, more reliable installation: Native Gem for `arm-linux` (aka `linux/arm/v7`) + +This version of Nokogiri ships _experimental_ native gem support for the `arm-linux` platform. Please note that glibc >= 2.29 is required for arm-linux systems, see [Supported Platforms](https://nokogiri.org/#supported-platforms) for more information. + + +#### Pattern matching + +This version introduces an _experimental_ pattern matching API for `XML::Attr`, `XML::Document`, `XML::DocumentFragment`, `XML::Namespace`, `XML::Node`, and `XML::NodeSet` (and their subclasses). 
+ +Some documentation on what can be matched: + +* [`XML::Attr#deconstruct_keys`](https://nokogiri.org/rdoc/Nokogiri/XML/Attr.html?h=deconstruct#method-i-deconstruct_keys) +* [`XML::Document#deconstruct_keys`](https://nokogiri.org/rdoc/Nokogiri/XML/Document.html?h=deconstruct#method-i-deconstruct_keys) +* [`XML::Namespace#deconstruct_keys`](https://nokogiri.org/rdoc/Nokogiri/XML/Namespace.html?h=deconstruct+namespace#method-i-deconstruct_keys) +* [`XML::Node#deconstruct_keys`](https://nokogiri.org/rdoc/Nokogiri/XML/Node.html?h=deconstruct#method-i-deconstruct_keys) +* [`XML::DocumentFragment#deconstruct`](https://nokogiri.org/rdoc/Nokogiri/XML/DocumentFragment.html?h=deconstruct#method-i-deconstruct) +* [`XML::NodeSet#deconstruct`](https://nokogiri.org/rdoc/Nokogiri/XML/NodeSet.html?h=deconstruct#method-i-deconstruct) + +We welcome feedback on this API at [#2360](https://github.com/sparklemotion/nokogiri/issues/2360). + + +### Dependencies + +#### CRuby + +* Vendored libiconv is updated to [v1.17](https://savannah.gnu.org/forum/forum.php?forum_id=10175) + +#### JRuby + +* This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/jar-dependencies) to manage most of the vendored Java dependencies. `nokogiri -v` now outputs maven metadata for all Java dependencies, and `Nokogiri::VERSION_INFO` also contains this metadata. [[#2432](https://github.com/sparklemotion/nokogiri/issues/2432)] +* HTML parsing is now provided by `net.sourceforge.htmlunit:neko-htmlunit:2.61.0` (previously Nokogiri used a fork of `org.cyberneko.html:nekohtml`) +* Vendored Jing is updated from `com.thaiopensource:jing:20091111` to `nu.validator:jing:20200702VNU`. +* New dependency on `net.sf.saxon:Saxon-HE:9.6.0-4` (via `nu.validator:jing:20200702VNU`). + + +### Added + +* `Node#wrap` and `NodeSet#wrap` now also accept a `Node` type argument, which will be `dup`ed for each wrapper. 
For cases where many nodes are being wrapped, creating a `Node` once using `Document#create_element` and passing that `Node` multiple times is significantly faster than re-parsing markup on each call. [[#2657](https://github.com/sparklemotion/nokogiri/issues/2657)] +* [CRuby] Invocation of custom XPath or CSS handler functions may now use the `nokogiri` namespace prefix. Historically, the JRuby implementation _required_ this namespace but the CRuby implementation did not support it. It's recommended that all XPath and CSS queries use the `nokogiri` namespace going forward. Invocation without the namespace is planned for deprecation in v1.15.0 and removal in a future release. [[#2147](https://github.com/sparklemotion/nokogiri/issues/2147)] +* `HTML5::Document#quirks_mode` and `HTML5::DocumentFragment#quirks_mode` expose the quirks mode used by the parser. + + +### Improved + +#### Functional + +* HTML5 parser update to reflect changes to the living specification: + * [Add the <search> element by domenic · whatwg/html](https://github.com/whatwg/html/pull/7320) + * [Remove parse error for <template><tr></tr> </template> by zcorpan · whatwg/html](https://github.com/whatwg/html/pull/8271) + +#### Performance + +* Serialization of HTML5 documents and fragments has been re-implemented and is ~10x faster than previous versions. [[#2596](https://github.com/sparklemotion/nokogiri/issues/2596), [#2569](https://github.com/sparklemotion/nokogiri/issues/2569)] +* Parsing of HTML5 documents is ~90% faster thanks to additional compiler optimizations being applied. [[#2639](https://github.com/sparklemotion/nokogiri/issues/2639)] +* Compare `Encoding` objects rather than compare their names. This is a slight performance improvement and is future-proof. [[#2454](https://github.com/sparklemotion/nokogiri/issues/2454)] (Thanks, [@casperisfine](https://github.com/casperisfine)!) 
+ +#### Error handling + +* `Document#canonicalize` now raises an exception if `inclusive_namespaces` is non-nil and the mode is inclusive, i.e. `XML_C14N_1_0` or `XML_C14N_1_1`. `inclusive_namespaces` can only be passed with exclusive modes, and previously this silently failed. +* Empty CSS selectors now raise a clearer `Nokogiri::CSS::SyntaxError` message, "empty CSS selector". Previously the exception raised from the bowels of `racc` was "unexpected '$' after ''". [[#2700](https://github.com/sparklemotion/nokogiri/issues/2700)] +* [CRuby] `XML::Reader` parsing errors encountered during `Reader#attribute_hash` and `Reader#namespaces` now raise an `XML::SyntaxError`. Previously these methods would return `nil` and users would generally experience `NoMethodErrors` from elsewhere in the code. +* Prefer `ruby_xmalloc` to `malloc` within the C extension. [[#2480](https://github.com/sparklemotion/nokogiri/issues/2480)] (Thanks, [@Garfield96](https://github.com/Garfield96)!) + +#### Installation + +* Avoid compile-time conflict with system-installed `gumbo.h` on OpenBSD. [[#2464](https://github.com/sparklemotion/nokogiri/issues/2464)] +* Remove calls to `vasprintf` in favor of platform-independent `rb_vsprintf` +* Installation from source on systems missing libiconv will once again generate a helpful error message (broken since v1.11.0). [[#2505](https://github.com/sparklemotion/nokogiri/issues/2505)] +* [CRuby+OSX] Compiling from source on MacOS will use the clang option `-Wno-unknown-warning-option` to avoid errors when Ruby injects options that clang doesn't know about. [[#2689](https://github.com/sparklemotion/nokogiri/issues/2689)] + + +### Fixed + +* `SAX::Parser`'s `encoding` attribute will not be clobbered when an alternative encoding is passed into `SAX::Parser#parse_io`. [[#1942](https://github.com/sparklemotion/nokogiri/issues/1942)] (Thanks, [@kp666](https://github.com/kp666)!) +* Serialized `HTML4::DocumentFragment` will now be properly encoded. 
Previously this empty string was encoded as `US-ASCII`. [[#2649](https://github.com/sparklemotion/nokogiri/issues/2649)] +* `Node#wrap` now uses the parent as the context node for parsing wrapper markup, falling back to the document for unparented nodes. Previously the document was always used. +* [CRuby] UTF-16-encoded documents longer than ~4000 code points now serialize properly. Previously the serialized document was corrupted when it exceeded the length of libxml2's internal string buffer. [[#752](https://github.com/sparklemotion/nokogiri/issues/752)] +* [CRuby] The HTML5 parser now correctly handles text at the end of `form` elements. +* [CRuby] `HTML5::Document#fragment` now always uses `body` as the parsing context. Previously, fragments were parsed in the context of the associated document's root node, which allowed for inconsistent parsing. [[#2553](https://github.com/sparklemotion/nokogiri/issues/2553)] +* [CRuby] `Nokogiri::HTML5::Document#url` now correctly returns the URL passed to the constructor method. Previously it always returned `nil`. [[#2583](https://github.com/sparklemotion/nokogiri/issues/2583)] +* [CRuby] `HTML5` encoding detection is now case-insensitive with respect to `meta` tag charset declaration. [[#2693](https://github.com/sparklemotion/nokogiri/issues/2693)] +* [CRuby] `HTML5` fragment parsing in context of an annotation-xml node now works. Previously this rarely-used path invoked rb_funcall with incorrect parameters, resulting in an exception, a fatal error, or potentially a segfault. [[#2692](https://github.com/sparklemotion/nokogiri/issues/2692)] +* [CRuby] `HTML5` quirks mode during fragment parsing more closely matches document parsing. [[#2646](https://github.com/sparklemotion/nokogiri/issues/2646)] +* [JRuby] Fixed a bug with adding the same namespace to multiple nodes via `#add_namespace_definition`. [[#1247](https: 2.6.1` to `~> 2.7.0`. ("ruby" platform gem only.) 
+ + +### Improved + +* `{XML,HTML4}::DocumentFragment` constructors all now take an optional parse options parameter or block (similar to Document constructors). [[#1692](https://github.com/sparklemotion/nokogiri/issues/1692)] (Thanks, [@JackMc](https://github.com/JackMc)!) +* `Nokogiri::CSS.xpath_for` allows an `XPathVisitor` to be injected, for finer-grained control over how CSS queries are translated into XPath. +* [CRuby] `XML::Reader#encoding` will return the encoding detected by the parser when it's not passed to the constructor. [[#980](https://github.com/sparklemotion/nokogiri/issues/980)] +* [CRuby] Handle abruptly-closed HTML comments as recommended by WHATWG. (Thanks to [tehryanx](https://hackerone.com/tehryanx?type=user) for reporting!) +* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES`, which is turned on by default in `ParseOptions::DEFAULT_{XML,XSLT,HTML,SCHEMA}` (Note that JRuby already supported large line numbers.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)] +* [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note that JRuby already did this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)] +* [CRuby] Source builds will download zlib and libiconv via HTTPS. ("ruby" platform gem only.) 
[[#2391](https://github.com/sparklemotion/nokogiri/issues/2391)] (Thanks, [@jmartin-r7](https://github.com/jmartin-r7)!) +* [JRuby] `Node#line` behavior has been modified to return the line number of the node in the _final DOM structure_. This behavior is different from CRuby, which returns the node's position in the _input string_. Ideally the two implementations would be the same, but at least it is now officially documented and tested. The real-world impact of this change is that the value returned in JRuby is greater by 1 to account for the XML prolog in the output. [[#2380](https://github.com/sparklemotion/nokogiri/issues/2380)] (Thanks, [@dabdine](https://github.com/dabdine)!) + + +### Fixed + +* CSS queries on HTML5 documents now correctly match foreign elements (SVG, MathML) when namespaces are not specified in the query. [[#2376](https://github.com/sparklemotion/nokogiri/issues/2376)] +* `XML::Builder` blocks restore context properly when exceptions are raised. [[#2372](https://github.com/sparklemotion/nokogiri/issues/2372)] (Thanks, [@ric2b](https://github.com/ric2b) and [@rinthedev](https://github.com/rinthedev)!) +* The `Nokogiri::CSS::Parser` cache now uses the `XPathVisitor` configuration as part of the cache key, preventing incorrect cache results from being returned when multiple `XPathVisitor` options are being used. +* Error recovery from in-context parsing (e.g., `Node#parse`) now always uses the correct `DocumentFragment` class. Previously `Nokogiri::HTML4::DocumentFragment` was always used, even for XML documents. [[#1158](https://github.com/sparklemotion/nokogiri/issues/1158)] +* `DocumentFragment#>` now works properly, matching a CSS selector against only the fragment roots. [[#1857](https://github.com/sparklemotion/nokogiri/issues/1857)] +* `XML::DocumentFragment#errors` now correctly contains any parsing errors encountered. Previously this was always empty. (Note that `HTML::DocumentFragment#errors` already did this.)
+* [CRuby] Fix memory leak in `Document#canonicalize` when inclusive namespaces are passed in. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `Document#canonicalize` when an argument type error is raised. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `EncodingHandler` where iconv handlers were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in XPath custom handlers where string arguments were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `Reader#base_uri` where the string returned by libxml2 was not freed. [[#2347](https://github.com/sparklemotion/nokogiri/issues/2347)] +* [JRuby] Deleting a `Namespace` from a `NodeSet` no longer modifies the `href` to be the default namespace URL. +* [JRuby] Fix XHTML formatting of closing tags for non-container elements. [[#2355](https://github.com/sparklemotion/nokogiri/issues/2355)] + + +### Deprecated + +* Passing a `Nokogiri::XML::Node` as the second parameter to `Node.new` is deprecated and will generate a warning. This parameter should be a kind of `Nokogiri::XML::Document`. This will become an error in a future version of Nokogiri. [[#975](https://github.com/sparklemotion/nokogiri/issues/975)] +* `Nokogiri::CSS::Parser`, `Nokogiri::CSS::Tokenizer`, and `Nokogiri::CSS::Node` are now internal-only APIs that are no longer documented, and should not be considered stable. With the introduction of `XPathVisitor` injection into `Nokogiri::CSS.xpath_for` there should be no reason to rely on these internal APIs. +* CSS-to-XPath utility classes `Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins` and `XPathVisitorOptimallyUseBuiltins` are deprecated. Prefer `Nokogiri::CSS::XPathVisitor` with appropriate constructor arguments. These classes will be removed in a future version of Nokogiri. 
+ + +## 1.12.5 / 2021-09-27 + +### Security + +[JRuby] Address CVE-2021-41098 ([GHSA-2rr5-8q37-2w7h](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-2rr5-8q37-2w7h)). + +In Nokogiri v1.12.4 and earlier, on JRuby only, the SAX parsers resolve external entities (XXE) by default. This fix turns off entity-resolution-by-default in the JRuby SAX parsers to match the CRuby SAX parsers' behavior. + +CRuby users are not affected by this CVE. + + +### Fixed + +* [CRuby] `Document#to_xhtml` properly serializes self-closing tags in libxml > 2.9.10. A behavior change introduced in libxml 2.9.11 resulted in emitting start and end tags (e.g., `<br></br>`) instead of a self-closing tag (e.g., `<br/>
`) in previous Nokogiri versions. [[#2324](https://github.com/sparklemotion/nokogiri/issues/2324)] + + +## 1.12.4 / 2021-08-29 + +### Notable fix: Namespace inheritance + +Namespace behavior when reparenting nodes has historically been poorly specified and the behavior diverged between CRuby and JRuby. As a result, making this behavior consistent in v1.12.0 introduced a breaking change. + +This patch release reverts the Builder behavior present in v1.12.0..v1.12.3 but keeps the Document behavior. This release also introduces a Document attribute to allow affected users to easily change this behavior for their legacy code without invasive changes. + + +#### Compensating Feature in XML::Document + +This release of Nokogiri introduces a new `Document` boolean attribute, `namespace_inheritance`, which controls whether children should inherit a namespace when they are reparented. `Nokogiri::XML::Document` defaults this attribute to `false` meaning "do not inherit," thereby making explicit the behavior change introduced in v1.12.0. + +CRuby users who desire the pre-v1.12.0 behavior may set `document.namespace_inheritance = true` before reparenting nodes. + +See https://nokogiri.org/rdoc/Nokogiri/XML/Document.html#namespace_inheritance-instance_method for example usage. + + +#### Fix for XML::Builder + +However, recognizing that we want `Builder`-created children to inherit namespaces, Builder now will set `namespace_inheritance=true` on the underlying document for both JRuby and CRuby. This means that, on CRuby, the pre-v1.12.0 behavior is restored. + +Users who want to turn this behavior off may pass a keyword argument to the Builder constructor like so: + +``` ruby +Nokogiri::XML::Builder.new(namespace_inheritance: false) +``` + +See https://nokogiri.org/rdoc/Nokogiri/XML/Builder.html#label-Namespace+inheritance for example usage.
+ + +#### Downstream gem maintainers + +Note that any downstream gems may want to specifically omit Nokogiri v1.12.0--v1.12.3 from their dependency specification if they rely on child namespace inheritance: + +``` ruby +Gem::Specification.new do |gem| + # ... + gem.add_runtime_dependency 'nokogiri', '!=1.12.3', '!=1.12.2', '!=1.12.1', '!=1.12.0' + # ... +end +``` + + +### Fixed + +* [JRuby] Fix NPE in Schema parsing when an imported resource doesn't have a `systemId`. [[#2296](https://github.com/sparklemotion/nokogiri/issues/2296)] (Thanks, [@pepijnve](https://github.com/pepijnve)!) + + +## 1.12.3 / 2021-08-10 + +### Fixed + +* [CRuby] Fix compilation of libgumbo on older systems with versions of GCC that give errors on C99-isms. Affected systems include RHEL6, RHEL7, and SLES12. [[#2302](https://github.com/sparklemotion/nokogiri/issues/2302)] + + +## 1.12.2 / 2021-08-04 + +### Fixed + +* [CRuby] Ensure that C extension files in non-native gem installations are loaded using `require` and rely on `$LOAD_PATH` instead of using `require_relative`. This issue only exists when deleting shared libraries that exist outside the extensions directory, something users occasionally do to conserve disk space. [[#2300](https://github.com/sparklemotion/nokogiri/issues/2300)] + + +## 1.12.1 / 2021-08-03 + +### Fixed + +* [CRuby] Fix compilation of libgumbo on BSD systems by avoiding GNU-isms. [[#2298](https://github.com/sparklemotion/nokogiri/issues/2298)] + + +## 1.12.0 / 2021-08-02 + +### Notable Addition: HTML5 Support (CRuby only) + +__HTML5 support__ has been added (to CRuby only) by merging [Nokogumbo](https://github.com/rubys/nokogumbo) into Nokogiri. The Nokogumbo public API has been preserved, so this functionality is available under the `Nokogiri::HTML5` namespace. [[#2204](https://github.com/sparklemotion/nokogiri/issues/2204)] + +Please note that HTML5 support is not available for JRuby in this version. 
However, we feel it is important to think about JRuby and we hope to work on this in the future. If you're interested in helping with HTML5 support on JRuby, please reach out to the maintainers by commenting on issue [#2227](https://github.com/sparklemotion/nokogiri/issues/2227). + +Many thanks to Sam Ruby, Steve Checkoway, and Craig Barnes for creating and maintaining Nokogumbo and supporting the Gumbo HTML5 parser. They're now Nokogiri core contributors with all the powers and privileges pertaining thereto. 🙌 + + +### Notable Change: `Nokogiri::HTML4` module and namespace + +`Nokogiri::HTML` has been renamed to `Nokogiri::HTML4`, and `Nokogiri::HTML` is aliased to preserve backwards-compatibility. `Nokogiri::HTML` and `Nokogiri::HTML4` parse methods still use libxml2's (or NekoHTML's) HTML4 parser in the v1.12 release series. + +Take special note that if you rely on the class name of an object in your code, objects will now report a class of `Nokogiri::HTML4::Foo` where they previously reported `Nokogiri::HTML::Foo`. Instead of relying on the string returned by `Object#class`, prefer `Class#===` or `Object#is_a?` or `Object#instance_of?`. + +Future releases of Nokogiri may deprecate `HTML` methods or otherwise change this behavior, so please start using `HTML4` in place of `HTML`. + + +### Added + +* [CRuby] `Nokogiri::VERSION_INFO["libxslt"]["datetime_enabled"]` is a new boolean value which describes whether libxslt (or, more properly, libexslt) has compiled-in datetime support. This is generally going to be `true`, but some distros ship without this support (e.g., some mingw UCRT-based packages, see https://github.com/msys2/MINGW-packages/pull/8957). See [#2272](https://github.com/sparklemotion/nokogiri/issues/2272) for more details. + + +### Changed + +* Introduce a new constant, `Nokogiri::XML::ParseOptions::DEFAULT_XSLT`, which adds the libxslt-preferred options of `NOENT | DTDLOAD | DTDATTR | NOCDATA` to `ParseOptions::DEFAULT_XML`.
+* `Nokogiri.XSLT` parses stylesheets using `ParseOptions::DEFAULT_XSLT`, which should make some edge-case XSL transformations match libxslt's default behavior. [[#1940](https://github.com/sparklemotion/nokogiri/issues/1940)] + + +### Fixed + +* [CRuby] Namespaced attributes are handled properly when their parent node is reparented into another document. Previously, the namespace may have gotten dropped. [[#2228](https://github.com/sparklemotion/nokogiri/issues/2228)] +* [CRuby] Reparented nodes no longer inherit their parent's namespace. Previously, a node without a namespace was forced to adopt its parent's namespace. [[#1712](https://github.com/sparklemotion/nokogiri/issues/1712), [#425](https://github.com/sparklemotion/nokogiri/issues/425)] + + +### Improved + +* [CRuby] Speed up (slightly) the compile time of packaged libraries `libiconv`, `libxml2`, and `libxslt` by using autoconf's `--disable-dependency-tracking` option. ("ruby" platform gem only.) + + +### Deprecated + +* Deprecating Nokogumbo's `Nokogiri::HTML5.get`. This method will be removed in a future version of Nokogiri. + + +### Dependencies + +* [CRuby] Upgrade mini_portile2 dependency from `~> 2.5.0` to `~> 2.6.1`. ("ruby" platform gem only.) + + +## 1.11.7 / 2021-06-02 + +### Fixed + +* [CRuby] Backporting an upstream fix to XPath recursion depth limits which impacted some users of complex XPath queries. This issue is present in libxml 2.9.11 and 2.9.12. [[#2257](https://github.com/sparklemotion/nokogiri/issues/2257)] + + +## 1.11.6 / 2021-05-26 + +### Fixed + +* [CRuby] `DocumentFragment#path` now does proper error-checking to handle behavior introduced in libxml > 2.9.10. In v1.11.4 and v1.11.5, calling `DocumentFragment#path` could result in a segfault. + + +## 1.11.5 / 2021-05-19 + +### Fixed + +[Windows CRuby] Work around segfault at process exit on Windows when using libxml2 system DLLs. 
+ +libxml 2.9.12 introduced new behavior to avoid memory leaks when unloading libxml2 shared libraries (see [libxml/!66](https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/66)). Early testing caught this segfault on non-Windows platforms (see [#2059](https://github.com/sparklemotion/nokogiri/issues/2059) and [libxml@956534e](https://gitlab.gnome.org/GNOME/libxml2/-/commit/956534e02ef280795a187c16f6ac04e107f23c5d)) but it was incompletely fixed and is still an issue on Windows platforms that are using system DLLs. + +We work around this by configuring libxml2 in this situation to use its default memory management functions. Note that if Nokogiri is not on Windows, or is not using shared system libraries, it will continue to configure libxml2 to use Ruby's memory management functions. `Nokogiri::VERSION_INFO["libxml"]["memory_management"]` will allow you to verify when the default memory management functions are being used. [[#2241](https://github.com/sparklemotion/nokogiri/issues/2241)] + + +### Added + +`Nokogiri::VERSION_INFO["libxml"]` now contains the key `"memory_management"` to declare whether libxml2 is using its `default` memory management functions, or whether it uses the memory management functions from `ruby`. See above for more details. + + +## 1.11.4 / 2021-05-14 + +### Security + +[CRuby] Vendored libxml2 upgraded to v2.9.12 which addresses: + +* [CVE-2019-20388](https://security.archlinux.org/CVE-2019-20388) +* [CVE-2020-24977](https://security.archlinux.org/CVE-2020-24977) +* [CVE-2021-3517](https://security.archlinux.org/CVE-2021-3517) +* [CVE-2021-3518](https://security.archlinux.org/CVE-2021-3518) +* [CVE-2021-3537](https://security.archlinux.org/CVE-2021-3537) +* [CVE-2021-3541](https://security.archlinux.org/CVE-2021-3541) + +Note that two additional CVEs were addressed upstream but are not relevant to this release.
[CVE-2021-3516](https://security.archlinux.org/CVE-2021-3516) via `xmllint` is not present in Nokogiri, and [CVE-2020-7595](https://security.archlinux.org/CVE-2020-7595) has been patched in Nokogiri since v1.10.8 (see [#1992](https://github.com/sparklemotion/nokogiri/issues/1992)). + +Please see [nokogiri/GHSA-7rrm-v45f-jp64](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-7rrm-v45f-jp64) or [#2233](https://github.com/sparklemotion/nokogiri/issues/2233) for a more complete analysis of these CVEs and patches. + + +### Dependencies + +* [CRuby] vendored libxml2 is updated from 2.9.10 to 2.9.12. (Note that 2.9.11 was skipped because it was superseded by 2.9.12 a few hours after its release.) + + +## 1.11.3 / 2021-04-07 + +### Fixed + +* [CRuby] Passing non-`Node` objects to `Document#root=` now raises an `ArgumentError` exception. Previously this likely segfaulted. [[#1900](https://github.com/sparklemotion/nokogiri/issues/1900)] +* [JRuby] Passing non-`Node` objects to `Document#root=` now raises an `ArgumentError` exception. Previously this raised a `TypeError` exception. +* [CRuby] arm64/aarch64 systems (like Apple's M1) can now compile libxml2 and libxslt from source (though we continue to strongly advise users to install the native gems for the best possible experience) + + +## 1.11.2 / 2021-03-11 + +### Fixed + +* [CRuby] `NodeSet` may now safely contain `Node` objects from multiple documents. Previously the GC lifecycle of the parent `Document` objects could lead to nodes being GCed while still in scope. [[#1952](https://github.com/sparklemotion/nokogiri/issues/1952#issuecomment-770856928)] +* [CRuby] Patch libxml2 to avoid "huge input lookup" errors on large CDATA elements. (See upstream [GNOME/libxml2#200](https://gitlab.gnome.org/GNOME/libxml2/-/issues/200) and [GNOME/libxml2!100](https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/100).) [[#2132](https://github.com/sparklemotion/nokogiri/issues/2132)].
+* [CRuby+Windows] Enable Nokogumbo (and other downstream gems) to compile and link against `nokogiri.so` by including `LDFLAGS` in `Nokogiri::VERSION_INFO`. [[#2167](https://github.com/sparklemotion/nokogiri/issues/2167)] +* [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object. +* [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`. + + +### Improved + +* Reduce the number of object allocations needed when parsing an `HTML::DocumentFragment`. [[#2087](https://github.com/sparklemotion/nokogiri/issues/2087)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) +* [JRuby] Update the algorithm used to calculate `Node#line` to be wrong less-often. The underlying parser, Xerces, does not track line numbers, and so we've always used a hacky solution for this method. [[#1223](https://github.com/sparklemotion/nokogiri/issues/1223), [#2177](https://github.com/sparklemotion/nokogiri/issues/2177)] +* Introduce `--enable-system-libraries` and `--disable-system-libraries` flags to `extconf.rb`. These flags provide the same functionality as `--use-system-libraries` and the `NOKOGIRI_USE_SYSTEM_LIBRARIES` environment variable, but are more idiomatic. [[#2193](https://github.com/sparklemotion/nokogiri/issues/2193)] (Thanks, [@eregon](https://github.com/eregon)!) +* [TruffleRuby] `--disable-static` is now the default on TruffleRuby when the packaged libraries are used. This is more flexible and compiles faster. (Note, though, that the default on TR is still to use system libraries.) [[#2191](https://github.com/sparklemotion/nokogiri/issues/2191#issuecomment-780724627), [#2193](https://github.com/sparklemotion/nokogiri/issues/2193)] (Thanks, [@eregon](https://github.com/eregon)!) + + +### Changed + +* `Nokogiri::XML::Path` is now a Module (previously it has been a Class). 
It has been acting solely as a Module since v1.0.0. See [8461c74](https://github.com/sparklemotion/nokogiri/commit/8461c74). + + +## 1.11.1 / 2021-01-06 + +### Fixed + +* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)] + + +## 1.11.0 / 2021-01-03 + +### Notes + +#### Faster, more reliable installation: Native Gems for Linux and OSX/Darwin + +"Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries. This results in **much faster installation** and **more reliable installation**, which as you probably know are the biggest headaches for Nokogiri users. + +We've been shipping native Windows gems since 2009, but starting in v1.11.0 we are also shipping native gems for these platforms: + +* Linux: `x86-linux` and `x86_64-linux` -- including musl platforms like alpine +* OSX/Darwin: `x86_64-darwin` and `arm64-darwin` + +We'd appreciate your thoughts and feedback on this work at [#2075](https://github.com/sparklemotion/nokogiri/issues/2075). + + +### Dependencies + +#### Ruby + +This release introduces support for Ruby 2.7 and 3.0 in the precompiled native gems. + +This release ends support for: + +* Ruby 2.3, for which [official support ended on 2019-03-31](https://www.ruby-lang.org/en/news/2019/03/31/support-of-ruby-2-3-has-ended/) [[#1886](https://github.com/sparklemotion/nokogiri/issues/1886)] (Thanks [@ashmaroli](https://github.com/ashmaroli)!) +* Ruby 2.4, for which [official support ended on 2020-04-05](https://www.ruby-lang.org/en/news/2020/04/05/support-of-ruby-2-4-has-ended/) +* JRuby 9.1, which is the Ruby 2.3-compatible release. + + +#### Gems + +* Explicitly add racc as a runtime dependency. 
[[#1988](https://github.com/sparklemotion/nokogiri/issues/1988)] (Thanks, [@voxik](https://github.com/voxik)!) +* [MRI] Upgrade mini_portile2 dependency from `~> 2.4.0` to `~> 2.5.0` [[#2005](https://github.com/sparklemotion/nokogiri/issues/2005)] (Thanks, [@alejandroperea](https://github.com/alejandroperea)!) + + +### Security + +See note below about CVE-2020-26247 in the "Changed" subsection entitled "XML::Schema parsing treats input as untrusted by default". + + +### Added + +* Add Node methods for manipulating "keyword attributes" (for example, `class` and `rel`): `#kwattr_values`, `#kwattr_add`, `#kwattr_append`, and `#kwattr_remove`. [[#2000](https://github.com/sparklemotion/nokogiri/issues/2000)] +* Add support for CSS queries `a:has(> b)`, `a:has(~ b)`, and `a:has(+ b)`. [[#688](https://github.com/sparklemotion/nokogiri/issues/688)] (Thanks, [@jonathanhefner](https://github.com/jonathanhefner)!) +* Add `Node#value?` to better match expected semantics of a Hash-like object. [[#1838](https://github.com/sparklemotion/nokogiri/issues/1838), [#1840](https://github.com/sparklemotion/nokogiri/issues/1840)] (Thanks, [@MatzFan](https://github.com/MatzFan)!) +* [CRuby] Add `Nokogiri::XML::Node#line=` for use by downstream libs like nokogumbo. [[#1918](https://github.com/sparklemotion/nokogiri/issues/1918)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) +* `nokogiri.gemspec` is back after a 10-year hiatus. We still prefer you use the official releases, but `main` is pretty stable these days, and YOLO. + + +### Performance + +* [CRuby] The CSS `~=` operator and class selector `.` are about 2x faster. [[#2137](https://github.com/sparklemotion/nokogiri/issues/2137), [#2135](https://github.com/sparklemotion/nokogiri/issues/2135)] +* [CRuby] Patch libxml2 to call `strlen` from `xmlStrlen` rather than the naive implementation, because `strlen` is generally optimized for the architecture. 
[[#2144](https://github.com/sparklemotion/nokogiri/issues/2144)] (Thanks, [@ilyazub](https://github.com/ilyazub)!) +* Improve performance of some namespace operations. [[#1916](https://github.com/sparklemotion/nokogiri/issues/1916)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) +* Remove unnecessary array allocations from Node serialization methods [[#1911](https://github.com/sparklemotion/nokogiri/issues/1911)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) +* Avoid creation of unnecessary zero-length String objects. [[#1970](https://github.com/sparklemotion/nokogiri/issues/1970)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) +* Always compile libxml2 and libxslt with '-O2' [[#2022](https://github.com/sparklemotion/nokogiri/issues/2022), [#2100](https://github.com/sparklemotion/nokogiri/issues/2100)] (Thanks, [@ilyazub](https://github.com/ilyazub)!) +* [JRuby] Lots of code cleanup and performance improvements. [[#1934](https://github.com/sparklemotion/nokogiri/issues/1934)] (Thanks, [@kares](https://github.com/kares)!) +* [CRuby] `RelaxNG.from_document` no longer leaks memory. [[#2114](https://github.com/sparklemotion/nokogiri/issues/2114)] + + +### Improved + +* [CRuby] Handle incorrectly-closed HTML comments as WHATWG recommends for browsers. [[#2058](https://github.com/sparklemotion/nokogiri/issues/2058)] (Thanks to HackerOne user [mayflower](https://hackerone.com/mayflower?type=user) for reporting this!) +* `{HTML,XML}::Document#parse` now accept `Pathname` objects. Previously this worked only if the referenced file was less than 4096 bytes long; longer files resulted in undefined behavior because the `read` method would be repeatedly invoked. [[#1821](https://github.com/sparklemotion/nokogiri/issues/1821), [#2110](https://github.com/sparklemotion/nokogiri/issues/2110)] (Thanks, [@doriantaylor](https://github.com/doriantaylor) and [@phokz](https://github.com/phokz)!) 
+* [CRuby] Nokogumbo builds faster because it can now use header files provided by Nokogiri. [[#1788](https://github.com/sparklemotion/nokogiri/issues/1788)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) +* Add `frozen_string_literal: true` magic comment to all `lib` files. [[#1745](https://github.com/sparklemotion/nokogiri/issues/1745)] (Thanks, [@oniofchaos](https://github.com/oniofchaos)!) +* [JRuby] Clean up deprecated calls into JRuby. [[#2027](https://github.com/sparklemotion/nokogiri/issues/2027)] (Thanks, [@headius](https://github.com/headius)!) + + +### Fixed + +* HTML Parsing in "strict" mode (i.e., the `RECOVER` parse option not set) now correctly raises a `XML::SyntaxError` exception. Previously the value of the `RECOVER` bit was being ignored by CRuby and was misinterpreted by JRuby. [[#2130](https://github.com/sparklemotion/nokogiri/issues/2130)] +* The CSS `~=` operator now correctly handles non-space whitespace in the `class` attribute. commit e45dedd +* The switch to turn off the CSS-to-XPath cache is now thread-local, rather than being shared mutable state. [[#1935](https://github.com/sparklemotion/nokogiri/issues/1935)] +* The Node methods `add_previous_sibling`, `previous=`, `before`, `add_next_sibling`, `next=`, `after`, `replace`, and `swap` now correctly use their parent as the context node for parsing markup. These methods now also raise a `RuntimeError` if they are called on a node with no parent. [[nokogumbo#160](https://github.com/rubys/nokogumbo/issues/160)] +* [JRuby] `XML::Schema` XSD validation errors are captured in `XML::Schema#errors`. These errors were previously ignored. +* [JRuby] Standardize reading from IO like objects, including StringIO. [[#1888](https://github.com/sparklemotion/nokogiri/issues/1888), [#1897](https://github.com/sparklemotion/nokogiri/issues/1897)] +* [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. 
[[#1890](https://github.com/sparklemotion/nokogiri/issues/1890), [#2148](https://github.com/sparklemotion/nokogiri/issues/2148)] +* [JRuby] Clarify exception message when custom XPath functions can't be resolved. +* [JRuby] Comparison of Node to Document with `Node#<=>` now matches CRuby/libxml2 behavior. +* [CRuby] Syntax errors are now correctly captured in `Document#errors` for short HTML documents. Previously the SAX parser used for encoding detection was clobbering libxml2's global error handler. +* [CRuby] Fixed installation on AIX with respect to `vasprintf`. [[#1908](https://github.com/sparklemotion/nokogiri/issues/1908)] +* [CRuby] On some platforms, avoid symbol name collision with glibc's `canonicalize`. [[#2105](https://github.com/sparklemotion/nokogiri/issues/2105)] +* [Windows Visual C++] Fixed compiler warnings and errors. [[#2061](https://github.com/sparklemotion/nokogiri/issues/2061), [#2068](https://github.com/sparklemotion/nokogiri/issues/2068)] +* [CRuby] Fixed Nokogumbo integration which broke in the v1.11.0 release candidates. [[#1788](https://github.com/sparklemotion/nokogiri/issues/1788)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) +* [JRuby] Fixed document encoding regression in v1.11.0 release candidates. [[#2080](https://github.com/sparklemotion/nokogiri/issues/2080), [#2083](https://github.com/sparklemotion/nokogiri/issues/2083)] (Thanks, [@thbar](https://github.com/thbar)!) + + +### Removed + +* The internal method `Nokogiri::CSS::Parser.cache_on=` has been removed. Use `.set_cache` if you need to muck with the cache internals. +* The class method `Nokogiri::CSS::Parser.parse` has been removed. This was originally deprecated in 2009 in 13db61b. Use `Nokogiri::CSS.parse` instead. + + +### Changed + +#### `XML::Schema` input is now "untrusted" by default + +Address [CVE-2020-26247](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-vr8q-g5c7-m54m). 
+ +In Nokogiri versions <= 1.11.0.rc3, XML Schemas parsed by `Nokogiri::XML::Schema` were **trusted** by default, allowing external resources to be accessed over the network, potentially enabling XXE or SSRF attacks. + +This behavior is counter to the security policy intended by Nokogiri maintainers, which is to treat all input as **untrusted** by default whenever possible. + +Please note that this security fix was pushed into a new minor version, 1.11.x, rather than a patch release to the 1.10.x branch, because it is a breaking change for some schemas and the risk was assessed to be "Low Severity". + +More information and instructions for enabling "trusted input" behavior in v1.11.0.rc4 and later is available at the [public advisory](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-vr8q-g5c7-m54m). + + +#### HTML parser now obeys the `strict` or `norecover` parsing option + +(Also noted above in the "Fixed" section) HTML Parsing in "strict" mode (i.e., the `RECOVER` parse option not set) now correctly raises a `XML::SyntaxError` exception. Previously the value of the `RECOVER` bit was being ignored by CRuby and was misinterpreted by JRuby. + +If you're using the default parser options, you will be unaffected by this fix. If you're passing `strict` or `norecover` to your HTML parser call, you may be surprised to see that the parser now fails to recover and raises a `XML::SyntaxError` exception. Given the number of HTML documents on the internet that libxml2 would consider to be ill-formed, this is probably not what you want, and you can omit setting that parse option to restore the behavior that you have been relying upon. + +Apologies to anyone inconvenienced by this breaking bugfix being present in a minor release, but I felt it was appropriate to introduce this fix because it's straightforward to fix any code that has been relying on this buggy behavior. 
+ + +#### `VersionInfo`, the output of `nokogiri -v`, and related constants + +This release changes the metadata provided in `Nokogiri::VersionInfo` which also affects the output of `nokogiri -v`. Some related constants have also been changed. If you're using `VersionInfo` programmatically, or relying on constants related to underlying library versions, please read the detailed changes for `Nokogiri::VersionInfo` at [#2139](https://github.com/sparklemotion/nokogiri/issues/2139) and accept our apologies for the inconvenience. + + +## 1.10.10 / 2020-07-06 + +### Features + +* [MRI] Cross-built Windows gems now support Ruby 2.7 [[#2029](https://github.com/sparklemotion/nokogiri/issues/2029)]. Note that prior to this release, the v1.11.x prereleases provided this support. + + +## 1.10.9 / 2020-03-01 + +### Fixed + +* [MRI] Raise an exception when Nokogiri detects a specific libxml2 edge case involving blank Schema nodes wrapped by Ruby objects that would cause a segfault. Currently no fix is available upstream, so we're preventing a dangerous operation and informing users to code around it if possible. [[#1985](https://github.com/sparklemotion/nokogiri/issues/1985), [#2001](https://github.com/sparklemotion/nokogiri/issues/2001)] +* [JRuby] Change `NodeSet#to_a` to return a RubyArray instead of Object, for compilation under JRuby 9.2.9 and later. [[#1968](https://github.com/sparklemotion/nokogiri/issues/1968), [#1969](https://github.com/sparklemotion/nokogiri/issues/1969)] (Thanks, [@headius](https://github.com/headius)!) + + +## 1.10.8 / 2020-02-10 + +### Security + +[MRI] Pulled in upstream patch from libxml that addresses CVE-2020-7595. Full details are available in [#1992](https://github.com/sparklemotion/nokogiri/issues/1992). Note that this patch is not yet (as of 2020-02-10) in an upstream release of libxml. + + +## 1.10.7 / 2019-12-03 + +### Fixed + +* [MRI] Ensure the patch applied in v1.10.6 works with GNU `patch`. 
[[#1954](https://github.com/sparklemotion/nokogiri/issues/1954)] + + +## 1.10.6 / 2019-12-03 + +### Fixed + +* [MRI] Fix FreeBSD installation of vendored libxml2. [[#1941](https://github.com/sparklemotion/nokogiri/issues/1941), [#1953](https://github.com/sparklemotion/nokogiri/issues/1953)] (Thanks, [@nurse](https://github.com/nurse)!) + + +## 1.10.5 / 2019-10-31 + +### Security + +[MRI] Vendored libxslt upgraded to v1.1.34 which addresses three CVEs for libxslt: + +* CVE-2019-13117 +* CVE-2019-13118 +* CVE-2019-18197 +* CVE-2019-19956 + +More details are available at [#1943](https://github.com/sparklemotion/nokogiri/issues/1943). + + +### Dependencies + +* [MRI] vendored libxml2 is updated from 2.9.9 to 2.9.10 +* [MRI] vendored libxslt is updated from 1.1.33 to 1.1.34 + + +## 1.10.4 / 2019-08-11 + +### Security + +Address CVE-2019-5477 ([#1915](https://github.com/sparklemotion/nokogiri/issues/1915)). + +A command injection vulnerability in Nokogiri v1.10.3 and earlier allows commands to be executed in a subprocess by Ruby's `Kernel.open` method. Processes are vulnerable only if the undocumented method `Nokogiri::CSS::Tokenizer#load_file` is being passed untrusted user input. + +This vulnerability appears in code generated by the Rexical gem versions v1.0.6 and earlier. Rexical is used by Nokogiri to generate lexical scanner code for parsing CSS queries. The underlying vulnerability was addressed in Rexical v1.0.7 and Nokogiri upgraded to this version of Rexical in Nokogiri v1.10.4. + +This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/issues/1915) + + ## 1.10.3 / 2019-04-22 -### Security Notes +### Security [MRI] Pulled in upstream patch from libxslt that addresses CVE-2019-11068. Full details are available in [#1892](https://github.com/sparklemotion/nokogiri/issues/1892). Note that this patch is not yet (as of 2019-04-22) in an upstream release of libxslt. 
@@ -11,91 +800,85 @@ ### Security -* [MRI] Remove support from vendored libxml2 for future script macros. [#1871] -* [MRI] Remove support from vendored libxml2 for server-side includes within attributes. [#1877] +* [MRI] Remove support from vendored libxml2 for future script macros. [[#1871](https://github.com/sparklemotion/nokogiri/issues/1871)] +* [MRI] Remove support from vendored libxml2 for server-side includes within attributes. [[#1877](https://github.com/sparklemotion/nokogiri/issues/1877)] -### Bug fixes +### Fixed -* [JRuby] Fix node ownership in duplicated documents. [#1060] -* [JRuby] Rethrow exceptions caught by Java SAX handler. [#1847, #1872] (Thanks, @adjam!) +* [JRuby] Fix node ownership in duplicated documents. [[#1060](https://github.com/sparklemotion/nokogiri/issues/1060)] +* [JRuby] Rethrow exceptions caught by Java SAX handler. [[#1847](https://github.com/sparklemotion/nokogiri/issues/1847), [#1872](https://github.com/sparklemotion/nokogiri/issues/1872)] (Thanks, [@adjam](https://github.com/adjam)!) ## 1.10.1 / 2019-01-13 -### Features +### Added -* [MRI] During installation, handle Xcode 10's new library path. [#1801, #1851] (Thanks, @mlj and @deepj!) -* Avoid unnecessary creation of `Proc`s in many methods. [#1776] (Thanks, @chopraanmol1!) +* [MRI] During installation, handle Xcode 10's new library path. [[#1801](https://github.com/sparklemotion/nokogiri/issues/1801), [#1851](https://github.com/sparklemotion/nokogiri/issues/1851)] (Thanks, [@mlj](https://github.com/mlj) and [@deepj](https://github.com/deepj)!) +* Avoid unnecessary creation of `Proc`s in many methods. [[#1776](https://github.com/sparklemotion/nokogiri/issues/1776)] (Thanks, [@chopraanmol1](https://github.com/chopraanmol1)!) -### Bug fixes +### Fixed -* CSS selector `:has()` now correctly matches against any descendant. Previously this selector matched against only direct children). [#350] (Thanks, @Phrogz!) 
+* CSS selector `:has()` now correctly matches against any descendant. Previously this selector matched against only direct children. [[#350](https://github.com/sparklemotion/nokogiri/issues/350)] (Thanks, [@Phrogz](https://github.com/Phrogz)!) * `NodeSet#attr` now returns `nil` if it's empty. Previously this raised a NoMethodError. -* [MRI] XPath errors are no longer suppressed during `XSLT::Stylesheet#transform`. Previously these errors were suppressed which led to silent failures and a subsequent segfault. [#1802] +* [MRI] XPath errors are no longer suppressed during `XSLT::Stylesheet#transform`. Previously these errors were suppressed which led to silent failures and a subsequent segfault. [[#1802](https://github.com/sparklemotion/nokogiri/issues/1802)] ## 1.10.0 / 2019-01-04 -### Features - -* [MRI] Cross-built Windows gems now support Ruby 2.6 [#1842, #1850] - - -### Backwards incompatibilities - -This release ends support for: +### Added -* Ruby 2.2, for which [official support ended on 2018-03-31](https://www.ruby-lang.org/en/news/2018/06/20/support-of-ruby-2-2-has-ended/) [#1841] -* JRuby 1.7, for which [official support ended on 2017-11-21](https://github.com/jruby/jruby/issues/4112) [#1741] +* [MRI] Cross-built Windows gems now support Ruby 2.6 [[#1842](https://github.com/sparklemotion/nokogiri/issues/1842), [#1850](https://github.com/sparklemotion/nokogiri/issues/1850)] ### Dependencies +* This release ends support for Ruby 2.2, for which [official support ended on 2018-03-31](https://www.ruby-lang.org/en/news/2018/06/20/support-of-ruby-2-2-has-ended/) [[#1841](https://github.com/sparklemotion/nokogiri/issues/1841)] +* This release ends support for JRuby 1.7, for which [official support ended on 2017-11-21](https://github.com/jruby/jruby/issues/4112) [[#1741](https://github.com/sparklemotion/nokogiri/issues/1741)] * [MRI] libxml2 is updated from 2.9.8 to 2.9.9 * [MRI] libxslt is updated from 1.1.32 to 1.1.33 ## 1.9.1 / 2018-12-17 -### Bug fixes +###
Fixed -* Fix a bug introduced in v1.9.0 where `XML::DocumentFragment#dup` no longer returned an instance of the callee's class, instead always returning an `XML::DocumentFragment`. This notably broke any subclass of `XML::DocumentFragment` including `HTML::DocumentFragment` as well as the Loofah gem's `Loofah::HTML::DocumentFragment`. [#1846] +* Fix a bug introduced in v1.9.0 where `XML::DocumentFragment#dup` no longer returned an instance of the callee's class, instead always returning an `XML::DocumentFragment`. This notably broke any subclass of `XML::DocumentFragment` including `HTML::DocumentFragment` as well as the Loofah gem's `Loofah::HTML::DocumentFragment`. [[#1846](https://github.com/sparklemotion/nokogiri/issues/1846)] ## 1.9.0 / 2018-12-17 -### Security Notes +### Security -* [JRuby] Upgrade Xerces dependency from 2.11.0 to 2.12.0 to address upstream vulnerability CVE-2012-0881 [#1831] (Thanks @grajagandev for reporting.) +* [JRuby] Upgrade Xerces dependency from 2.11.0 to 2.12.0 to address upstream vulnerability CVE-2012-0881 [[#1831](https://github.com/sparklemotion/nokogiri/issues/1831)] (Thanks [@grajagandev](https://github.com/grajagandev) for reporting.) -### Notable non-functional changes +### Improved -* Decrease installation size by removing many unneeded files (e.g., `/test`) from the packaged gems. [#1719] (Thanks, @stevecrozz!) +* Decrease installation size by removing many unneeded files (e.g., `/test`) from the packaged gems. [[#1719](https://github.com/sparklemotion/nokogiri/issues/1719)] (Thanks, [@stevecrozz](https://github.com/stevecrozz)!) -### Features +### Added -* `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [#1800] -* Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [#1531] (Thanks, @ethirajsrinivasan!) -* [MRI] Improve installation experience on macOS High Sierra (Darwin). 
[#1812, #1813] (Thanks, @gpakosz and @nurse!) -* [MRI] Node#dup supports copying a node directly to a new document. See the method documentation for details. -* [MRI] DocumentFragment#dup is now more memory-efficient, avoiding making unnecessary copies. [#1063] -* [JRuby] NodeSet has been rewritten to improve performance! [#1795] +* `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [[#1800](https://github.com/sparklemotion/nokogiri/issues/1800)] +* Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [[#1531](https://github.com/sparklemotion/nokogiri/issues/1531)] (Thanks, [@ethirajsrinivasan](https://github.com/ethirajsrinivasan)!) +* [MRI] Improve installation experience on macOS High Sierra (Darwin). [[#1812](https://github.com/sparklemotion/nokogiri/issues/1812), [#1813](https://github.com/sparklemotion/nokogiri/issues/1813)] (Thanks, [@gpakosz](https://github.com/gpakosz) and [@nurse](https://github.com/nurse)!) +* [MRI] `Node#dup` supports copying a node directly to a new document. See the method documentation for details. +* [MRI] `DocumentFragment#dup` is now more memory-efficient, avoiding making unnecessary copies. [[#1063](https://github.com/sparklemotion/nokogiri/issues/1063)] +* [JRuby] `NodeSet` has been rewritten to improve performance! [[#1795](https://github.com/sparklemotion/nokogiri/issues/1795)] -### Bug fixes +### Fixed -* `NodeSet#each` now returns `self` instead of zero. [#1822] (Thanks, @olehif!) -* [MRI] Address a memory leak when using XML::Builder to create nodes with namespaces. [#1810] -* [MRI] Address a memory leak when unparenting a DTD. [#1784] (Thanks, @stevecheckoway!) -* [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [#1820] (Thanks, @nobu!) -* [JRuby] Decrease large memory usage when making nested XPath queries. 
[#1749] +* `NodeSet#each` now returns `self` instead of zero. [[#1822](https://github.com/sparklemotion/nokogiri/issues/1822)] (Thanks, [@olehif](https://github.com/olehif)!) +* [MRI] Address a memory leak when using `XML::Builder` to create nodes with namespaces. [[#1810](https://github.com/sparklemotion/nokogiri/issues/1810)] +* [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) +* [MRI] Use `RbConfig::CONFIG` instead of `::MAKEFILE_CONFIG` to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!) +* [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)] * [JRuby] Fix failing tests on JRuby 9.2.x -* [JRuby] Fix default namespaces in nodes reparented into a different document [#1774] -* [JRuby] Fix support for Java 9. [#1759] (Thanks, @Taywee!) +* [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)] +* [JRuby] Fix support for Java 9. [[#1759](https://github.com/sparklemotion/nokogiri/issues/1759)] (Thanks, [@Taywee](https://github.com/Taywee)!) ### Dependencies @@ -105,27 +888,27 @@ This release ends support for: ## 1.8.5 / 2018-10-04 -### Security Notes +### Security [MRI] Pulled in upstream patches from libxml2 that address CVE-2018-14404 and CVE-2018-14567. Full details are available in [#1785](https://github.com/sparklemotion/nokogiri/issues/1785). Note that these patches are not yet (as of 2018-10-04) in an upstream release of libxml2. -### Bug fixes +### Fixed -* [MRI] Fix regression in installation when building against system libraries, where some systems would not be able to find libxml2 or libxslt when present. (Regression introduced in v1.8.3.) 
[#1722] -* [JRuby] Fix node reparenting when the destination doc is empty. [#1773] +* [MRI] Fix regression in installation when building against system libraries, where some systems would not be able to find libxml2 or libxslt when present. (Regression introduced in v1.8.3.) [[#1722](https://github.com/sparklemotion/nokogiri/issues/1722)] +* [JRuby] Fix node reparenting when the destination doc is empty. [[#1773](https://github.com/sparklemotion/nokogiri/issues/1773)] ## 1.8.4 / 2018-07-03 -### Bug fixes +### Fixed -* [MRI] Fix memory leak when creating nodes with namespaces. (Introduced in v1.5.7) [#1771] +* [MRI] Fix memory leak when creating nodes with namespaces. (Introduced in v1.5.7) [[#1771](https://github.com/sparklemotion/nokogiri/issues/1771)] ## 1.8.3 / 2018-06-16 -### Security Notes +### Security [MRI] Behavior in libxml2 has been reverted which caused CVE-2018-8048 (loofah gem), CVE-2018-3740 (sanitize gem), and CVE-2018-3741 (rails-html-sanitizer gem). The commit in question is here: @@ -142,34 +925,39 @@ If you're offended by what happened here, I'd kindly ask that you comment on the > https://bugzilla.gnome.org/show_bug.cgi?id=769760 +### More Security + +[MRI] Vendored libxml2 upgraded to v2.9.8 which addresses CVE-2016-9318 [[#1582](https://github.com/sparklemotion/nokogiri/issues/1582)]. + + ### Dependencies * [MRI] libxml2 is updated from 2.9.7 to 2.9.8 -### Features +### Added -* Node#classes, #add_class, #append_class, and #remove_class are added. -* NodeSet#append_class is added. -* NodeSet#remove_attribute is a new alias for NodeSet#remove_attr. -* NodeSet#each now returns an Enumerator when no block is passed (Thanks, @park53kr!) -* [JRuby] General improvements in JRuby implementation (Thanks, @kares!) +* `Node#classes`, `#add_class`, `#append_class`, and `#remove_class` are added. +* `NodeSet#append_class` is added. +* `NodeSet#remove_attribute` is a new alias for `NodeSet#remove_attr`. 
+* `NodeSet#each` now returns an `Enumerator` when no block is passed (Thanks, [@park53kr](https://github.com/park53kr)!) +* [JRuby] General improvements in JRuby implementation (Thanks, [@kares](https://github.com/kares)!) -### Bug fixes +### Fixed -* CSS attribute selectors now gracefully handle queries using integers. [#711] -* Handle ASCII-8BIT encoding on fragment input [#553] -* Handle non-string return values within `Reader` [#898] -* [JRuby] Allow Node#replace to insert Comment and CDATA nodes. [#1666] -* [JRuby] Stability and speed improvements to `Node`, `Sax::PushParser`, and the JRuby implementation [#1708, #1710, #1501] +* CSS attribute selectors now gracefully handle queries using integers. [[#711](https://github.com/sparklemotion/nokogiri/issues/711)] +* Handle ASCII-8BIT encoding on fragment input [[#553](https://github.com/sparklemotion/nokogiri/issues/553)] +* Handle non-string return values within `Reader` [[#898](https://github.com/sparklemotion/nokogiri/issues/898)] +* [JRuby] Allow `Node#replace` to insert Comment and CDATA nodes. [[#1666](https://github.com/sparklemotion/nokogiri/issues/1666)] +* [JRuby] Stability and speed improvements to `Node`, `Sax::PushParser`, and the JRuby implementation [[#1708](https://github.com/sparklemotion/nokogiri/issues/1708), [#1710](https://github.com/sparklemotion/nokogiri/issues/1710), [#1501](https://github.com/sparklemotion/nokogiri/issues/1501)] ## 1.8.2 / 2018-01-29 -### Security Notes +### Security -[MRI] The update of vendored libxml2 from 2.9.5 to 2.9.7 addresses at least one published vulnerability, CVE-2017-15412. [#1714 has complete details] +[MRI] The update of vendored libxml2 from 2.9.5 to 2.9.7 addresses at least one published vulnerability, CVE-2017-15412. 
[[#1714](https://github.com/sparklemotion/nokogiri/issues/1714) has complete details] ### Dependencies @@ -178,17 +966,17 @@ If you're offended by what happened here, I'd kindly ask that you comment on the * [MRI] libxslt is updated from 1.1.30 to 1.1.32 -### Features +### Added -* [MRI] OpenBSD installation should be a bit easier now. [#1685] (Thanks, @jeremyevans!) +* [MRI] OpenBSD installation should be a bit easier now. [[#1685](https://github.com/sparklemotion/nokogiri/issues/1685)] (Thanks, [@jeremyevans](https://github.com/jeremyevans)!) * [MRI] Cross-built Windows gems now support Ruby 2.5 -### Bug fixes +### Fixed -* Node#serialize once again returns UTF-8-encoded strings. [#1659] -* [JRuby] made SAX parsing of characters consistent with C implementation [#1676] (Thanks, @andrew-aladev!) -* [MRI] Predefined entities, when inspected, no longer cause a segfault. [#1238] +* `Node#serialize` once again returns UTF-8-encoded strings. [[#1659](https://github.com/sparklemotion/nokogiri/issues/1659)] +* [JRuby] made SAX parsing of characters consistent with C implementation [[#1676](https://github.com/sparklemotion/nokogiri/issues/1676)] (Thanks, [@andrew-aladev](https://github.com/andrew-aladev)!) +* [MRI] Predefined entities, when inspected, no longer cause a segfault. [[#1238](https://github.com/sparklemotion/nokogiri/issues/1238)] ## 1.8.1 / 2017-09-19 @@ -197,19 +985,19 @@ If you're offended by what happened here, I'd kindly ask that you comment on the * [MRI] libxml2 is updated from 2.9.4 to 2.9.5. * [MRI] libxslt is updated from 1.1.29 to 1.1.30. -* [MRI] optional dependency on the pkg-config gem has had its constraint loosened to `~> 1.1` (from `~> 1.1.7`). 
[[#1660](https://github.com/sparklemotion/nokogiri/issues/1660)] * [MRI] Upgrade mini_portile2 dependency from `~> 2.2.0` to `~> 2.3.0`, which will validate checksums on the vendored libxml2 and libxslt tarballs before using them. -### Bugs +### Fixed -* NodeSet#first with an integer argument longer than the length of the NodeSet now correctly clamps the length of the returned NodeSet to the original length. [#1650] (Thanks, @Derenge!) -* [MRI] Ensure CData.new raises TypeError if the `content` argument is not implicitly convertible into a string. [#1669] +* `NodeSet#first` with an integer argument longer than the length of the `NodeSet` now correctly clamps the length of the returned `NodeSet` to the original length. [[#1650](https://github.com/sparklemotion/nokogiri/issues/1650)] (Thanks, [@Derenge](https://github.com/Derenge)!) +* [MRI] Ensure CData.new raises TypeError if the `content` argument is not implicitly convertible into a string. [[#1669](https://github.com/sparklemotion/nokogiri/issues/1669)] ## 1.8.0 / 2017-06-04 -### Backwards incompatibilities +### Dependencies This release ends support for Ruby 2.1 on Windows in the `x86-mingw32` and `x64-mingw32` platform gems (containing pre-compiled DLLs). Official support ended for Ruby 2.1 on 2017-04-01. @@ -222,86 +1010,81 @@ Please note that this deprecation note only applies to the precompiled Windows g * [Windows] Upgrade zlib from 1.2.8 to 1.2.11 (unless --use-system-libraries) * [MRI] Upgrade rake-compiler dependency from 0.9.2 to 1.0.3 * [MRI] Upgrade mini-portile2 dependency from `~> 2.1.0` to `~> 2.2.0` +* [JRuby] Removed support for `jruby --1.8` code paths. [[#1607](https://github.com/sparklemotion/nokogiri/issues/1607)] (Thanks, [@kares](https://github.com/kares)!) +* [MRI Windows] Retrieve zlib source from http://zlib.net/fossils to avoid deprecation issues going forward. See [#1632](https://github.com/sparklemotion/nokogiri/issues/1632) for details around this problem. 
+### Added -### Compatibility notes - -* [JRuby] Removed support for `jruby --1.8` code paths. [#1607] (Thanks, @kares!) -* [MRI Windows] Retrieve zlib source from http://zlib.net/fossils to avoid deprecation issues going forward. See #1632 for details around this problem. - - -### Features - -* NodeSet#clone is not an alias for NodeSet#dup [#1503] (Thanks, @stephankaag!) -* Allow Processing Instructions and Comments as children of a document root. [#1033] (Thanks, @windwiny!) -* [MRI] PushParser#replace_entities and #replace_entities= will control whether entities are replaced or not. [#1017] (Thanks, @spraints!) -* [MRI] SyntaxError#to_s now includes line number, column number, and log level if made available by the parser. [#1304, #1637] (Thanks, @spk and @ccarruitero!) +* `NodeSet#clone` is now an alias for `NodeSet#dup` [[#1503](https://github.com/sparklemotion/nokogiri/issues/1503)] (Thanks, [@stephankaag](https://github.com/stephankaag)!) +* Allow Processing Instructions and Comments as children of a document root. [[#1033](https://github.com/sparklemotion/nokogiri/issues/1033)] (Thanks, [@windwiny](https://github.com/windwiny)!) +* [MRI] `PushParser#replace_entities` and `#replace_entities=` will control whether entities are replaced or not. [[#1017](https://github.com/sparklemotion/nokogiri/issues/1017)] (Thanks, [@spraints](https://github.com/spraints)!) +* [MRI] `SyntaxError#to_s` now includes line number, column number, and log level if made available by the parser. [[#1304](https://github.com/sparklemotion/nokogiri/issues/1304), [#1637](https://github.com/sparklemotion/nokogiri/issues/1637)] (Thanks, [@spk](https://github.com/spk) and [@ccarruitero](https://github.com/ccarruitero)!) * [MRI] Cross-built Windows gems now support Ruby 2.4 -* [MRI] Support for frozen string literals. 
[#1413] -* [MRI] Support for installing Nokogiri on a machine in FIPS-enabled mode [#1544] -* [MRI] Vendored libraries are verified with SHA-256 hashes (formerly some MD5 hashes were used) [#1544] -* [JRuby] (performance) remove unnecessary synchronization of class-cache [#1563] (Thanks, @kares!) -* [JRuby] (performance) remove unnecessary cloning of objects in XPath searches [#1563] (Thanks, @kares!) -* [JRuby] (performance) more performance improvements, particularly in XPath, Reader, XmlNode, and XmlNodeSet [#1597] (Thanks, @kares!) +* [MRI] Support for frozen string literals. [[#1413](https://github.com/sparklemotion/nokogiri/issues/1413)] +* [MRI] Support for installing Nokogiri on a machine in FIPS-enabled mode [[#1544](https://github.com/sparklemotion/nokogiri/issues/1544)] +* [MRI] Vendored libraries are verified with SHA-256 hashes (formerly some MD5 hashes were used) [[#1544](https://github.com/sparklemotion/nokogiri/issues/1544)] +* [JRuby] (performance) remove unnecessary synchronization of class-cache [[#1563](https://github.com/sparklemotion/nokogiri/issues/1563)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] (performance) remove unnecessary cloning of objects in XPath searches [[#1563](https://github.com/sparklemotion/nokogiri/issues/1563)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] (performance) more performance improvements, particularly in XPath, Reader, XmlNode, and XmlNodeSet [[#1597](https://github.com/sparklemotion/nokogiri/issues/1597)] (Thanks, [@kares](https://github.com/kares)!) -### Bugs +### Fixed -* HTML::SAX::Parser#parse_io now correctly parses HTML and not XML [#1577] (Thanks for the test case, @gregors!) -* Support installation on systems with a `lib64` site config. [#1562] -* [MRI] on OpenBSD, do not require gcc if using system libraries [#1515] (Thanks, @jeremyevans!) -* [MRI] XML::Attr.new checks type of Document arg to prevent segfaults. 
[#1477] -* [MRI] Prefer xmlCharStrdup (and friends) to strdup (and friends), which can cause problems on some platforms. [#1517] (Thanks, @jeremy!) -* [JRuby] correctly append a text node before another text node [#1318] (Thanks, @jkraemer!) -* [JRuby] custom xpath functions returning an integer now work correctly [#1595] (Thanks, @kares!) -* [JRuby] serializing (`#to_html`, `#to_s`, et al) a document with explicit encoding now works correctly. [#1281, #1440] (Thanks, @kares!) -* [JRuby] XML::Reader now returns parse errors [#1586] (Thanks, @kares!) -* [JRuby] Empty NodeSets are now decorated properly. [#1319] (Thanks, @kares!) -* [JRuby] Merged nodes no longer results in Java exceptions during XPath queries. [#1320] (Thanks, @kares!) +* `HTML::SAX::Parser#parse_io` now correctly parses HTML and not XML [[#1577](https://github.com/sparklemotion/nokogiri/issues/1577)] (Thanks for the test case, [@gregors](https://github.com/gregors)!) +* Support installation on systems with a `lib64` site config. [[#1562](https://github.com/sparklemotion/nokogiri/issues/1562)] +* [MRI] on OpenBSD, do not require gcc if using system libraries [[#1515](https://github.com/sparklemotion/nokogiri/issues/1515)] (Thanks, [@jeremyevans](https://github.com/jeremyevans)!) +* [MRI] `XML::Attr.new` checks type of Document arg to prevent segfaults. [[#1477](https://github.com/sparklemotion/nokogiri/issues/1477)] +* [MRI] Prefer xmlCharStrdup (and friends) to strdup (and friends), which can cause problems on some platforms. [[#1517](https://github.com/sparklemotion/nokogiri/issues/1517)] (Thanks, [@jeremy](https://github.com/jeremy)!) +* [JRuby] correctly append a text node before another text node [[#1318](https://github.com/sparklemotion/nokogiri/issues/1318)] (Thanks, [@jkraemer](https://github.com/jkraemer)!) +* [JRuby] custom xpath functions returning an integer now work correctly [[#1595](https://github.com/sparklemotion/nokogiri/issues/1595)] (Thanks, [@kares](https://github.com/kares)!) 
+* [JRuby] serializing (`#to_html`, `#to_s`, et al) a document with explicit encoding now works correctly. [[#1281](https://github.com/sparklemotion/nokogiri/issues/1281), [#1440](https://github.com/sparklemotion/nokogiri/issues/1440)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] `XML::Reader` now returns parse errors [[#1586](https://github.com/sparklemotion/nokogiri/issues/1586)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] Empty `NodeSet`s are now decorated properly. [[#1319](https://github.com/sparklemotion/nokogiri/issues/1319)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] Merged nodes no longer results in Java exceptions during XPath queries. [[#1320](https://github.com/sparklemotion/nokogiri/issues/1320)] (Thanks, [@kares](https://github.com/kares)!) ## 1.7.2 / 2017-05-09 -### Security Notes +### Security [MRI] Upstream libxslt patches are applied to the vendored libxslt 1.1.29 which address CVE-2017-5029 and CVE-2016-4738. For more information: -* https://github.com/sparklemotion/nokogiri/issues/1634 +* [#1634](https://github.com/sparklemotion/nokogiri/issues/1634) * http://people.canonical.com/~ubuntu-security/cve/2017/CVE-2017-5029.html * http://people.canonical.com/~ubuntu-security/cve/2016/CVE-2016-4738.html ## 1.7.1 / 2017-03-19 -### Security Notes +### Security [MRI] Upstream libxml2 patches are applied to the vendored libxml 2.9.4 which address CVE-2016-4658 and CVE-2016-5131. For more information: -* https://github.com/sparklemotion/nokogiri/issues/1615 +* [#1615](https://github.com/sparklemotion/nokogiri/issues/1615) * http://people.canonical.com/~ubuntu-security/cve/2016/CVE-2016-4658.html * http://people.canonical.com/~ubuntu-security/cve/2016/CVE-2016-5131.html ## 1.7.0.1 / 2017-01-04 -### Bugs +### Fixed -* Fix OpenBSD support. (#1569) (related to #1543) +* Fix OpenBSD support. 
([#1569](https://github.com/sparklemotion/nokogiri/issues/1569)) (related to [#1543](https://github.com/sparklemotion/nokogiri/issues/1543)) ## 1.7.0 / 2016-12-26 -### Features +### Added -* Remove deprecation warnings in Ruby 2.4.0 (#1545) (Thanks, @matthewd!) -* Support egcc compiler on OpenBSD (#1543) (Thanks, @frenkel and @knu!) +* Remove deprecation warnings in Ruby 2.4.0 ([#1545](https://github.com/sparklemotion/nokogiri/issues/1545)) (Thanks, [@matthewd](https://github.com/matthewd)!) +* Support egcc compiler on OpenBSD ([#1543](https://github.com/sparklemotion/nokogiri/issues/1543)) (Thanks, [@frenkel](https://github.com/frenkel) and [@knu](https://github.com/knu)!) -### Backwards incompatibilities. +### Dependencies This release ends support for: @@ -313,11 +1096,11 @@ This release ends support for: ## 1.6.8.1 / 2016-10-03 -### Dependency License Notes +### Dependencies Removes required dependency on the `pkg-config` gem. This dependency was introduced in v1.6.8 and, because it's distributed under LGPL, was -objectionable to many Nokogiri users (#1488, #1496). +objectionable to many Nokogiri users ([#1488](https://github.com/sparklemotion/nokogiri/issues/1488), [#1496](https://github.com/sparklemotion/nokogiri/issues/1496)). This version makes `pkg-config` an optional dependency. If it's installed, it's used; but otherwise Nokogiri will attempt to work @@ -326,7 +1109,7 @@ around its absence. ## 1.6.8 / 2016-06-06 -### Security Notes +### Security [MRI] Bundled libxml2 is upgraded to 2.9.4, which fixes many security issues. Many of these had previously been patched in the vendored libxml 2.9.2 in the 1.6.7.x branch, but some are newer. @@ -347,46 +1130,46 @@ See this libxslt email post for more: * https://mail.gnome.org/archives/xslt/2016-May/msg00004.html -### Features +### Added Several changes were made to improve performance: -* [MRI] Simplify NodeSet#to_a with a minor speed-up. (#1397) -* XML::Node#ancestors optimization. 
(#1297) (Thanks, Bruno Sutic!) -* Use Symbol#to_proc where we weren't previously. (#1296) (Thanks, Bruno Sutic!) -* XML::DTD#each uses implicit block calls. (Thanks, @glaucocustodio!) -* Fall back to the `pkg-config` gem if we're having trouble finding the system libxml2. This should help many FreeBSD users. (#1417) -* Set document encoding appropriately even on blank document. (#1043) (Thanks, @batter!) - - -### Bug Fixes - -* [JRuby] fix slow add_child (#692) -* [JRuby] fix load errors when deploying to JRuby/Torquebox (#1114) (Thanks, @atambo and @jvshahid!) -* [JRuby] fix NPE when inspecting nodes returned by NodeSet#drop (#1042) (Thanks, @mkristian!) -* [JRuby] fix nil attriubte node's namespace in reader (#1327) (Thanks, @codekitchen!) -* [JRuby] fix Nokogiri munging unicode characters that require more than 2 bytes (#1113) (Thanks, @mkristian!) -* [JRuby] allow unlinking an unparented node (#1112, #1152) (Thanks, @esse!) -* [JRuby] allow Fragment parsing on a frozen string (#444, #1077) -* [JRuby] HTML `style` tags are no longer encoded (#1316) (Thanks, @tbeauvais!) -* [MRI] fix assertion failure while accessing attribute node's namespace in reader (#843) (Thanks, @2potatocakes!) -* [MRI] fix issue with GCing namespace nodes returned in an xpath query. (#1155) -* [MRI] Ensure C strings are null-terminated. (#1381) -* [MRI] Ensure Rubygems is loaded before using mini_portile2 at installation. (#1393, #1411) (Thanks, @JonRowe!) -* [MRI] Handling another edge case where the `libxml-ruby` gem's global callbacks were smashing the heap. (#1426). (Thanks to @bbergstrom for providing an isolated test case!) -* [MRI] Ensure encodings are passed to Sax::Parser xmldecl callback. (#844) -* [MRI] Ensure default ns prefix is applied correctly when reparenting nodes to another document. (#391) (Thanks, @ylecuyer!) -* [MRI] Ensure Reader handles non-existent attributes as expected. (#1254) (Thanks, @ccutrer!) 
-* [MRI] Cleanup around namespace handling when reparenting nodes. (#1332, #1333, #1444) (Thanks, @cuttrer and @bradleybeddoes!) -* unescape special characters in CSS queries (#1303) (Thanks, @twalpole!) -* consistently handle empty documents (#1349) -* Update to mini_portile2 2.1.0 to address whitespace-handling during patching. (#1402) +* [MRI] Simplify `NodeSet#to_a` with a minor speed-up. ([#1397](https://github.com/sparklemotion/nokogiri/issues/1397)) +* `XML::Node#ancestors` optimization. ([#1297](https://github.com/sparklemotion/nokogiri/issues/1297)) (Thanks, Bruno Sutic!) +* Use `Symbol#to_proc` where we weren't previously. ([#1296](https://github.com/sparklemotion/nokogiri/issues/1296)) (Thanks, Bruno Sutic!) +* `XML::DTD#each` uses implicit block calls. (Thanks, [@glaucocustodio](https://github.com/glaucocustodio)!) +* Fall back to the `pkg-config` gem if we're having trouble finding the system libxml2. This should help many FreeBSD users. ([#1417](https://github.com/sparklemotion/nokogiri/issues/1417)) +* Set document encoding appropriately even on blank document. ([#1043](https://github.com/sparklemotion/nokogiri/issues/1043)) (Thanks, [@batter](https://github.com/batter)!) + + +### Fixed + +* [JRuby] fix slow add_child ([#692](https://github.com/sparklemotion/nokogiri/issues/692)) +* [JRuby] fix load errors when deploying to JRuby/Torquebox ([#1114](https://github.com/sparklemotion/nokogiri/issues/1114)) (Thanks, [@atambo](https://github.com/atambo) and [@jvshahid](https://github.com/jvshahid)!) +* [JRuby] fix NPE when inspecting nodes returned by `NodeSet#drop` ([#1042](https://github.com/sparklemotion/nokogiri/issues/1042)) (Thanks, [@mkristian](https://github.com/mkristian)!) +* [JRuby] fix nil attribute node's namespace in reader ([#1327](https://github.com/sparklemotion/nokogiri/issues/1327)) (Thanks, [@codekitchen](https://github.com/codekitchen)!) 
+* [JRuby] fix Nokogiri munging unicode characters that require more than 2 bytes ([#1113](https://github.com/sparklemotion/nokogiri/issues/1113)) (Thanks, [@mkristian](https://github.com/mkristian)!) +* [JRuby] allow unlinking an unparented node ([#1112](https://github.com/sparklemotion/nokogiri/issues/1112), [#1152](https://github.com/sparklemotion/nokogiri/issues/1152)) (Thanks, [@esse](https://github.com/esse)!) +* [JRuby] allow Fragment parsing on a frozen string ([#444](https://github.com/sparklemotion/nokogiri/issues/444), [#1077](https://github.com/sparklemotion/nokogiri/issues/1077)) +* [JRuby] HTML `style` tags are no longer encoded ([#1316](https://github.com/sparklemotion/nokogiri/issues/1316)) (Thanks, [@tbeauvais](https://github.com/tbeauvais)!) +* [MRI] fix assertion failure while accessing attribute node's namespace in reader ([#843](https://github.com/sparklemotion/nokogiri/issues/843)) (Thanks, [@2potatocakes](https://github.com/2potatocakes)!) +* [MRI] fix issue with GCing namespace nodes returned in an xpath query. ([#1155](https://github.com/sparklemotion/nokogiri/issues/1155)) +* [MRI] Ensure C strings are null-terminated. ([#1381](https://github.com/sparklemotion/nokogiri/issues/1381)) +* [MRI] Ensure Rubygems is loaded before using mini_portile2 at installation. ([#1393](https://github.com/sparklemotion/nokogiri/issues/1393), [#1411](https://github.com/sparklemotion/nokogiri/issues/1411)) (Thanks, [@JonRowe](https://github.com/JonRowe)!) +* [MRI] Handling another edge case where the `libxml-ruby` gem's global callbacks were smashing the heap. ([#1426](https://github.com/sparklemotion/nokogiri/issues/1426)). (Thanks to [@bbergstrom](https://github.com/bbergstrom) for providing an isolated test case!) +* [MRI] Ensure encodings are passed to `Sax::Parser` xmldecl callback. ([#844](https://github.com/sparklemotion/nokogiri/issues/844)) +* [MRI] Ensure default ns prefix is applied correctly when reparenting nodes to another document. 
([#391](https://github.com/sparklemotion/nokogiri/issues/391)) (Thanks, [@ylecuyer](https://github.com/ylecuyer)!) +* [MRI] Ensure Reader handles non-existent attributes as expected. ([#1254](https://github.com/sparklemotion/nokogiri/issues/1254)) (Thanks, [@ccutrer](https://github.com/ccutrer)!) +* [MRI] Cleanup around namespace handling when reparenting nodes. ([#1332](https://github.com/sparklemotion/nokogiri/issues/1332), [#1333](https://github.com/sparklemotion/nokogiri/issues/1333), [#1444](https://github.com/sparklemotion/nokogiri/issues/1444)) (Thanks, [@cuttrer](https://github.com/cuttrer) and [@bradleybeddoes](https://github.com/bradleybeddoes)!) +* unescape special characters in CSS queries ([#1303](https://github.com/sparklemotion/nokogiri/issues/1303)) (Thanks, [@twalpole](https://github.com/twalpole)!) +* consistently handle empty documents ([#1349](https://github.com/sparklemotion/nokogiri/issues/1349)) +* Update to mini_portile2 2.1.0 to address whitespace-handling during patching. ([#1402](https://github.com/sparklemotion/nokogiri/issues/1402)) * Fix encoding of xml node namespaces. -* Work around issue installing Nokogiri on overlayfs (commonly used in Docker containers). (#1370, #1405) +* Work around issue installing Nokogiri on overlayfs (commonly used in Docker containers). ([#1370](https://github.com/sparklemotion/nokogiri/issues/1370), [#1405](https://github.com/sparklemotion/nokogiri/issues/1405)) -### Other Notes +### Notes * Removed legacy code remaining from Ruby 1.8.x support. * Removed legacy code remaining from REE support. @@ -398,7 +1181,7 @@ Several changes were made to improve performance: This version pulls in several upstream patches to the vendored libxml2 and libxslt to address: - CVE-2015-7499 +* CVE-2015-7499 Ubuntu classifies this as "Priority: Low", RedHat classifies this as "Impact: Moderate", and NIST classifies this as "Severity: 5.0 (MEDIUM)". 
@@ -409,54 +1192,56 @@ MITRE record is https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-7499 This version pulls in several upstream patches to the vendored libxml2 and libxslt to address: - CVE-2015-5312 - CVE-2015-7497 - CVE-2015-7498 - CVE-2015-7499 - CVE-2015-7500 - CVE-2015-8241 - CVE-2015-8242 - CVE-2015-8317 +* CVE-2015-5312 +* CVE-2015-7497 +* CVE-2015-7498 +* CVE-2015-7499 +* CVE-2015-7500 +* CVE-2015-8241 +* CVE-2015-8242 +* CVE-2015-8317 See also http://www.ubuntu.com/usn/usn-2834-1/ ## 1.6.7 / 2015-11-29 -### Notes +### Added This version supports native builds on Windows using the RubyInstaller DevKit. It also supports Ruby 2.2.x on Windows, as well as making several other improvements to the installation process on various platforms. +### Security + This version also includes the security patches already applied in v1.6.6.3 and v1.6.6.4 to the vendored libxml2 and libxslt source. -See #1374 and #1376 for details. +See [#1374](https://github.com/sparklemotion/nokogiri/issues/1374) and [#1376](https://github.com/sparklemotion/nokogiri/issues/1376) for details. -### Features +### Added -* Cross-built gems now have a proper ruby version requirement. (#1266) +* Cross-built gems now have a proper ruby version requirement. ([#1266](https://github.com/sparklemotion/nokogiri/issues/1266)) * Ruby 2.2.x is supported on Windows. * Native build is supported on Windows. -* [MRI] libxml2 and libxslt `config.guess` files brought up to date. (#1326) (Thanks, @hernan-erasmo!) -* [JRuby] fix error in validating files with jruby (#1355, #1361) (Thanks, @twalpole!) -* [MRI, OSX] Patch to handle nonstandard location of `iconv.h`. (#1206, #1210, #1218, #1345) (Thanks, @neonichu!) +* [MRI] libxml2 and libxslt `config.guess` files brought up to date. ([#1326](https://github.com/sparklemotion/nokogiri/issues/1326)) (Thanks, [@hernan-erasmo](https://github.com/hernan-erasmo)!) 
+* [JRuby] fix error in validating files with jruby ([#1355](https://github.com/sparklemotion/nokogiri/issues/1355), [#1361](https://github.com/sparklemotion/nokogiri/issues/1361)) (Thanks, [@twalpole](https://github.com/twalpole)!) +* [MRI, OSX] Patch to handle nonstandard location of `iconv.h`. ([#1206](https://github.com/sparklemotion/nokogiri/issues/1206), [#1210](https://github.com/sparklemotion/nokogiri/issues/1210), [#1218](https://github.com/sparklemotion/nokogiri/issues/1218), [#1345](https://github.com/sparklemotion/nokogiri/issues/1345)) (Thanks, [@neonichu](https://github.com/neonichu)!) -### Bug Fixes +### Fixed -* [JRuby] reset the namespace cache when replacing the document's innerHtml (#1265) (Thanks, @mkristian!) -* [JRuby] Document#parse should support IO objects that respond to #read. (#1124) (Thanks, Jake Byman!) -* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. (#1262) -* [JRuby] SAX parser cuts texts in pieces when square brackets exist. (#1261) -* [JRuby] Namespaced attributes aren't removed by remove_attribute. (#1299) +* [JRuby] reset the namespace cache when replacing the document's innerHtml ([#1265](https://github.com/sparklemotion/nokogiri/issues/1265)) (Thanks, [@mkristian](https://github.com/mkristian)!) +* [JRuby] `Document#parse` should support IO objects that respond to `#read`. ([#1124](https://github.com/sparklemotion/nokogiri/issues/1124)) (Thanks, Jake Byman!) +* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. ([#1262](https://github.com/sparklemotion/nokogiri/issues/1262)) +* [JRuby] SAX parser cuts texts in pieces when square brackets exist. ([#1261](https://github.com/sparklemotion/nokogiri/issues/1261)) +* [JRuby] Namespaced attributes aren't removed by remove_attribute. 
([#1299](https://github.com/sparklemotion/nokogiri/issues/1299)) ## 1.6.6.4 / 2015-11-19 This version pulls in an upstream patch to the vendored libxml2 to address: -* unclosed comment uninitialized access issue (#1376) +* unclosed comment uninitialized access issue ([#1376](https://github.com/sparklemotion/nokogiri/issues/1376)) This issue was assigned CVE-2015-8710 after the fact. See http://seclists.org/oss-sec/2015/q4/616 for details. @@ -473,14 +1258,14 @@ This version pulls in several upstream patches to the vendored libxml2 and libxs * CVE-2015-8035 * CVE-2015-7995 -See #1374 for details. +See [#1374](https://github.com/sparklemotion/nokogiri/issues/1374) for details. ## 1.6.6.2 / 2015-01-23 -### Bug fixes +### Fixed -* Fixed installation issue affecting compiler arguments. (#1230) +* Fixed installation issue affecting compiler arguments. ([#1230](https://github.com/sparklemotion/nokogiri/issues/1230)) ## 1.6.6.1 / 2015-01-22 @@ -488,102 +1273,102 @@ See #1374 for details. Note that 1.6.6.0 was not released. -### Features +### Added -* Unified Node and NodeSet implementations of #search, #xpath and #css. -* Added Node#lang and Node#lang=. -* bin/nokogiri passes the URI to parse() if an HTTP URL is given. -* bin/nokogiri now loads ~/.nokogirirc so user can define helper methods, etc. -* bin/nokogiri can be configured to use Pry instead of IRB by adding a couple of lines to ~/.nokogirirc. (#1198) -* bin/nokogiri can better handle urls from STDIN (aiding use of xargs). (#1065) +* Unified `Node` and `NodeSet` implementations of `#search`, `#xpath` and `#css`. +* Added `Node#lang` and `Node#lang=`. +* `bin/nokogiri` passes the URI to `parse()` if an HTTP URL is given. +* `bin/nokogiri` now loads `~/.nokogirirc` so user can define helper methods, etc. +* `bin/nokogiri` can be configured to use Pry instead of IRB by adding a couple of lines to ~/.nokogirirc. 
([#1198](https://github.com/sparklemotion/nokogiri/issues/1198)) +* `bin/nokogiri` can better handle urls from STDIN (aiding use of xargs). ([#1065](https://github.com/sparklemotion/nokogiri/issues/1065)) * JRuby 9K support. -### Bug fixes +### Fixed -* DocumentFragment#search now matches against root nodes. (#1205) -* (MRI) More fixes related to handling libxml2 parse errors during DocumentFragment#dup. (#1196) -* (JRuby) Builder now handles namespace hrefs properly when there is a default ns. (#1039) -* (JRuby) Clear the XPath cache on attr removal. (#1109) -* `XML::Comment.new` argument types are now consistent and safe (and documented) across MRI and JRuby. (#1224) -* (MRI) Restoring support for Ruby 1.9.2 that was broken in v1.6.4.1 and v1.6.5. (#1207) -* Check if `zlib` is available before building `libxml2`. (#1188) -* (JRuby) HtmlSaxPushParser now exists. (#1147) (Thanks, Piotr Szmielew!) +* `DocumentFragment#search` now matches against root nodes. ([#1205](https://github.com/sparklemotion/nokogiri/issues/1205)) +* (MRI) More fixes related to handling libxml2 parse errors during `DocumentFragment#dup`. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) +* (JRuby) Builder now handles namespace hrefs properly when there is a default ns. ([#1039](https://github.com/sparklemotion/nokogiri/issues/1039)) +* (JRuby) Clear the XPath cache on attr removal. ([#1109](https://github.com/sparklemotion/nokogiri/issues/1109)) +* `XML::Comment.new` argument types are now consistent and safe (and documented) across MRI and JRuby. ([#1224](https://github.com/sparklemotion/nokogiri/issues/1224)) +* (MRI) Restoring support for Ruby 1.9.2 that was broken in v1.6.4.1 and v1.6.5. ([#1207](https://github.com/sparklemotion/nokogiri/issues/1207)) +* Check if `zlib` is available before building `libxml2`. ([#1188](https://github.com/sparklemotion/nokogiri/issues/1188)) +* (JRuby) HtmlSaxPushParser now exists. 
([#1147](https://github.com/sparklemotion/nokogiri/issues/1147)) (Thanks, Piotr Szmielew!) ## 1.6.5 / 2014-11-26 -### Features +### Added -* Implement Slop#respond_to_missing?. (#1176) +* Implement `Slop#respond_to_missing?`. ([#1176](https://github.com/sparklemotion/nokogiri/issues/1176)) * Optimized the XPath query generated by an `an+b` CSS query. -### Bug fixes +### Fixed -* Capture non-parse errors from Document#dup in Document#errors. (#1196) -* (JRuby) Document#canonicalize parameters are now consistent with MRI. (#1189) +* Capture non-parse errors from `Document#dup` in `Document#errors`. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) +* (JRuby) `Document#canonicalize` parameters are now consistent with MRI. ([#1189](https://github.com/sparklemotion/nokogiri/issues/1189)) ## 1.6.4.1 / 2014-11-05 -### Bug fixes +### Fixed -* (MRI) Fix a bug where CFLAGS passed in are dropped. (#1188) -* Fix a bug where CSS selector :nth(n) did not work. (#1187) +* (MRI) Fix a bug where CFLAGS passed in are dropped. ([#1188](https://github.com/sparklemotion/nokogiri/issues/1188)) +* Fix a bug where CSS selector :nth(n) did not work. ([#1187](https://github.com/sparklemotion/nokogiri/issues/1187)) ## 1.6.4 / 2014-11-04 -### Features +### Added * (MRI) Bundled Libxml2 is upgraded to 2.9.2. * (MRI) `nokogiri --version` will include a list of applied patches. * (MRI) Nokogiri no longer prints messages directly to TTY while building the extension. -* (MRI) Detect and help user fix a missing /usr/include/iconv.h on OS X. (#1111) +* (MRI) Detect and help user fix a missing /usr/include/iconv.h on OS X. ([#1111](https://github.com/sparklemotion/nokogiri/issues/1111)) * (MRI) Improve the iconv detection for building libxml2. -### Bug fixes +### Fixed -* (MRI) Fix DocumentFragment#element_children (#1138). -* Fix a bug with CSS attribute selector without any prefix where "foo [bar]" was treated as "foo[bar]". 
(#1174) +* (MRI) Fix `DocumentFragment#element_children` ([#1138](https://github.com/sparklemotion/nokogiri/issues/1138)). +* Fix a bug with CSS attribute selector without any prefix where "foo [bar]" was treated as "foo[bar]". ([#1174](https://github.com/sparklemotion/nokogiri/issues/1174)) ## 1.6.3.1 / 2014-07-21 -### Bug fixes +### Fixed -* Addressing an Apple Macintosh installation problem for GCC users. #1130 (Thanks, @zenspider!) +* Addressing an Apple Macintosh installation problem for GCC users. [#1130](https://github.com/sparklemotion/nokogiri/issues/1130) (Thanks, [@zenspider](https://github.com/zenspider)!) ## 1.6.3 / 2014-07-20 -### Features +### Added -* Added Node#document? and Node#processing_instruction? +* Added `Node#document?` and `Node#processing_instruction?` -### Bug fixes +### Fixed -* [JRuby] Fix Ruby memory exhaustion vulnerability. #1087 (Thanks, @ocher) -* [MRI] Fix segfault during GC when using `libxml-ruby` and `nokogiri` together in multi-threaded environment. #895 (Thanks, @ender672!) -* Building on OSX 10.9 stock ruby 2.0.0 now works. #1101 (Thanks, @zenspider!) -* Node#parse now works again for HTML document nodes (broken in 1.6.2+). -* Processing instructions can now be added via Node#add_next_sibling. +* [JRuby] Fix Ruby memory exhaustion vulnerability. [#1087](https://github.com/sparklemotion/nokogiri/issues/1087) (Thanks, [@ocher](https://github.com/ocher)) +* [MRI] Fix segfault during GC when using `libxml-ruby` and `nokogiri` together in multi-threaded environment. [#895](https://github.com/sparklemotion/nokogiri/issues/895) (Thanks, [@ender672](https://github.com/ender672)!) +* Building on OSX 10.9 stock ruby 2.0.0 now works. [#1101](https://github.com/sparklemotion/nokogiri/issues/1101) (Thanks, [@zenspider](https://github.com/zenspider)!) +* `Node#parse` now works again for HTML document nodes (broken in 1.6.2+). +* Processing instructions can now be added via `Node#add_next_sibling`. 
## 1.6.2.1 / 2014-05-13 -### Bug fixes +### Fixed -* Fix statically-linked libxml2 installation when using universal builds of Ruby. #1104 -* Patching `mini_portile` to address the git dependency detailed in #1102. -* Library load fix to address segfault reported on some systems. #1097 +* Fix statically-linked libxml2 installation when using universal builds of Ruby. [#1104](https://github.com/sparklemotion/nokogiri/issues/1104) +* Patching `mini_portile` to address the git dependency detailed in [#1102](https://github.com/sparklemotion/nokogiri/issues/1102). +* Library load fix to address segfault reported on some systems. [#1097](https://github.com/sparklemotion/nokogiri/issues/1097) ## 1.6.2 / 2014-05-12 -### Security Note +### Security A set of security and bugfix patches have been backported from the libxml2 and libxslt repositories onto the version of 2.8.0 packaged with Nokogiri, including these notable security fixes: @@ -593,55 +1378,55 @@ A set of security and bugfix patches have been backported from the libxml2 and l It is recommended that you upgrade from 1.6.x to this version as soon as possible. -### Compatibility Note +### Dependencies Now requires libxml >= 2.6.21 (was previously >= 2.6.17). -### Features +### Added -* Add cross building of fat binary gems for 64-Bit Windows (x64-mingw32) and add support for native builds on Windows. #864, #989, #1072 +* Add cross building of fat binary gems for 64-Bit Windows (x64-mingw32) and add support for native builds on Windows. [#864](https://github.com/sparklemotion/nokogiri/issues/864), [#989](https://github.com/sparklemotion/nokogiri/issues/989), [#1072](https://github.com/sparklemotion/nokogiri/issues/1072) * (MRI) Alias CP932 to Windows-31J if iconv does not support Windows-31J. -* (MRI) Nokogiri now links packaged libraries statically. To disable static linking, pass --disable-static to extconf.rb. #923 -* (MRI) Fix a library path (LIBPATH) precedence problem caused by CRuby bug #9760. 
-* (MRI) Nokogiri automatically deletes directories of packaged libraries only used during build. To keep them for debugging purposes, pass --disable-clean to extconf.rb. #952 +* (MRI) Nokogiri now links packaged libraries statically. To disable static linking, pass --disable-static to `extconf.rb`. [#923](https://github.com/sparklemotion/nokogiri/issues/923) +* (MRI) Fix a library path (LIBPATH) precedence problem caused by CRuby bug [#9760](https://bugs.ruby-lang.org/issues/9760). +* (MRI) Nokogiri automatically deletes directories of packaged libraries only used during build. To keep them for debugging purposes, pass --disable-clean to `extconf.rb`. [#952](https://github.com/sparklemotion/nokogiri/issues/952) * (MRI) Nokogiri now builds libxml2 properly with iconv support on platforms where libiconv is installed outside the system default directories, such as FreeBSD. -* Add support for an-b in nth selectors. #886 (Thanks, Magnus Bergmark!) -* Add support for bare and multiple :not() functions in selectors. #887 (Thanks, Magnus Bergmark!) -* (MRI) Add an extconf.rb option --use-system-libraries, alternative to setting the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES. +* Add support for an-b in nth selectors. [#886](https://github.com/sparklemotion/nokogiri/issues/886) (Thanks, Magnus Bergmark!) +* Add support for bare and multiple `:not()` functions in selectors. [#887](https://github.com/sparklemotion/nokogiri/issues/887) (Thanks, Magnus Bergmark!) +* (MRI) Add an `extconf.rb` option --use-system-libraries, alternative to setting the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES. * (MRI) Update packaged libraries: libxslt to 1.1.28, zlib to 1.2.8, and libiconv to 1.14, respectively. -* Nokogiri::HTML::Document#title= and #meta_encoding= now always add an element if not present, trying hard to find the best place to put it. -* Nokogiri::XML::DTD#html_dtd? and #html5_dtd? are added. -* Nokogiri::XML::Node#prepend_child is added.
#664 -* Nokogiri::XML::SAX::ParserContext#recovery is added. #453 -* Fix documentation for XML::Node#namespace. #803 #802 (Thanks, Hoylen Sue) -* Allow Nokogiri::XML::Node#parse from unparented non-element nodes. #407 - -### Bugfixes - -* Ensure :only-child pseudo class works within :not pseudo class. #858 (Thanks, Yamagishi Kazutoshi!) -* Don't call pkg_config when using bundled libraries in extconf.rb #931 (Thanks, Shota Fukumori!) -* Nokogiri.parse() does not mistake a non-HTML document like a RSS document as HTML document. #932 (Thanks, Yamagishi Kazutoshi!) -* (MRI) Perform a node type check before adding a child node to another. Previously adding a text node to another as a child could cause a SEGV. #1092 -* (JRuby) XSD validation crashes in Java version. #373 -* (JRuby) Document already has a root node error while using Builder. #646 -* (JRuby) c14n tests are all passing on JRuby. #226 -* Parsing empty documents raise SyntaxError in strict mode. #1005 -* (JRuby) Make xpath faster by caching the xpath context. #741 -* (JRuby) XML SAX push parser leaks memory on JRuby, but not on MRI. #998 -* (JRuby) Inconsistent behavior aliasing the default namespace. #940 -* (JRuby) Inconsistent behavior between parsing and adding namespaces. #943 -* (JRuby) Xpath returns inconsistent result set on cloned document with namespaces and attributes. #1034 -* (JRuby) Java-Implementation forgets element namespaces #902 -* (JRuby) JRuby-Nokogiri does not recognise attributes inside namespaces #1081 -* (JRuby) JRuby-Nokogiri has different comment node name #1080 -* (JRuby) JAXPExtensionsProvider / Java 7 / Secure Processing #1070 +* `Nokogiri::HTML::Document#title=` and `#meta_encoding=` now always add an element if not present, trying hard to find the best place to put it. +* `Nokogiri::XML::DTD#html_dtd?` and `#html5_dtd?` are added. +* `Nokogiri::XML::Node#prepend_child` is added.
[#664](https://github.com/sparklemotion/nokogiri/issues/664) +* `Nokogiri::XML::SAX::ParserContext#recovery` is added. [#453](https://github.com/sparklemotion/nokogiri/issues/453) +* Fix documentation for `XML::Node#namespace`. [#803](https://github.com/sparklemotion/nokogiri/issues/803) [#802](https://github.com/sparklemotion/nokogiri/issues/802) (Thanks, Hoylen Sue) +* Allow `Nokogiri::XML::Node#parse` from unparented non-element nodes. [#407](https://github.com/sparklemotion/nokogiri/issues/407) + +### Fixed + +* Ensure :only-child pseudo class works within :not pseudo class. [#858](https://github.com/sparklemotion/nokogiri/issues/858) (Thanks, Yamagishi Kazutoshi!) +* Don't call pkg_config when using bundled libraries in `extconf.rb` [#931](https://github.com/sparklemotion/nokogiri/issues/931) (Thanks, Shota Fukumori!) +* `Nokogiri.parse()` does not mistake a non-HTML document like a RSS document as HTML document. [#932](https://github.com/sparklemotion/nokogiri/issues/932) (Thanks, Yamagishi Kazutoshi!) +* (MRI) Perform a node type check before adding a child node to another. Previously adding a text node to another as a child could cause a SEGV. [#1092](https://github.com/sparklemotion/nokogiri/issues/1092) +* (JRuby) XSD validation crashes in Java version. [#373](https://github.com/sparklemotion/nokogiri/issues/373) +* (JRuby) Document already has a root node error while using Builder. [#646](https://github.com/sparklemotion/nokogiri/issues/646) +* (JRuby) c14n tests are all passing on JRuby. [#226](https://github.com/sparklemotion/nokogiri/issues/226) +* Parsing empty documents raise `SyntaxError` in strict mode. [#1005](https://github.com/sparklemotion/nokogiri/issues/1005) +* (JRuby) Make xpath faster by caching the xpath context. [#741](https://github.com/sparklemotion/nokogiri/issues/741) +* (JRuby) XML SAX push parser leaks memory on JRuby, but not on MRI. 
[#998](https://github.com/sparklemotion/nokogiri/issues/998) +* (JRuby) Inconsistent behavior aliasing the default namespace. [#940](https://github.com/sparklemotion/nokogiri/issues/940) +* (JRuby) Inconsistent behavior between parsing and adding namespaces. [#943](https://github.com/sparklemotion/nokogiri/issues/943) +* (JRuby) Xpath returns inconsistent result set on cloned document with namespaces and attributes. [#1034](https://github.com/sparklemotion/nokogiri/issues/1034) +* (JRuby) Java-Implementation forgets element namespaces [#902](https://github.com/sparklemotion/nokogiri/issues/902) +* (JRuby) JRuby-Nokogiri does not recognise attributes inside namespaces [#1081](https://github.com/sparklemotion/nokogiri/issues/1081) +* (JRuby) JRuby-Nokogiri has different comment node name [#1080](https://github.com/sparklemotion/nokogiri/issues/1080) +* (JRuby) JAXPExtensionsProvider / Java 7 / Secure Processing [#1070](https://github.com/sparklemotion/nokogiri/issues/1070) ## 1.6.1 / 2013-12-14 -* Bugfixes +### Fixed - * (JRuby) Fix out of memory bug when certain invalid documents are parsed. - * (JRuby) Fix regression of billion-laughs vulnerability. #586 +* (JRuby) Fix out of memory bug when certain invalid documents are parsed. +* (JRuby) Fix regression of billion-laughs vulnerability. [#586](https://github.com/sparklemotion/nokogiri/issues/586) ## 1.6.0 / 2013-06-08 @@ -649,9 +1434,9 @@ Now requires libxml >= 2.6.21 (was previously >= 2.6.17). This release was based on v1.5.10 and 1.6.0.rc1, and contains changes mentioned in both. -* Deprecations +### Deprecations - * Remove pre 1.9 monitoring from Travis. +* Remove pre 1.9 monitoring from Travis. ## 1.6.0.rc1 / 2013-04-14 @@ -659,815 +1444,756 @@ mentioned in both. This release was based on v1.5.9, and so does not contain any fixes mentioned in the notes for v1.5.10. 
-* Notes +### Notes - * mini_portile is now a runtime dependency - * Ruby 1.9.2 and higher now required +* mini_portile is now a runtime dependency +* Ruby 1.9.2 and higher now required -* Features +### Added - * (MRI) Source code for libxml 2.8.0 and libxslt 1.2.26 is packaged - with the gem. These libraries are compiled at gem install time - unless the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES is - set. VERSION_INFO (also `nokogiri -v`) exposes whether libxml was - compiled from packaged source, or the system library was used. - * (Windows) libxml upgraded to 2.8.0 +* (MRI) Source code for libxml 2.8.0 and libxslt 1.2.26 is packaged with the gem. These libraries are compiled at gem install time unless the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES is set. VERSION_INFO (also `nokogiri -v`) exposes whether libxml was compiled from packaged source, or the system library was used. +* (Windows) libxml upgraded to 2.8.0 -* Deprecations +### Dependencies - * Support for Ruby 1.8.7 and prior has been dropped +* Support for Ruby 1.8.7 and prior has been dropped ## 1.5.11 / 2013-12-14 -* Bugfixes +### Fixed - * (JRuby) Fix out of memory bug when certain invalid documents are parsed. - * (JRuby) Fix regression of billion-laughs vulnerability. #586 +* (JRuby) Fix out of memory bug when certain invalid documents are parsed. +* (JRuby) Fix regression of billion-laughs vulnerability. [#586](https://github.com/sparklemotion/nokogiri/issues/586) ## 1.5.10 / 2013-06-07 -* Bugfixes +### Fixed - * (JRuby) Fix "null document" error when parsing an empty IO in jruby 1.7.3. #883 - * (JRuby) Fix schema validation when XSD has DOCTYPE set to DTD. #912 (Thanks, Patrick Cheng!) - * (MRI) Fix segfault when there is no default subelement for an HTML node. #917 +* (JRuby) Fix "null document" error when parsing an empty IO in jruby 1.7.3. [#883](https://github.com/sparklemotion/nokogiri/issues/883) +* (JRuby) Fix schema validation when XSD has DOCTYPE set to DTD. 
[#912](https://github.com/sparklemotion/nokogiri/issues/912) (Thanks, Patrick Cheng!) +* (MRI) Fix segfault when there is no default subelement for an HTML node. [#917](https://github.com/sparklemotion/nokogiri/issues/917) -* Notes +### Notes - * Use rb_ary_entry instead of RARRAY_PTR (you know, for Rubinius). #877 (Thanks, Dirkjan Bussink!) - * Fix TypeError when running tests. #900 (Thanks, Cédric Boutillier!) +* Use rb_ary_entry instead of RARRAY_PTR (you know, for Rubinius). [#877](https://github.com/sparklemotion/nokogiri/issues/877) (Thanks, Dirkjan Bussink!) +* Fix TypeError when running tests. [#900](https://github.com/sparklemotion/nokogiri/issues/900) (Thanks, Cédric Boutillier!) ## 1.5.9 / 2013-03-21 -* Bugfixes +### Fixed - * Ensure that prefixed attributes are properly namespaced when reparented. #869 - * Fix for inconsistent namespaced attribute access for SVG nested in HTML. #861 - * (MRI) Fixed a memory leak in fragment parsing if nodes are not all subsequently reparented. #856 +* Ensure that prefixed attributes are properly namespaced when reparented. [#869](https://github.com/sparklemotion/nokogiri/issues/869) +* Fix for inconsistent namespaced attribute access for SVG nested in HTML. [#861](https://github.com/sparklemotion/nokogiri/issues/861) +* (MRI) Fixed a memory leak in fragment parsing if nodes are not all subsequently reparented. [#856](https://github.com/sparklemotion/nokogiri/issues/856) ## 1.5.8 / 2013-03-19 -* Bugfixes +### Fixed - * (JRuby) Fix EmptyStackException thrown by elements with xlink:href attributes and no base_uri #534, #805. (Thanks, Patrick Quinn and Brian Hoffman!) - * Fixes duplicate attributes issue introduced in 1.5.7. 
#865 - * Allow use of a prefixed namespace on a root node using Nokogiri::XML::Builder #868 +* (JRuby) Fix EmptyStackException thrown by elements with xlink:href attributes and no base_uri [#534](https://github.com/sparklemotion/nokogiri/issues/534), [#805](https://github.com/sparklemotion/nokogiri/issues/805). (Thanks, Patrick Quinn and Brian Hoffman!) +* Fixes duplicate attributes issue introduced in 1.5.7. [#865](https://github.com/sparklemotion/nokogiri/issues/865) +* Allow use of a prefixed namespace on a root node using `Nokogiri::XML::Builder` [#868](https://github.com/sparklemotion/nokogiri/issues/868) ## 1.5.7 / 2013-03-18 -* Features +### Added - * Windows support for Ruby 2.0. +* Windows support for Ruby 2.0. -* Bugfixes +### Fixed - * SAX::Parser.parse_io throw an error when used with lower case encoding. #828 - * (JRuby) Java Nokogiri is finally green (passes all tests) under 1.8 and 1.9 mode. High five everyone. #798, #705 - * (JRuby) Nokogiri::XML::Reader broken (as a pull parser) on jruby - reads the whole XML document. #831 - * (JRuby) JRuby hangs parsing "&". #837 - * (JRuby) JRuby NPE parsing an invalid XML instruction. #838 - * (JRuby) Node#content= incompatibility. #839 - * (JRuby) to_xhtml doesn't print the last slash for self-closing tags in JRuby. #834 - * (JRuby) Adding an EntityReference after a Text node mangles the entity in JRuby. #835 - * (JRuby) JRuby version inconsistency: nil for empty attributes. #818 - * CSS queries for classes (e.g., ".foo") now treat all whitespace identically. #854 - * Namespace behavior cleaned up and made consistent between JRuby and MRI. #846, #801 (Thanks, Michael Klein!) - * (MRI) SAX parser handles empty processing instructions. #845 +* `SAX::Parser.parse_io` throw an error when used with lower case encoding. [#828](https://github.com/sparklemotion/nokogiri/issues/828) +* (JRuby) Java Nokogiri is finally green (passes all tests) under 1.8 and 1.9 mode. High five everyone. 
[#798](https://github.com/sparklemotion/nokogiri/issues/798), [#705](https://github.com/sparklemotion/nokogiri/issues/705) +* (JRuby) `Nokogiri::XML::Reader` broken (as a pull parser) on jruby - reads the whole XML document. [#831](https://github.com/sparklemotion/nokogiri/issues/831) +* (JRuby) JRuby hangs parsing "&". [#837](https://github.com/sparklemotion/nokogiri/issues/837) +* (JRuby) JRuby NPE parsing an invalid XML instruction. [#838](https://github.com/sparklemotion/nokogiri/issues/838) +* (JRuby) `Node#content=` incompatibility. [#839](https://github.com/sparklemotion/nokogiri/issues/839) +* (JRuby) to_xhtml doesn't print the last slash for self-closing tags in JRuby. [#834](https://github.com/sparklemotion/nokogiri/issues/834) +* (JRuby) Adding an `EntityReference` after a Text node mangles the entity in JRuby. [#835](https://github.com/sparklemotion/nokogiri/issues/835) +* (JRuby) JRuby version inconsistency: nil for empty attributes. [#818](https://github.com/sparklemotion/nokogiri/issues/818) +* CSS queries for classes (e.g., ".foo") now treat all whitespace identically. [#854](https://github.com/sparklemotion/nokogiri/issues/854) +* Namespace behavior cleaned up and made consistent between JRuby and MRI. [#846](https://github.com/sparklemotion/nokogiri/issues/846), [#801](https://github.com/sparklemotion/nokogiri/issues/801) (Thanks, Michael Klein!) +* (MRI) SAX parser handles empty processing instructions. [#845](https://github.com/sparklemotion/nokogiri/issues/845) ## 1.5.6 / 2012-12-19 -* Features - - * Improved performance of XML::Document#collect_namespaces. #761 (Thanks, Juergen Mangler!) - * New callback SAX::Document#processing_instruction (Thanks, Kitaiti Makoto!) - * Node#native_content= allows setting unescaped node contant. #768 - * XPath lookup with namespaces supports symbol keys. #729 (Thanks, Ben Langfeld.) - * XML::Node#[]= stringifies values. #729 (Thanks, Ben Langfeld.) 
- * bin/nokogiri will process a document from $stdin - * bin/nokogiri -e will execute a program from the command line - * (JRuby) bin/nokogiri --version will print the Xerces and NekoHTML versions. - - -* Bugfixes - - * Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!) - * Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775 - * Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!) - * Prefixed element inconsistency between CRuby and JRuby. #712 - * (JRuby) space prior to xml preamble causes nokogiri to fail parsing. (fixed along with #748) #790 - * (JRuby) Fixed the bug Nokogiri::XML::Node#content inconsistency between Java and C. #794, #797 - * (JRuby) raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719 - * (JRuby) doesn't coerce namespaces out of strings on a direct subclass of Node. #715 - * (JRuby) Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!) - * (JRuby) Unknown namespace are ignore when the recover option is used. #748 - * (JRuby) XPath queries for namespaces should not throw exceptions when called twice in a row. #764 - * (JRuby) More consistent (with libxml2) whitespace formatting when emitting XML. #771 - * (JRuby) namespaced attributes broken when appending raw xml to builder. #770 - * (JRuby) Nokogiri::XML::Document#wrap raises undefined method `length' for nil:NilClass when trying to << to a node. #781 - * (JRuby) Fixed "bad file descriptor" bug when closing open file descriptors. #495 - * (JRuby) JRuby/CRuby incompatibility for attribute decorators. #785 - * (JRuby) Issues parsing valid XML with no internal subset in the DTD. #547, #811 - * (JRuby) Issues parsing valid node content when it contains colons. #728 - * (JRuby) Correctly parse the doc type of html documents. #733 - * (JRuby) Include dtd in the xml output when a builder is used with create_internal_subset. 
#751 - * (JRuby) builder requires textwrappers for valid utf8 in jruby, not in mri. #784 +### Added + +* Improved performance of `XML::Document#collect_namespaces`. [#761](https://github.com/sparklemotion/nokogiri/issues/761) (Thanks, Juergen Mangler!) +* New callback `SAX::Document#processing_instruction` (Thanks, Kitaiti Makoto!) +* `Node#native_content=` allows setting unescaped node content. [#768](https://github.com/sparklemotion/nokogiri/issues/768) +* XPath lookup with namespaces supports symbol keys. [#729](https://github.com/sparklemotion/nokogiri/issues/729) (Thanks, Ben Langfeld.) +* `XML::Node#[]=` stringifies values. [#729](https://github.com/sparklemotion/nokogiri/issues/729) (Thanks, Ben Langfeld.) +* `bin/nokogiri` will process a document from $stdin +* `bin/nokogiri -e` will execute a program from the command line +* (JRuby) `bin/nokogiri --version` will print the Xerces and NekoHTML versions. + + +### Fixed + +* Nokogiri now detects XSLT transform errors. [#731](https://github.com/sparklemotion/nokogiri/issues/731) (Thanks, Justin Fitzsimmons!) +* Don't throw an Error when trying to replace top-level text node in DocumentFragment. [#775](https://github.com/sparklemotion/nokogiri/issues/775) +* Raise an ArgumentError if an invalid encoding is passed to the SAX parser. [#756](https://github.com/sparklemotion/nokogiri/issues/756) (Thanks, Bradley Schaefer!) +* Prefixed element inconsistency between CRuby and JRuby. [#712](https://github.com/sparklemotion/nokogiri/issues/712) +* (JRuby) space prior to xml preamble causes nokogiri to fail parsing. (fixed along with [#748](https://github.com/sparklemotion/nokogiri/issues/748)) [#790](https://github.com/sparklemotion/nokogiri/issues/790) +* (JRuby) Fixed the bug `Nokogiri::XML::Node#content` inconsistency between Java and C. 
[#794](https://github.com/sparklemotion/nokogiri/issues/794), [#797](https://github.com/sparklemotion/nokogiri/issues/797) +* (JRuby) raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. [#719](https://github.com/sparklemotion/nokogiri/issues/719) +* (JRuby) doesn't coerce namespaces out of strings on a direct subclass of Node. [#715](https://github.com/sparklemotion/nokogiri/issues/715) +* (JRuby) `Node#content` now renders newlines properly. [#737](https://github.com/sparklemotion/nokogiri/issues/737) (Thanks, Piotr Szmielew!) +* (JRuby) Unknown namespaces are ignored when the recover option is used. [#748](https://github.com/sparklemotion/nokogiri/issues/748) +* (JRuby) XPath queries for namespaces should not throw exceptions when called twice in a row. [#764](https://github.com/sparklemotion/nokogiri/issues/764) +* (JRuby) More consistent (with libxml2) whitespace formatting when emitting XML. [#771](https://github.com/sparklemotion/nokogiri/issues/771) +* (JRuby) namespaced attributes broken when appending raw xml to builder. [#770](https://github.com/sparklemotion/nokogiri/issues/770) +* (JRuby) `Nokogiri::XML::Document#wrap` raises undefined method `length' for nil:NilClass when trying to << to a node. [#781](https://github.com/sparklemotion/nokogiri/issues/781) +* (JRuby) Fixed "bad file descriptor" bug when closing open file descriptors. [#495](https://github.com/sparklemotion/nokogiri/issues/495) +* (JRuby) JRuby/CRuby incompatibility for attribute decorators. [#785](https://github.com/sparklemotion/nokogiri/issues/785) +* (JRuby) Issues parsing valid XML with no internal subset in the DTD. [#547](https://github.com/sparklemotion/nokogiri/issues/547), [#811](https://github.com/sparklemotion/nokogiri/issues/811) +* (JRuby) Issues parsing valid node content when it contains colons. [#728](https://github.com/sparklemotion/nokogiri/issues/728) +* (JRuby) Correctly parse the doc type of html documents.
[#733](https://github.com/sparklemotion/nokogiri/issues/733) +* (JRuby) Include dtd in the xml output when a builder is used with create_internal_subset. [#751](https://github.com/sparklemotion/nokogiri/issues/751) +* (JRuby) builder requires textwrappers for valid utf8 in jruby, not in mri. [#784](https://github.com/sparklemotion/nokogiri/issues/784) ## 1.5.5 / 2012-06-24 -* Features +### Added - * Much-improved support for JRuby in 1.9 mode! Yay! +* Much-improved support for JRuby in 1.9 mode! Yay! -* Bugfixes +### Fixed - * Regression in JRuby Nokogiri add_previous_sibling (1.5.0 -> 1.5.1) #691 (Thanks, John Shahid!) - * JRuby unable to create HTML doc if URL arg provided #674 (Thanks, John Shahid!) - * JRuby raises NullPointerException when given HTML document is nil or empty string. #699 - * JRuby 1.9 error, uncaught throw 'encoding_found', has been fixed. #673 - * Invalid encoding returned in JRuby with US-ASCII. #583 - * XmlSaxPushParser raises IndexOutOfBoundsException when over 512 characters are given. #567, #615 - * When xpath evaluation returns empty NodeSet, decorating NodeSet's base document raises exception. #514 - * JRuby raises exception when xpath with namespace is specified. pull request #681 (Thanks, Piotr Szmielew) - * JRuby renders nodes without their namespace when subclassing Node. #695 - * JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating RDF::RDFXML::Writer. #683 - * JRuby is not able to use namespaces in xpath. #493 - * JRuby's Entity resolving should be consistent with C-Nokogiri #704, #647, #703 +* Regression in JRuby Nokogiri add_previous_sibling (1.5.0 -> 1.5.1) [#691](https://github.com/sparklemotion/nokogiri/issues/691) (Thanks, John Shahid!) +* JRuby unable to create HTML doc if URL arg provided [#674](https://github.com/sparklemotion/nokogiri/issues/674) (Thanks, John Shahid!) +* JRuby raises NullPointerException when given HTML document is nil or empty string. 
[#699](https://github.com/sparklemotion/nokogiri/issues/699) +* JRuby 1.9 error, uncaught throw 'encoding_found', has been fixed. [#673](https://github.com/sparklemotion/nokogiri/issues/673) +* Invalid encoding returned in JRuby with US-ASCII. [#583](https://github.com/sparklemotion/nokogiri/issues/583) +* XmlSaxPushParser raises IndexOutOfBoundsException when over 512 characters are given. [#567](https://github.com/sparklemotion/nokogiri/issues/567), [#615](https://github.com/sparklemotion/nokogiri/issues/615) +* When xpath evaluation returns empty `NodeSet`, decorating `NodeSet`'s base document raises exception. [#514](https://github.com/sparklemotion/nokogiri/issues/514) +* JRuby raises exception when xpath with namespace is specified. pull request [#681](https://github.com/sparklemotion/nokogiri/issues/681) (Thanks, Piotr Szmielew) +* JRuby renders nodes without their namespace when subclassing Node. [#695](https://github.com/sparklemotion/nokogiri/issues/695) +* JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating `RDF::RDFXML::Writer`. [#683](https://github.com/sparklemotion/nokogiri/issues/683) +* JRuby is not able to use namespaces in xpath. [#493](https://github.com/sparklemotion/nokogiri/issues/493) +* JRuby's Entity resolving should be consistent with C-Nokogiri [#704](https://github.com/sparklemotion/nokogiri/issues/704), [#647](https://github.com/sparklemotion/nokogiri/issues/647), [#703](https://github.com/sparklemotion/nokogiri/issues/703) ## 1.5.4 / 2012-06-12 -* Features +### Added - * The "nokogiri" script now has more verbose output when passed the `--rng` option. #675 (Thanks, Dan Radez!) - * Build support on hardened Debian systems that use `-Werror=format-security`. #680. - * Better build support for systems with pkg-config. #584 - * Better build support for systems with multiple iconv installations. +* The "nokogiri" script now has more verbose output when passed the `--rng` option. 
[#675](https://github.com/sparklemotion/nokogiri/issues/675) (Thanks, Dan Radez!) +* Build support on hardened Debian systems that use `-Werror=format-security`. [#680](https://github.com/sparklemotion/nokogiri/issues/680). +* Better build support for systems with pkg-config. [#584](https://github.com/sparklemotion/nokogiri/issues/584) +* Better build support for systems with multiple iconv installations. -* Bugfixes +### Fixed - * Segmentation fault when creating a comment node for a DocumentFragment. #677, #678. - * Treat '.' as xpath in at() and search(). #690 +* Segmentation fault when creating a comment node for a DocumentFragment. [#677](https://github.com/sparklemotion/nokogiri/issues/677), [#678](https://github.com/sparklemotion/nokogiri/issues/678). +* Treat '.' as xpath in `at()` and `search()`. [#690](https://github.com/sparklemotion/nokogiri/issues/690) - * (MRI, Security) Default parse options for XML documents were - changed to not make network connections during document parsing, - to avoid XXE vulnerability. #693 +### Security + +(MRI) Default parse options for XML documents were changed to not make network connections during document parsing, to avoid XXE vulnerability. [#693](https://github.com/sparklemotion/nokogiri/issues/693) - To re-enable this behavior, the configuration method `nononet` may - be called, like this: +To re-enable this behavior, the configuration method `nononet` may be called, like this: - Nokogiri::XML::Document.parse(xml) { |config| config.nononet } +``` ruby +Nokogiri::XML::Document.parse(xml) { |config| config.nononet } +``` - Insert your own joke about double-negatives here. +Insert your own joke about double-negatives here. ## 1.5.3 / 2012-06-01 -* Features - - * Support for "prefixless" CSS selectors ~, > and + like jQuery - supports. #621, #623. (Thanks, David Lee!) - * Attempting to improve installation on homebrew 0.9 (with regards - to iconv). Isn't package management convenient? 
- -* Bugfixes - - * Custom xpath functions with empty nodeset arguments cause a - segfault. #634. - * Nokogiri::XML::Node#css now works for XML documents with default - namespaces when the rule contains attribute selector without - namespace. - * Fixed marshalling bugs around how arguments are passed to (and - returned from) XSLT custom xpath functions. #640. - * Nokogiri::XML::Reader#outer_xml is broken in JRuby #617 - * Nokogiri::XML::Attribute on JRuby returns a nil namespace #647 - * Nokogiri::XML::Node#namespace= cannot set a namespace without a - prefix on JRuby #648 - * (JRuby) 1.9 mode causes dead lock while running rake #571 - * HTML::Document#meta_encoding does not raise exception on docs with - malformed content-type. #655 - * Fixing segfault related to unsupported encodings in in-context - parsing on 1.8.7. #643 - * (JRuby) Concurrency issue in XPath parsing. #682 +### Added + +* Support for "prefixless" CSS selectors ~, > and + like jQuery supports. [#621](https://github.com/sparklemotion/nokogiri/issues/621), [#623](https://github.com/sparklemotion/nokogiri/issues/623). (Thanks, David Lee!) +* Attempting to improve installation on homebrew 0.9 (with regards to iconv). Isn't package management convenient? + +### Fixed + +* Custom xpath functions with empty nodeset arguments cause a segfault. [#634](https://github.com/sparklemotion/nokogiri/issues/634). +* `Nokogiri::XML::Node#css` now works for XML documents with default namespaces when the rule contains attribute selector without namespace. +* Fixed marshalling bugs around how arguments are passed to (and returned from) XSLT custom xpath functions. [#640](https://github.com/sparklemotion/nokogiri/issues/640). 
+* `Nokogiri::XML::Reader#outer_xml` is broken in JRuby [#617](https://github.com/sparklemotion/nokogiri/issues/617) +* `Nokogiri::XML::Attribute` on JRuby returns a nil namespace [#647](https://github.com/sparklemotion/nokogiri/issues/647) +* `Nokogiri::XML::Node#namespace=` cannot set a namespace without a prefix on JRuby [#648](https://github.com/sparklemotion/nokogiri/issues/648) +* (JRuby) 1.9 mode causes dead lock while running rake [#571](https://github.com/sparklemotion/nokogiri/issues/571) +* `HTML::Document#meta_encoding` does not raise exception on docs with malformed content-type. [#655](https://github.com/sparklemotion/nokogiri/issues/655) +* Fixing segfault related to unsupported encodings in in-context parsing on 1.8.7. [#643](https://github.com/sparklemotion/nokogiri/issues/643) +* (JRuby) Concurrency issue in XPath parsing. [#682](https://github.com/sparklemotion/nokogiri/issues/682) ## 1.5.2 / 2012-03-09 -Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. #631, #632. +Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631](https://github.com/sparklemotion/nokogiri/issues/631), [#632](https://github.com/sparklemotion/nokogiri/issues/632). ## 1.5.1 / 2012-03-09 -* Features - - * XML::Builder#comment allows creation of comment nodes. - * CSS searches now support namespaced attributes. #593 - * Java integration feature is added. Now, XML::Document.wrap - and XML::Document#to_java methods are available. - * RelaxNG validator support in the `nokogiri` cli utility. #591 (thanks, Dan Radez!) - -* Bugfixes - - * Fix many memory leaks and segfault opportunities. Thanks, Tim Elliott! - * extconf searches homebrew paths if homebrew is installed. - * Inconsistent behavior of Nokogiri 1.5.0 Java #620 - * Inheriting from Nokogiri::XML::Node on JRuby (1.6.4/5) fails #560 - * XML::Attr nodes are not allowed to be added as node children, so an - exception is raised. 
#558 - * No longer defensively "pickle" adjacent text nodes on - Node#add_next_sibling and Node#add_previous_sibling calls. #595. - * Java version inconsistency: it returns nil for empty attributes #589 - * to_xhtml incorrectly generates <p /></p>
when tag is empty #557 - * Document#add_child now accepts a Node, NodeSet, DocumentFragment, - or String. #546. - * Document#create_element now recognizes namespaces containing - non-word characters (like "SOAP-ENV"). This is mostly relevant to - users of Builder, which calls Document#create_element for nearly - everything. #531. - * File encoding broken in 1.5.0 / jruby / windows #529 - * Java version does not return namespace defs as attrs for ::HTML #542 - * Bad file descriptor with Nokogiri 1.5.0 #495 - * remove_namespace! doesn't work in pure java version #492 - * The Nokogiri Java native build throws a null pointer exception - when ActiveSupport's .blank? method is called directly on a parsed - object. #489 - * 1.5.0 Not using correct character encoding #488 - * Raw XML string in XML Builder broken on JRuby #486 - * Nokogiri 1.5.0 XML generation broken on JRuby #484 - * Do not allow multiple root nodes. #550 - * Fixes for custom XPath functions. #605, #606 (thanks, Juan Wajnerman!) - * Node#to_xml does not override :save_with if it is provided. #505 - * Node#set is a private method (JRuby). #564 (thanks, Nick Sieger!) - * C14n cleanup and Node#canonicalize (thanks, Ivan Pirlik!) #563 +### Added + +* `XML::Builder#comment` allows creation of comment nodes. +* CSS searches now support namespaced attributes. [#593](https://github.com/sparklemotion/nokogiri/issues/593) +* Java integration feature is added. Now, `XML::Document.wrap` and `XML::Document#to_java` methods are available. +* RelaxNG validator support in the `nokogiri` cli utility. [#591](https://github.com/sparklemotion/nokogiri/issues/591) (thanks, Dan Radez!) + +### Fixed + +* Fix many memory leaks and segfault opportunities. Thanks, Tim Elliott! +* extconf searches homebrew paths if homebrew is installed. 
+* Inconsistent behavior of Nokogiri 1.5.0 Java [#620](https://github.com/sparklemotion/nokogiri/issues/620) +* Inheriting from `Nokogiri::XML::Node` on JRuby (1.6.4/5) fails [#560](https://github.com/sparklemotion/nokogiri/issues/560) +* `XML::Attr` nodes are not allowed to be added as node children, so an exception is raised. [#558](https://github.com/sparklemotion/nokogiri/issues/558) +* No longer defensively "pickle" adjacent text nodes on `Node#add_next_sibling` and `Node#add_previous_sibling` calls. [#595](https://github.com/sparklemotion/nokogiri/issues/595). +* Java version inconsistency: it returns nil for empty attributes [#589](https://github.com/sparklemotion/nokogiri/issues/589) +* to_xhtml incorrectly generates `<p /></p>
` when tag is empty [#557](https://github.com/sparklemotion/nokogiri/issues/557) +* `Document#add_child` now accepts a `Node`, `NodeSet`, `DocumentFragment`, or `String`. [#546](https://github.com/sparklemotion/nokogiri/issues/546). +* `Document#create_element` now recognizes namespaces containing non-word characters (like "SOAP-ENV"). This is mostly relevant to users of Builder, which calls `Document#create_element` for nearly everything. [#531](https://github.com/sparklemotion/nokogiri/issues/531). +* File encoding broken in 1.5.0 / jruby / windows [#529](https://github.com/sparklemotion/nokogiri/issues/529) +* Java version does not return namespace defs as attrs for `::HTML` [#542](https://github.com/sparklemotion/nokogiri/issues/542) +* Bad file descriptor with Nokogiri 1.5.0 [#495](https://github.com/sparklemotion/nokogiri/issues/495) +* remove_namespace! doesn't work in pure java version [#492](https://github.com/sparklemotion/nokogiri/issues/492) +* The Nokogiri Java native build throws a null pointer exception when ActiveSupport's .blank? method is called directly on a parsed object. [#489](https://github.com/sparklemotion/nokogiri/issues/489) +* 1.5.0 Not using correct character encoding [#488](https://github.com/sparklemotion/nokogiri/issues/488) +* Raw XML string in XML Builder broken on JRuby [#486](https://github.com/sparklemotion/nokogiri/issues/486) +* Nokogiri 1.5.0 XML generation broken on JRuby [#484](https://github.com/sparklemotion/nokogiri/issues/484) +* Do not allow multiple root nodes. [#550](https://github.com/sparklemotion/nokogiri/issues/550) +* Fixes for custom XPath functions. [#605](https://github.com/sparklemotion/nokogiri/issues/605), [#606](https://github.com/sparklemotion/nokogiri/issues/606) (thanks, Juan Wajnerman!) +* `Node#to_xml` does not override `:save_with` if it is provided. [#505](https://github.com/sparklemotion/nokogiri/issues/505) +* `Node#set` is a private method (JRuby). 
[#564](https://github.com/sparklemotion/nokogiri/issues/564) (thanks, Nick Sieger!) +* C14n cleanup and `Node#canonicalize` (thanks, Ivan Pirlik!) [#563](https://github.com/sparklemotion/nokogiri/issues/563) ## 1.5.0 / 2011-07-01 -* Notes +### Notes - * See changelog from 1.4.7 +* See changelog from 1.4.7 -* Features +### Added - * extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor) +* extracted sets of `Node::SaveOptions` into `Node::SaveOptions::DEFAULT_{X,H,XH}TML` (refactor) -* Bugfixes +### Fixed - * default output of XML on JRuby is no longer formatted due to - inconsistent whitespace handling. #415 - * (JRuby) making empty NodeSets with null `nodes` member safe to operate on. #443 - * Fix a bug in advanced encoding detection that leads to partially - duplicated document when parsing an HTML file with unknown - encoding. - * Add support for . +* default output of XML on JRuby is no longer formatted due to inconsistent whitespace handling. [#415](https://github.com/sparklemotion/nokogiri/issues/415) +* (JRuby) making empty `NodeSet`s with null `nodes` member safe to operate on. [#443](https://github.com/sparklemotion/nokogiri/issues/443) +* Fix a bug in advanced encoding detection that leads to partially duplicated document when parsing an HTML file with unknown encoding. +* Add support for ``. -## 1.5.0 beta3 / 2010/12/02 +## 1.5.0 beta3 / 2010-12-02 -* Notes +### Notes - * JRuby performance tuning - * See changelog from 1.4.4 +* JRuby performance tuning +* See changelog from 1.4.4 -* Bugfixes +### Fixed - * Node#inner_text no longer returns nil. (JRuby) #264 +* `Node#inner_text` no longer returns nil. 
(JRuby) [#264](https://github.com/sparklemotion/nokogiri/issues/264) -## 1.5.0 beta2 / 2010/07/30 +## 1.5.0 beta2 / 2010-07-30 -* Notes +### Notes + +* See changelog from 1.4.3 - * See changelog from 1.4.3 +## 1.5.0 beta1 / 2010-05-22 + +### Notes -## 1.5.0 beta1 / 2010/05/22 +* JRuby support is provided by a new pure-java backend. -* Notes +### Dependencies - * JRuby support is provided by a new pure-java backend. +* Ruby 1.8.6 is deprecated. Nokogiri will install, but official support is ended. +* LibXML 2.6.16 and earlier are deprecated. Nokogiri will refuse to install. -* Deprecations +### Removed - * Ruby 1.8.6 is deprecated. Nokogiri will install, but official support is ended. - * LibXML 2.6.16 and earlier are deprecated. Nokogiri will refuse to install. - * FFI support is removed. +* FFI support is removed. ## 1.4.7 / 2011-07-01 -* Bugfixes +### Fixed - * Fix a bug in advanced encoding detection that leads to partially - duplicated document when parsing an HTML file with unknown - encoding. Thanks, Timothy Elliott (@ender672)! #478 +* Fix a bug in advanced encoding detection that leads to partially duplicated document when parsing an HTML file with unknown encoding. Thanks, Timothy Elliott ([@ender672](https://github.com/ender672))! [#478](https://github.com/sparklemotion/nokogiri/issues/478) ## 1.4.6 / 2011-06-19 -* Notes +### Notes - * This version is functionally identical to 1.4.5. - * Ruby 1.8.6 support has been restored. +* This version is functionally identical to 1.4.5. +* Ruby 1.8.6 support has been restored. ## 1.4.5 / 2011-05-19 -* New Features +### Added - * Nokogiri::HTML::Document#title accessor gets and sets the document title. - * extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor) - * Raise an exception if a string is passed to Nokogiri::XML::Schema#validate. #406 +* `Nokogiri::HTML::Document#title` accessor gets and sets the document title. 
+* extracted sets of `Node::SaveOptions` into `Node::SaveOptions::DEFAULT_{X,H,XH}TML` (refactor) +* Raise an exception if a string is passed to `Nokogiri::XML::Schema#validate`. [#406](https://github.com/sparklemotion/nokogiri/issues/406) -* Bugfixes +### Fixed - * Node#serialize-and-friends now accepts a SaveOption object as the, erm, save object. - * Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer - * (JRUBY+FFI only) Weak references are now threadsafe. #355 - * Make direct start_element() callback (currently used for - HTML::SAX::Parser) pass attributes in assoc array, just as - emulated start_element() callback does. rel. #356 - * HTML::SAX::Parser should call back a block given to parse*() if any, just as XML::SAX::Parser does. - * Add further encoding detection to HTML parser that libxml2 does not do. - * Document#remove_namespaces! now handles attributes with namespaces. #396 - * XSLT::Stylesheet#transform no longer segfaults when handed a non-XML::Document. #452 - * XML::Reader no longer segfaults when under GC pressure. #439 +* `Node#serialize`-and-friends now accepts a `SaveOption` object as the, erm, save object. +* `Nokogiri::CSS::Parser` has-a `Nokogiri::CSS::Tokenizer` +* (JRUBY+FFI only) Weak references are now threadsafe. [#355](https://github.com/sparklemotion/nokogiri/issues/355) +* Make direct `start_element()` callback (currently used for `HTML::SAX::Parser`) pass attributes in assoc array, just as emulated `start_element()` callback does. rel. [#356](https://github.com/sparklemotion/nokogiri/issues/356) +* `HTML::SAX::Parser` should call back a block given to `parse*()` if any, just as `XML::SAX::Parser` does. +* Add further encoding detection to HTML parser that libxml2 does not do. +* `Document#remove_namespaces!` now handles attributes with namespaces. [#396](https://github.com/sparklemotion/nokogiri/issues/396) +* `XSLT::Stylesheet#transform` no longer segfaults when handed a non-`XML::Document`. 
[#452](https://github.com/sparklemotion/nokogiri/issues/452) +* `XML::Reader` no longer segfaults when under GC pressure. [#439](https://github.com/sparklemotion/nokogiri/issues/439) ## 1.4.4 / 2010-11-15 -* New Features - - * XML::Node#children= sets the node's inner html (much like #inner_html=), but returns the reparent node(s). - * XSLT supports function extensions. #336 - * XPath bind parameter substitution. #329 - * XML::Reader node type constants. #369 - * SAX Parser context provides line and column information - -* Bugfixes - - * XML::DTD#attributes returns an empty hash instead of nil when there are no attributes. - * XML::DTD#{keys,each} now work as expected. #324 - * {XML,HTML}::DocumentFragment.{new,parse} no longer strip leading and trailing whitespace. #319 - * XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} return a NodeSet when passed a string. - * Unclosed tags parsed more robustly in fragments. #315 - * XML::Node#{replace,add_previous_sibling,add_next_sibling} edge cases fixed related to libxml's text node merging. #308 - * Fixed a segfault when GC occurs during xpath handler argument marshalling. #345 - * Added hack to Slop decorator to work with previously defined methods. #330 - * Fix a memory leak when duplicating child nodes. #353 - * Fixed off-by-one bug with nth-last-{child,of-type} CSS selectors when NOT using an+b notation. #354 - * Fixed passing of non-namespace attributes to SAX::Document#start_element. #356 - * Workaround for libxml2 in-context parsing bug. #362 - * Fixed NodeSet#wrap on nodes within a fragment. #331 - - -## 1.4.3 / 2010/07/28 - -* New Features - - * XML::Reader#empty_element? returns true for empty elements. #262 - * Node#remove_namespaces! now removes namespace *declarations* as well. #294 - * NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding - methods of Node do. 
- -* Bugfixes - - * XML::NodeSet#{include?,delete,push} accept an XML::Namespace - * XML::Document#parse added for parsing in the context of a document - * XML::DocumentFragment#inner_html= works with contextual parsing! #298, #281 - * lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed - * Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283 - * Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303 - * XML::Attr#add_namespace now works as expected. #252 - * HTML::DocumentFragment uses the string's encoding. #305 - * Fix the CSS3 selector translation rule for the general sibling combinator - (a.k.a. preceding selector) that incorrectly converted "E ~ F G" to - "//F//G[preceding-sibling::E]". - - -## 1.4.2 / 2010/05/22 - -* New Features - - * XML::Node#parse will parse XML or HTML fragments with respect to the - context node. - * XML::Node#namespaces returns all namespaces defined in the node and all - ancestor nodes - (previously did not return ancestors' namespace definitions). - * Added Enumerable to XML::Node - * Nokogiri::XML::Schema#validate now uses xmlSchemaValidateFile if a - filename is passed, which is faster and more memory-efficient. GH #219 - * XML::Document#create_entity will create new EntityDecl objects. GH #174 - * JRuby FFI implementation no longer uses ObjectSpace._id2ref, - instead using Charles Nutter's rocking Weakling gem. - * Nokogiri::XML::Node#first_element_child fetch the first child node that is - an ELEMENT node. - * Nokogiri::XML::Node#last_element_child fetch the last child node that is - an ELEMENT node. - * Nokogiri::XML::Node#elements fetch all children nodes that are ELEMENT - nodes. - * Nokogiri::XML::Node#add_child, #add_previous_sibling, #before, - #add_next_sibling, #after, #inner_html, #swap and #replace all now - accept a Node, DocumentFragment, NodeSet, or a string containing - markup. - * Node#fragment? 
indicates whether a node is a DocumentFragment. - -* Bugfixes - - * XML::NodeSet is now always decorated (if the document has decorators). - GH #198 - * XML::NodeSet#slice gracefully handles offset+length larger than the set - length. GH #200 - * XML::Node#content= safely unlinks previous content. GH #203 - * XML::Node#namespace= takes nil as a parameter - * XML::Node#xpath returns things other than NodeSet objects. GH #208 - * XSLT::StyleSheet#transform accepts hashes for parameters. GH #223 - * Psuedo selectors inside not() work. GH #205 - * XML::Builder doesn't break when nodes are unlinked. - Thanks to vihai! GH #228 - * Encoding can be forced on the SAX parser. Thanks Eugene Pimenov! GH #204 - * XML::DocumentFragment uses XML::Node#parse to determine children. - * Fixed a memory leak in xml reader. Thanks sdor! GH #244 - * Node#replace returns the new child node as claimed in the - RDoc. Previously returned +self+. - -* Notes - - * The Windows gems now bundle DLLs for libxml 2.7.6 and libxslt - 1.1.26. Prior to this release, libxml 2.7.3 and libxslt 1.1.24 - were bundled. - - -## 1.4.1 / 2009/12/10 - -* New Features - - * Added Nokogiri::LIBXML_ICONV_ENABLED - * Alias Node#[] to Node#attr - * XML::Node#next_element added - * XML::Node#> added for searching a nodes immediate children - * XML::NodeSet#reverse added - * Added fragment support to Node#add_child, Node#add_next_sibling, - Node#add_previous_sibling, and Node#replace. - * XML::Node#previous_element implemented - * Rubinius support - * Ths CSS selector engine now supports :has() - * XML::NodeSet#filter() was added - * XML::Node.next= and .previous= are aliases for add_next_sibling and add_previous_sibling. GH #183 - -* Bugfixes - - * XML fragments with namespaces do not raise an exception (regression in 1.4.0) - * Node#matches? works in nodes contained by a DocumentFragment. GH #158 - * Document should not define add_namespace() method. 
GH #169 - * XPath queries returning namespace declarations do not segfault. - * Node#replace works with nodes from different documents. GH #162 - * Adding XML::Document#collect_namespaces - * Fixed bugs in the SOAP4R adapter - * Fixed bug in XML::Node#next_element for certain edge cases - * Fixed load path issue with JRuby under Windows. GH #160. - * XSLT#apply_to will honor the "output method". Thanks richardlehane! - * Fragments containing leading text nodes with newlines now parse properly. GH #178. - - -## 1.4.0 / 2009/10/30 - -* Happy Birthday! - -* New Features - - * Node#at_xpath returns the first element of the NodeSet matching the XPath - expression. - * Node#at_css returns the first element of the NodeSet matching the CSS - selector. - * NodeSet#| for unions GH #119 (Thanks Serabe!) - * NodeSet#inspect makes prettier output - * Node#inspect implemented for more rubyish document inspecting - * Added XML::DTD#external_id - * Added XML::DTD#system_id - * Added XML::ElementContent for DTD Element content validity - * Better namespace declaration support in Nokogiri::XML::Builder - * Added XML::Node#external_subset - * Added XML::Node#create_external_subset - * Added XML::Node#create_internal_subset - * XML Builder can append raw strings (GH #141, patch from dudleyf) - * XML::SAX::ParserContext added - * XML::Document#remove_namespaces! for the namespace-impaired - -* Bugfixes - - * returns nil when HTML documents do not declare a meta encoding tag. GH #115 - * Uses RbConfig::CONFIG['host_os'] to adjust ENV['PATH'] GH #113 - * NodeSet#search is more efficient GH #119 (Thanks Serabe!) - * NodeSet#xpath handles custom xpath functions - * Fixing a SEGV when XML::Reader gets attributes for current node - * Node#inner_html takes the same arguments as Node#to_html GH #117 - * DocumentFragment#css delegates to it's child nodes GH #123 - * NodeSet#[] works with slices larger than NodeSet#length GH #131 - * Reparented nodes maintain their namespace. 
GH #134 - * Fixed SEGV when adding an XML::Document to NodeSet - * XML::SyntaxError can be duplicated. GH #148 - -* Deprecations - - * Hpricot compatibility layer removed - - -## 1.3.3 / 2009/07/26 - -* New Features - - * NodeSet#children returns all children of all nodes - -* Bugfixes - - * Override libxml-ruby's global error handler - * ParseOption#strict fixed - * Fixed a segfault when sending an empty string to Node#inner_html= GH #88 - * String encoding is now set to UTF-8 in Ruby 1.9 - * Fixed a segfault when moving root nodes between documents. GH #91 - * Fixed an O(n) penalty on node creation. GH #101 - * Allowing XML documents to be output as HTML documents +### Added + +* `XML::Node#children=` sets the node's inner html (much like #inner_html=), but returns the reparent node(s). +* XSLT supports function extensions. [#336](https://github.com/sparklemotion/nokogiri/issues/336) +* XPath bind parameter substitution. [#329](https://github.com/sparklemotion/nokogiri/issues/329) +* `XML::Reader` node type constants. [#369](https://github.com/sparklemotion/nokogiri/issues/369) +* SAX Parser context provides line and column information + +### Fixed + +* `XML::DTD#attributes` returns an empty hash instead of nil when there are no attributes. +* `XML::DTD#{keys,each}` now work as expected. [#324](https://github.com/sparklemotion/nokogiri/issues/324) +* `{XML,HTML}::DocumentFragment.{new,parse}` no longer strip leading and trailing whitespace. [#319](https://github.com/sparklemotion/nokogiri/issues/319) +* `XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace}` return a `NodeSet` when passed a string. +* Unclosed tags parsed more robustly in fragments. [#315](https://github.com/sparklemotion/nokogiri/issues/315) +* `XML::Node#{replace,add_previous_sibling,add_next_sibling}` edge cases fixed related to libxml's text node merging. 
[#308](https://github.com/sparklemotion/nokogiri/issues/308) +* Fixed a segfault when GC occurs during xpath handler argument marshalling. [#345](https://github.com/sparklemotion/nokogiri/issues/345) +* Added hack to `Slop` decorator to work with previously defined methods. [#330](https://github.com/sparklemotion/nokogiri/issues/330) +* Fix a memory leak when duplicating child nodes. [#353](https://github.com/sparklemotion/nokogiri/issues/353) +* Fixed off-by-one bug with `nth-last-{child,of-type}` CSS selectors when NOT using `an+b` notation. [#354](https://github.com/sparklemotion/nokogiri/issues/354) +* Fixed passing of non-namespace attributes to `SAX::Document#start_element`. [#356](https://github.com/sparklemotion/nokogiri/issues/356) +* Workaround for libxml2 in-context parsing bug. [#362](https://github.com/sparklemotion/nokogiri/issues/362) +* Fixed `NodeSet#wrap` on nodes within a fragment. [#331](https://github.com/sparklemotion/nokogiri/issues/331) + + +## 1.4.3 / 2010-07-28 + +### Added + +* `XML::Reader#empty_element?` returns true for empty elements. [#262](https://github.com/sparklemotion/nokogiri/issues/262) +* `Node#remove_namespaces!` now removes namespace *declarations* as well. [#294](https://github.com/sparklemotion/nokogiri/issues/294) +* `NodeSet#at_xpath`, `NodeSet#at_css` and `NodeSet#>` do what the corresponding methods of `Node` do. + +### Fixed + +* `XML::NodeSet#{include?,delete,push}` accept an `XML::Namespace` +* `XML::Document#parse` added for parsing in the context of a document +* `XML::DocumentFragment#inner_html=` works with contextual parsing! [#298](https://github.com/sparklemotion/nokogiri/issues/298), [#281](https://github.com/sparklemotion/nokogiri/issues/281) +* `lib/nokogiri/css/parser.y` Combined CSS functions + pseudo selectors fixed +* Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. 
[#283](https://github.com/sparklemotion/nokogiri/issues/283) +* Fixed libxml2 versionitis issue with `xmlFirstElementChild` et al. [#303](https://github.com/sparklemotion/nokogiri/issues/303) +* `XML::Attr#add_namespace` now works as expected. [#252](https://github.com/sparklemotion/nokogiri/issues/252) +* `HTML::DocumentFragment` uses the string's encoding. [#305](https://github.com/sparklemotion/nokogiri/issues/305) +* Fix the CSS3 selector translation rule for the general sibling combinator (a.k.a. preceding selector) that incorrectly converted "E ~ F G" to "//F//G[preceding-sibling::E]". + + +## 1.4.2 / 2010-05-22 + +### Added + +* `XML::Node#parse` will parse XML or HTML fragments with respect to the context node. +* `XML::Node#namespaces` returns all namespaces defined in the node and all ancestor nodes (previously did not return ancestors' namespace definitions). +* Added `Enumerable` to `XML::Node` +* `Nokogiri::XML::Schema#validate` now uses xmlSchemaValidateFile if a filename is passed, which is faster and more memory-efficient. GH [#219](https://github.com/sparklemotion/nokogiri/issues/219) +* `XML::Document#create_entity` will create new `EntityDecl` objects. GH [#174](https://github.com/sparklemotion/nokogiri/issues/174) +* JRuby FFI implementation no longer uses `ObjectSpace._id2ref`, instead using Charles Nutter's rocking Weakling gem. +* `Nokogiri::XML::Node#first_element_child` fetch the first child node that is an ELEMENT node. +* `Nokogiri::XML::Node#last_element_child` fetch the last child node that is an ELEMENT node. +* `Nokogiri::XML::Node#elements` fetch all children nodes that are ELEMENT nodes. +* `Nokogiri::XML::Node#add_child`, `#add_previous_sibling`, `#before`, `#add_next_sibling`, `#after`, `#inner_html`, `#swap` and `#replace` all now accept a `Node`, `DocumentFragment`, `NodeSet`, or a string containing markup. +* `Node#fragment?` indicates whether a node is a `DocumentFragment`. 
+ +### Fixed + +* `XML::NodeSet` is now always decorated (if the document has decorators). GH [#198](https://github.com/sparklemotion/nokogiri/issues/198) +* `XML::NodeSet#slice` gracefully handles offset+length larger than the set length. GH [#200](https://github.com/sparklemotion/nokogiri/issues/200) +* `XML::Node#content=` safely unlinks previous content. GH [#203](https://github.com/sparklemotion/nokogiri/issues/203) +* `XML::Node#namespace=` takes nil as a parameter +* `XML::Node#xpath` returns things other than `NodeSet` objects. GH [#208](https://github.com/sparklemotion/nokogiri/issues/208) +* `XSLT::StyleSheet#transform` accepts hashes for parameters. GH [#223](https://github.com/sparklemotion/nokogiri/issues/223) +* Psuedo selectors inside `not()` work. GH [#205](https://github.com/sparklemotion/nokogiri/issues/205) +* `XML::Builder` doesn't break when nodes are unlinked. Thanks to vihai! GH [#228](https://github.com/sparklemotion/nokogiri/issues/228) +* Encoding can be forced on the SAX parser. Thanks Eugene Pimenov! GH [#204](https://github.com/sparklemotion/nokogiri/issues/204) +* `XML::DocumentFragment` uses `XML::Node#parse` to determine children. +* Fixed a memory leak in xml reader. Thanks sdor! GH [#244](https://github.com/sparklemotion/nokogiri/issues/244) +* `Node#replace` returns the new child node as claimed in the RDoc. Previously returned +self+. + +### Notes + +* The Windows gems now bundle DLLs for libxml 2.7.6 and libxslt 1.1.26. Prior to this release, libxml 2.7.3 and libxslt 1.1.24 were bundled. + + +## 1.4.1 / 2009-12-10 + +### Added + +* Added `Nokogiri::LIBXML_ICONV_ENABLED` +* Alias `Node#[]` to `Node#attr` +* `XML::Node#next_element` added +* `XML::Node#>` added for searching a nodes immediate children +* `XML::NodeSet#reverse` added +* Added fragment support to `Node#add_child`, `Node#add_next_sibling`, `Node#add_previous_sibling`, and `Node#replace`. 
+* `XML::Node#previous_element` implemented +* Rubinius support +* Ths CSS selector engine now supports `:has()` +* `XML::NodeSet#filter()` was added +* `XML::Node.next=` and .previous= are aliases for add_next_sibling and add_previous_sibling. GH [#183](https://github.com/sparklemotion/nokogiri/issues/183) + +### Fixed + +* XML fragments with namespaces do not raise an exception (regression in 1.4.0) +* `Node#matches?` works in nodes contained by a `DocumentFragment`. GH [#158](https://github.com/sparklemotion/nokogiri/issues/158) +* `Document` should not define `add_namespace()` method. GH [#169](https://github.com/sparklemotion/nokogiri/issues/169) +* `XPath` queries returning namespace declarations do not segfault. +* `Node#replace` works with nodes from different documents. GH [#162](https://github.com/sparklemotion/nokogiri/issues/162) +* Adding `XML::Document#collect_namespaces` +* Fixed bugs in the SOAP4R adapter +* Fixed bug in `XML::Node#next_element` for certain edge cases +* Fixed load path issue with JRuby under Windows. GH [#160](https://github.com/sparklemotion/nokogiri/issues/160). +* `XSLT#apply_to` will honor the "output method". Thanks richardlehane! +* Fragments containing leading text nodes with newlines now parse properly. GH [#178](https://github.com/sparklemotion/nokogiri/issues/178). + + +## 1.4.0 / 2009-10-30 + +### Added + +* `Node#at_xpath` returns the first element of the `NodeSet` matching the XPath expression. +* `Node#at_css` returns the first element of the `NodeSet` matching the CSS selector. +* `NodeSet#|` for unions GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) 
+* `NodeSet#inspect` makes prettier output +* `Node#inspect` implemented for more rubyish document inspecting +* Added `XML::DTD#external_id` +* Added `XML::DTD#system_id` +* Added `XML::ElementContent` for DTD Element content validity +* Better namespace declaration support in `Nokogiri::XML::Builder` +* Added `XML::Node#external_subset` +* Added `XML::Node#create_external_subset` +* Added `XML::Node#create_internal_subset` +* XML Builder can append raw strings (GH [#141](https://github.com/sparklemotion/nokogiri/issues/141), patch from dudleyf) +* `XML::SAX::ParserContext` added +* `XML::Document#remove_namespaces!` for the namespace-impaired + +### Fixed + +* returns nil when HTML documents do not declare a meta encoding tag. GH [#115](https://github.com/sparklemotion/nokogiri/issues/115) +* Uses `RbConfig::CONFIG['host_os']` to adjust `ENV['PATH']` GH [#113](https://github.com/sparklemotion/nokogiri/issues/113) +* `NodeSet#search` is more efficient GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) +* `NodeSet#xpath` handles custom xpath functions +* Fixing a SEGV when `XML::Reader` gets attributes for current node +* `Node#inner_html` takes the same arguments as `Node#to_html` GH [#117](https://github.com/sparklemotion/nokogiri/issues/117) +* `DocumentFragment#css` delegates to it's child nodes GH [#123](https://github.com/sparklemotion/nokogiri/issues/123) +* `NodeSet#[]` works with slices larger than `NodeSet#length` GH [#131](https://github.com/sparklemotion/nokogiri/issues/131) +* Reparented nodes maintain their namespace. GH [#134](https://github.com/sparklemotion/nokogiri/issues/134) +* Fixed SEGV when adding an `XML::Document` to `NodeSet` +* `XML::SyntaxError` can be duplicated. 
GH [#148](https://github.com/sparklemotion/nokogiri/issues/148) + +### Removed + +* Hpricot compatibility layer removed + + +## 1.3.3 / 2009-07-26 + +### Added + +* `NodeSet#children` returns all children of all nodes + +### Fixed + +* Override libxml-ruby's global error handler +* `ParseOption#strict` fixed +* Fixed a segfault when sending an empty string to `Node#inner_html=` GH [#88](https://github.com/sparklemotion/nokogiri/issues/88) +* String encoding is now set to UTF-8 in Ruby 1.9 +* Fixed a segfault when moving root nodes between documents. GH [#91](https://github.com/sparklemotion/nokogiri/issues/91) +* Fixed an O(n) penalty on node creation. GH [#101](https://github.com/sparklemotion/nokogiri/issues/101) +* Allowing XML documents to be output as HTML documents -* Deprecations +### Deprecations - * Hpricot compatibility layer will be removed in 1.4.0 +* Hpricot compatibility layer will be removed in 1.4.0 ## 1.3.2 / 2009-06-22 -* New Features +### Added - * Nokogiri::XML::DTD#validate will validate your document +* `Nokogiri::XML::DTD#validate` will validate your document -* Bugfixes +### Fixed - * Nokogiri::XML::NodeSet#search will search top level nodes. GH #73 - * Removed namespace related methods from Nokogiri::XML::Document - * Fixed a segfault when a namespace was added twice - * Made nokogiri work with Snow Leopard GH #79 - * Mailing list has moved to: http://groups.google.com/group/nokogiri-talk - * HTML fragments now correctly handle comments and CDATA blocks. GH #78 - * Nokogiri::XML::Document#clone is now an alias of dup +* `Nokogiri::XML::NodeSet#search` will search top level nodes. 
GH [#73](https://github.com/sparklemotion/nokogiri/issues/73) +* Removed namespace related methods from `Nokogiri::XML::Document` +* Fixed a segfault when a namespace was added twice +* Made nokogiri work with Snow Leopard GH [#79](https://github.com/sparklemotion/nokogiri/issues/79) +* Mailing list has moved to: http://groups.google.com/group/nokogiri-talk +* HTML fragments now correctly handle comments and CDATA blocks. GH [#78](https://github.com/sparklemotion/nokogiri/issues/78) +* `Nokogiri::XML::Document#clone` is now an alias of dup -* Deprecations +### Deprecations - * Nokogiri::XML::SAX::Document#start_element_ns is deprecated, please switch - to Nokogiri::XML::SAX::Document#start_element_namespace - * Nokogiri::XML::SAX::Document#end_element_ns is deprecated, please switch - to Nokogiri::XML::SAX::Document#end_element_namespace +* `Nokogiri::XML::SAX::Document#start_element_ns` is deprecated, please switch to `Nokogiri::XML::SAX::Document#start_element_namespace` +* `Nokogiri::XML::SAX::Document#end_element_ns` is deprecated, please switch to `Nokogiri::XML::SAX::Document#end_element_namespace` ## 1.3.1 / 2009-06-07 -* Bugfixes +### Fixed - * extconf.rb checks for optional RelaxNG and Schema functions - * Namespace nodes are added to the Document node cache +* `extconf.rb` checks for optional RelaxNG and Schema functions +* Namespace nodes are added to the Document node cache ## 1.3.0 / 2009-05-30 -* New Features - - * Builder changes scope based on block arity - * Builder supports methods ending in underscore similar to tagz - * Nokogiri::XML::Node#<=> compares nodes based on Document position - * Nokogiri::XML::Node#matches? returns true if Node can be found with - given selector. 
- * Nokogiri::XML::Node#ancestors now returns an Nokogiri::XML::NodeSet - * Nokogiri::XML::Node#ancestors will match parents against optional selector - * Nokogiri::HTML::Document#meta_encoding for getting the meta encoding - * Nokogiri::HTML::Document#meta_encoding= for setting the meta encoding - * Nokogiri::XML::Document#encoding= to set the document encoding - * Nokogiri::XML::Schema for validating documents against XSD schema - * Nokogiri::XML::RelaxNG for validating documents against RelaxNG schema - * Nokogiri::HTML::ElementDescription for fetching HTML element descriptions - * Nokogiri::XML::Node#description to fetch the node description - * Nokogiri::XML::Node#accept implements Visitor pattern - * bin/nokogiri for easily examining documents (Thanks Yutaka HARA!) - * Nokogiri::XML::NodeSet now supports more Array and Enumerable operators: - index, delete, slice, - (difference), + (concatenation), & (intersection), - push, pop, shift, == - * Nokogiri.XML, Nokogiri.HTML take blocks that receive - Nokogiri::XML::ParseOptions objects - * Nokogiri::XML::Node#namespace returns a Nokogiri::XML::Namespace - * Nokogiri::XML::Node#namespace= for setting a node's namespace - * Nokogiri::XML::DocumentFragment and Nokogiri::HTML::DocumentFragment - have a sensible API and a more robust implementation. - * JRuby 1.3.0 support via FFI. - -* Bugfixes - - * Fixed a problem with nil passed to CDATA constructor - * Fragment method deals with regular expression characters - (Thanks Joel!) LH #73 - * Fixing builder scope issues LH #61, LH #74, LH #70 - * Fixed a problem when adding a child could remove the child namespace LH#78 - * Fixed bug with unlinking a node then reparenting it. (GH#22) - * Fixed failure to catch errors during XSLT parsing (GH#32) - * Fixed a bug with attribute conditions in CSS selectors (GH#36) - * Fixed intolerance of HTML attributes without values in Node#before/after/inner_html=. 
(GH#35) +### Added + +* Builder changes scope based on block arity +* Builder supports methods ending in underscore similar to tagz +* `Nokogiri::XML::Node#<=>` compares nodes based on Document position +* `Nokogiri::XML::Node#matches?` returns true if Node can be found with given selector. +* `Nokogiri::XML::Node#ancestors` now returns an `Nokogiri::XML::NodeSet` +* `Nokogiri::XML::Node#ancestors` will match parents against optional selector +* `Nokogiri::HTML::Document#meta_encoding` for getting the meta encoding +* `Nokogiri::HTML::Document#meta_encoding=` for setting the meta encoding +* `Nokogiri::XML::Document#encoding=` to set the document encoding +* `Nokogiri::XML::Schema` for validating documents against XSD schema +* `Nokogiri::XML::RelaxNG` for validating documents against RelaxNG schema +* `Nokogiri::HTML::ElementDescription` for fetching HTML element descriptions +* `Nokogiri::XML::Node#description` to fetch the node description +* `Nokogiri::XML::Node#accept` implements Visitor pattern +* `bin/nokogiri` for easily examining documents (Thanks Yutaka HARA!) +* `Nokogiri::XML::NodeSet` now supports more Array and Enumerable operators: index, delete, slice, - (difference), + (concatenation), & (intersection), push, pop, shift, == +* `Nokogiri.XML`, `Nokogiri.HTML` take blocks that receive `Nokogiri::XML::ParseOptions` objects +* `Nokogiri::XML::Node#namespace` returns a `Nokogiri::XML::Namespace` +* `Nokogiri::XML::Node#namespace=` for setting a node's namespace +* `Nokogiri::XML::DocumentFragment` and `Nokogiri::HTML::DocumentFragment` have a sensible API and a more robust implementation. +* JRuby 1.3.0 support via FFI. + +### Fixed + +* Fixed a problem with nil passed to CDATA constructor +* Fragment method deals with regular expression characters (Thanks Joel!) 
LH [#73](https://github.com/sparklemotion/nokogiri/issues/73) +* Fixing builder scope issues LH [#61](https://github.com/sparklemotion/nokogiri/issues/61), LH [#74](https://github.com/sparklemotion/nokogiri/issues/74), LH [#70](https://github.com/sparklemotion/nokogiri/issues/70) +* Fixed a problem when adding a child could remove the child namespace LH[#78](https://github.com/sparklemotion/nokogiri/issues/78) +* Fixed bug with unlinking a node then reparenting it. (GH[#22](https://github.com/sparklemotion/nokogiri/issues/22)) +* Fixed failure to catch errors during XSLT parsing (GH[#32](https://github.com/sparklemotion/nokogiri/issues/32)) +* Fixed a bug with attribute conditions in CSS selectors (GH[#36](https://github.com/sparklemotion/nokogiri/issues/36)) +* Fixed intolerance of HTML attributes without values in `Node#{before/after/inner_html=}`. (GH[#35](https://github.com/sparklemotion/nokogiri/issues/35)) ## 1.2.3 / 2009-03-22 -* Bugfixes +### Fixed - * Fixing bug where a node is passed in to Node#new - * Namespace should be assigned on DocumentFragment creation. LH #66 - * Nokogiri::XML::NodeSet#dup works GH #10 - * Nokogiri::HTML returns an empty Document when given a blank string GH#11 - * Adding a child will remove duplicate namespace declarations LH #67 - * Builder methods take a hash as a second argument +* Fixing bug where a node is passed in to `Node#new` +* Namespace should be assigned on DocumentFragment creation. 
LH [#66](https://github.com/sparklemotion/nokogiri/issues/66) +* `Nokogiri::XML::NodeSet#dup` works GH [#10](https://github.com/sparklemotion/nokogiri/issues/10) +* `Nokogiri::HTML` returns an empty Document when given a blank string GH[#11](https://github.com/sparklemotion/nokogiri/issues/11) +* Adding a child will remove duplicate namespace declarations LH [#67](https://github.com/sparklemotion/nokogiri/issues/67) +* Builder methods take a hash as a second argument ## 1.2.2 / 2009-03-14 -* New features +### Added - * Nokogiri may be used with soap4r. See XSD::XMLParser::Nokogiri - * Nokogiri::XML::Node#inner_html= to set the inner html for a node - * Nokogiri builder interface improvements - * Nokogiri::XML::Node#swap swaps html for current node (LH #50) +* Nokogiri may be used with soap4r. See `XSD::XMLParser::Nokogiri` +* `Nokogiri::XML::Node#inner_html=` to set the inner html for a node +* Nokogiri builder interface improvements +* `Nokogiri::XML::Node#swap` swaps html for current node (LH [#50](https://github.com/sparklemotion/nokogiri/issues/50)) -* Bugfixes +### Fixed - * Fixed a tag nesting problem in the Builder API (LH #41) - * Nokogiri::HTML.fragment will properly handle text only nodes (LH #43) - * Nokogiri::XML::Node#before will prepend text nodes (LH #44) - * Nokogiri::XML::Node#after will append text nodes - * Nokogiri::XML::Node#search automatically registers root namespaces (LH #42) - * Nokogiri::XML::NodeSet#search automatically registers namespaces - * Nokogiri::HTML::NamedCharacters delegates to libxml2 - * Nokogiri::XML::Node#[] can take a symbol (LH #48) - * vasprintf for windows updated. Thanks Geoffroy Couprie! 
- * Nokogiri::XML::Node#[]= should not encode entities (LH #55) - * Namespaces should be copied to reparented nodes (LH #56) - * Nokogiri uses encoding set on the string for default in Ruby 1.9 - * Document#dup should create a new document of the same type (LH #59) - * Document should not have a parent method (LH #64) +* Fixed a tag nesting problem in the Builder API (LH [#41](https://github.com/sparklemotion/nokogiri/issues/41)) +* `Nokogiri::HTML.fragment` will properly handle text only nodes (LH [#43](https://github.com/sparklemotion/nokogiri/issues/43)) +* `Nokogiri::XML::Node#before` will prepend text nodes (LH [#44](https://github.com/sparklemotion/nokogiri/issues/44)) +* `Nokogiri::XML::Node#after` will append text nodes +* `Nokogiri::XML::Node#search` automatically registers root namespaces (LH [#42](https://github.com/sparklemotion/nokogiri/issues/42)) +* `Nokogiri::XML::NodeSet#search` automatically registers namespaces +* `Nokogiri::HTML::NamedCharacters` delegates to libxml2 +* `Nokogiri::XML::Node#[]` can take a symbol (LH [#48](https://github.com/sparklemotion/nokogiri/issues/48)) +* vasprintf for windows updated. Thanks Geoffroy Couprie! +* `Nokogiri::XML::Node#[]=` should not encode entities (LH [#55](https://github.com/sparklemotion/nokogiri/issues/55)) +* Namespaces should be copied to reparented nodes (LH [#56](https://github.com/sparklemotion/nokogiri/issues/56)) +* Nokogiri uses encoding set on the string for default in Ruby 1.9 +* `Document#dup` should create a new document of the same type (LH [#59](https://github.com/sparklemotion/nokogiri/issues/59)) +* `Document` should not have a parent method (LH [#64](https://github.com/sparklemotion/nokogiri/issues/64)) ## 1.2.1 / 2009-02-23 -* Bugfixes +### Fixed - * Fixed a CSS selector space bug - * Fixed Ruby 1.9 String Encoding (Thanks 角谷さん!) +* Fixed a CSS selector space bug +* Fixed Ruby 1.9 String Encoding (Thanks 角谷さん!) 
## 1.2.0 / 2009-02-22 -* New features +### Added - * CSS search now supports CSS3 namespace queries - * Namespaces on the root node are automatically registered - * CSS queries use the default namespace - * Nokogiri::XML::Document#encoding get encoding used for this document - * Nokogiri::XML::Document#url get the document url - * Nokogiri::XML::Node#add_namespace add a namespace to the node LH#38 - * Nokogiri::XML::Node#each iterate over attribute name, value pairs - * Nokogiri::XML::Node#keys get all attribute names - * Nokogiri::XML::Node#line get the line number for a node (Thanks Dirkjan Bussink!) - * Nokogiri::XML::Node#serialize now takes an optional encoding parameter - * Nokogiri::XML::Node#to_html, to_xml, and to_xhtml take an optional encoding - * Nokogiri::XML::Node#to_str - * Nokogiri::XML::Node#to_xhtml to produce XHTML documents - * Nokogiri::XML::Node#values get all attribute values - * Nokogiri::XML::Node#write_to writes the node to an IO object with optional encoding - * Nokogiri::XML::ProcessingInstrunction.new - * Nokogiri::XML::SAX::PushParser for all your push parsing needs. +* CSS search now supports CSS3 namespace queries +* Namespaces on the root node are automatically registered +* CSS queries use the default namespace +* `Nokogiri::XML::Document#encoding` get encoding used for this document +* `Nokogiri::XML::Document#url` get the document url +* `Nokogiri::XML::Node#add_namespace` add a namespace to the node LH[#38](https://github.com/sparklemotion/nokogiri/issues/38) +* `Nokogiri::XML::Node#each` iterate over attribute name, value pairs +* `Nokogiri::XML::Node#keys` get all attribute names +* `Nokogiri::XML::Node#line` get the line number for a node (Thanks Dirkjan Bussink!) 
+* `Nokogiri::XML::Node#serialize` now takes an optional encoding parameter +* `Nokogiri::XML::Node#to_html`, to_xml, and to_xhtml take an optional encoding +* `Nokogiri::XML::Node#to_str` +* `Nokogiri::XML::Node#to_xhtml` to produce XHTML documents +* `Nokogiri::XML::Node#values` get all attribute values +* `Nokogiri::XML::Node#write_to` writes the node to an IO object with optional encoding +* `Nokogiri::XML::ProcessingInstruction.new` +* `Nokogiri::XML::SAX::PushParser` for all your push parsing needs. -* Bugfixes +### Fixed - * Fixed Nokogiri::XML::Document#dup - * Fixed header detection. Thanks rubikitch! - * Fixed a problem where invalid CSS would cause the parser to hang +* Fixed `Nokogiri::XML::Document#dup` +* Fixed header detection. Thanks rubikitch! +* Fixed a problem where invalid CSS would cause the parser to hang -* Deprecations +### Deprecations - * Nokogiri::XML::Node.new_from_str will be deprecated in 1.3.0 +* `Nokogiri::XML::Node.new_from_str` will be deprecated in 1.3.0 -* API Changes +### Changed - * Nokogiri::HTML.fragment now returns an XML::DocumentFragment (LH #32) +* `Nokogiri::HTML.fragment` now returns an XML::DocumentFragment (LH [#32](https://github.com/sparklemotion/nokogiri/issues/32)) ## 1.1.1 -* New features +### Added - * Added XML::Node#elem? - * Added XML::Node#attribute_nodes - * Added XML::Attr - * XML::Node#delete added. - * XML::NodeSet#inner_html added. +* Added `XML::Node#elem?` +* Added `XML::Node#attribute_nodes` +* Added `XML::Attr` +* `XML::Node#delete` added. +* `XML::NodeSet#inner_html` added. -* Bugfixes +### Fixed - * Not including an HTML entity for \r for HTML nodes. - * Removed CSS::SelectorHandler and XML::XPathHandler - * XML::Node#attributes returns an Attr node for the value. - * XML::NodeSet implements to_xml +* Not including an HTML entity for \r for HTML nodes. +* Removed `CSS::SelectorHandler` and `XML::XPathHandler` +* `XML::Node#attributes` returns an `Attr` node for the value. 
+* `XML::NodeSet` implements `to_xml` ## 1.1.0 -* New Features +### Added - * Custom XPath functions are now supported. See Nokogiri::XML::Node#xpath - * Custom CSS pseudo classes are now supported. See Nokogiri::XML::Node#css - * Nokogiri::XML::Node#<< will add a child to the current node +* Custom XPath functions are now supported. See `Nokogiri::XML::Node#xpath` +* Custom CSS pseudo classes are now supported. See `Nokogiri::XML::Node#css` +* `Nokogiri::XML::Node#<<` will add a child to the current node -* Bugfixes +### Fixed - * Mutex lock on CSS cache access - * Fixed build problems with GCC 3.3.5 - * XML::Node#to_xml now takes an indentation argument - * XML::Node#dup takes an optional depth argument - * XML::Node#add_previous_sibling returns new sibling node. +* Mutex lock on CSS cache access +* Fixed build problems with GCC 3.3.5 +* `XML::Node#to_xml` now takes an indentation argument +* `XML::Node#dup` takes an optional depth argument +* `XML::Node#add_previous_sibling` returns new sibling node. 
## 1.0.7 -* Bugfixes +### Fixed - * Fixed memory leak when using Dike - * SAX parser now parses IO streams - * Comment nodes have their own class - * Nokogiri() should delegate to Nokogiri.parse() - * Prepending rather than appending to ENV['PATH'] on windows - * Fixed a bug in complex CSS negation selectors +* Fixed memory leak when using Dike +* SAX parser now parses IO streams +* Comment nodes have their own class +* `Nokogiri()` should delegate to `Nokogiri.parse()` +* Prepending rather than appending to `ENV['PATH']` on windows +* Fixed a bug in complex CSS negation selectors ## 1.0.6 -* 5 Bugfixes +### Fixed - * XPath Parser raises a SyntaxError on parse failure - * CSS Parser raises a SyntaxError on parse failure - * filter() and not() hpricot compatibility added - * CSS searches via Node#search are now always relative - * CSS to XPath conversion is now cached +* XPath Parser raises a `SyntaxError` on parse failure +* CSS Parser raises a `SyntaxError` on parse failure +* `filter()` and `not()` hpricot compatibility added +* CSS searches via `Node#search` are now always relative +* CSS to XPath conversion is now cached ## 1.0.5 -* Bugfixes +### Fixed - * Added mailing list and ticket tracking information to the README.txt - * Sets ENV['PATH'] on windows if it doesn't exist - * Caching results of NodeSet#[] on Document +* Added mailing list and ticket tracking information to the `README.txt` +* Sets `ENV['PATH']` on windows if it doesn't exist +* Caching results of `NodeSet#[]` on `Document` ## 1.0.4 -* Bugfixes +### Fixed - * Changed memory management from weak refs to document refs - * Plugged some memory leaks - * Builder blocks can call methods from surrounding contexts +* Changed memory management from weak refs to document refs +* Plugged some memory leaks +* Builder blocks can call methods from surrounding contexts ## 1.0.3 -* 5 Bugfixes +### Fixed - * NodeSet now implements to_ary - * XML::Document should not implement parent - * More GC Bugs fixed. 
(Mike is AWESOME!) - * Removed RARRAY_LEN for 1.8.5 compatibility. Thanks Shane Hanna. - * inner_html fixed. (Thanks Yehuda!) +* `NodeSet` now implements `to_ary` +* `XML::Document` should not implement parent +* More GC Bugs fixed. (Mike is AWESOME!) +* Removed RARRAY_LEN for 1.8.5 compatibility. Thanks Shane Hanna. +* `inner_html` fixed. (Thanks Yehuda!) ## 1.0.2 -* 1 Bugfix +### Fixed - * extconf.rb should not check for frex and racc +* `extconf.rb` should not check for frex and racc ## 1.0.1 -* 1 Bugfix +### Fixed - * Made sure extconf.rb searched libdir and prefix so that ports libxml/ruby - will link properly. Thanks lucsky! +* Made sure `extconf.rb` searched libdir and prefix so that ports libxml/ruby will link properly. Thanks lucsky! ## 1.0.0 / 2008-07-13 -* 1 major enhancement +### Added - * Birthday! +* Birthday! diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f61bc1e4e8..192ddce609 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,33 +1,397 @@ -# Issue and Pull Request Guidelines +# Contributing to Nokogiri -Thank you for helping make Nokogiri better! +This doc is intended to be a short introduction on how to modify and maintain Nokogiri. + +If you're looking for guidance on filing a bug report or getting support, please visit the ["Getting Help" tutorial](http://www.nokogiri.org/tutorials/getting_help.html) at the [nokogiri.org](http://nokogiri.org) site. 
+ + +## Contents + + + + + +- [Introduction](#introduction) +- [Code of Conduct](#code-of-conduct) +- [Some guiding principles of the project](#some-guiding-principles-of-the-project) +- [Where to start getting involved](#where-to-start-getting-involved) +- [Submitting Pull Requests](#submitting-pull-requests) +- [Branch Management and Release Management](#branch-management-and-release-management) +- [How to set up your local development environment](#how-to-set-up-your-local-development-environment) +- [How to run the tests](#how-to-run-the-tests) +- [Style Guide](#style-guide) +- [How Continuous Integration ("CI") is configured](#how-continuous-integration-ci-is-configured) +- [Packaging releases](#packaging-releases) +- [Other utilities](#other-utilities) +- [Bumping Java dependencies](#bumping-java-dependencies) +- [Rake tasks](#rake-tasks) +- [Making a release](#making-a-release) + + + +## Introduction + +Hello there! I'm super excited that you're interested in contributing to Nokogiri. Welcome! + +This document is intended only to provide a brief introduction on how to contribute to Nokogiri. It's not a complete specification of everything you need to know, so if you want to know more, I encourage you to reach out to the maintainers via email, twitter, or a new Github issue. We'd love to get to know you a bit better! ## Code of Conduct Our full Code of Conduct is in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md). -For best results, be nice. Remember that Nokogiri maintainers are volunteers, and treat them with respect. +For best results, be kind. Remember that Nokogiri maintainers are volunteers, and treat them with respect. Do not act entitled to service. Do not be rude. Do not use judgmental or foul language. 
+ + +## Some guiding principles of the project + +The top guiding principles, as noted in the README are: + +- be secure-by-default by treating all documents as **untrusted** by default +- be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers + + +Nokogiri supports both CRuby and JRuby, and has native code specific to each (though much Ruby code is shared between them). Some related secondary principles are: + +- Whenever possible, implement the same functionality for both CRuby and JRuby. +- Whenever possible, implement shared behavior as shared Ruby code (i.e., write as little native code as reasonable). +- Whenever possible, avoid writing tests that are platform-specific (but if you do, use `skip` to provide an explanation). + +Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example). + +Nokogiri is widely used in the Ruby ecosystem, and so extra care should be taken to avoid introducing breaking changes. Please read our [Semantic Versioning Policy](https://nokogiri.org/index.html#semantic-versioning-policy) to understand what we consider to be a breaking change. + + +## Where to start getting involved + +Please take a look at our [Issues marked "Help Wanted"](https://github.com/sparklemotion/nokogiri/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). + +Also, [pull requests for documentation improvements are always welcome](#documentation)! + + +## Submitting Pull Requests + +Pull requests should be made with `main` as the merge base. See the next section for details. 
+ +**Pull requests that introduce behavior change must always contain a test** demonstrating the behavior being introduced, fixed, or changed. These tests should ideally communicate to the maintainers the problem being solved. We will ask you for clarification if we don't understand the problem you're trying to solve. + +If the pull request contains a feature or a bugfix, please make sure to create a CHANGELOG entry in the "unreleased" section. + +Please do not submit pull requests that make purely cosmetic changes to the code (style, naming, etc.). While we recognize that the code can always be improved, we prefer that you focus on more impactful contributions. + +Feel free to push a "work in progress" to take advantage of the feedback loops from CI. But then please indicate that it's still in progress by marking it as a [Draft Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests#draft-pull-requests). + + +## Branch Management and Release Management + +Nokogiri follows SemVer, and some nuances of that policy are spelled out in [Semantic Versioning Policy](https://nokogiri.org/index.html#semantic-versioning-policy). + +Development should be happening on `main`, which sets `Nokogiri::VERSION` to a development version of the next minor release (e.g., `"1.14.0.dev"`). All pull requests should have `main` as the merge base. + +Patch releases should be made by cherry-picking commits from `main` onto the release branch (e.g., `v1.13.x`) in a pull request labeled `backport`. + + +## How to set up your local development environment + +### Basic + +``` sh +git clone --recurse-submodules https://github.com/sparklemotion/nokogiri +cd nokogiri +bundle install +``` + + +### Advanced + +Please install the latest or previous version of CRuby (e.g., 3.2 or 3.1 as of 2023-01), and a recent version of JRuby. We recommend using `rbenv`, which is used in test scripts when necessary to test gems against multiple rubies. 
+ +Please install a system version of libxml2/libxslt (see [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries) for details) so that you can test against both the packaged libraries and your system libraries. + +We recommend that you install `valgrind` if you can, but it's only necessary for debugging problems so feel free to wait until you need it. (I'm not sure valgrind is easily available on macOS.) + +If you plan to package precompiled native gems, make sure `docker` is installed and is working properly. + + +## How to run the tests + +Note that `rake test` does not compile the native extension, and this is intentional (so we can run the test suite against an installed gem). If you're modifying the extension code, please make sure you re-compile each time you run the tests to ensure you're testing your changes. + + +### The short version + +``` sh +bundle exec rake compile test +``` + +To run a focused test, use MiniTest's `TESTOPTS`: + +``` sh +bundle exec rake compile test TESTOPTS="-n/test_last_element_child/" +``` + + +### CRuby advanced usage + +Test using your system's libraries: + +``` sh +bundle exec rake clean # blow away pre-existing libraries using packaged libs +bundle exec rake compile test -- --enable-system-libraries +``` + +Run performance tests: + +``` sh +bundle exec rake compile test:bench +``` + + +Run tests using valgrind: + +``` sh +bundle exec rake compile test:valgrind +``` + + +Run tests in the debugger: + +``` sh +bundle exec rake compile test:gdb +# or +bundle exec rake compile test:lldb +``` + + +Run tests and look for new memory leaks: + +``` sh +bundle exec rake compile test:memcheck +``` + + +Note that by default the test suite will run a major GC after each test completes. This has been shown to be useful for localizing some classes of memory bugs, but does slow the suite down.
Some variations of the test suite behavior are available (see `test/helper.rb` for more info): + +``` sh +# see failure messages immediately +NOKOGIRI_TEST_FAIL_FAST=t bundle exec rake compile test + +# ordinary GC behavior +NOKOGIRI_TEST_GC_LEVEL=normal bundle exec rake compile test + +# minor GC after each test +NOKOGIRI_TEST_GC_LEVEL=minor bundle exec rake compile test + +# major GC after each test +NOKOGIRI_TEST_GC_LEVEL=major bundle exec rake compile test + +# major GC after each test and GC compaction after every 20 tests +NOKOGIRI_TEST_GC_LEVEL=compact bundle exec rake compile test + +# verify references after compaction after every 20 tests +# (see https://alanwu.space/post/check-compaction/) +NOKOGIRI_TEST_GC_LEVEL=verify bundle exec rake compile test + +# run with GC "stress mode" on +NOKOGIRI_TEST_GC_LEVEL=stress bundle exec rake compile test +``` + + +### libxml2 advanced usage + +If you want to build Nokogiri against a modified version of libxml2, clone libxml2 to `../libxml2` and then run `scripts/compile-against-libxml2-source`. + +That script also takes an optional command to run with the proper environment variables set to use the local libxml2 library, which can be useful when trying to `git bisect` against libxml2. So, for example: + +``` sh +scripts/compile-against-libxml2-source bundle exec rake test +``` + + +### gumbo HTML5 parser + +To run the test suite for the gumbo parser: + +``` sh +bundle exec rake gumbo +``` + +Please note that additional html5lib tests for Nokogiri's HTML5 parser exist in a submodule. If you haven't checked that submodule out, here's how to do so: + +``` sh +git submodule update --init # test/html5lib-tests +bundle exec rake compile test +``` + + +## Style Guide + +### Documentation + +We use `rdoc` to build Nokogiri's documentation. Run `rake rdoc` to build into the `./html` directory, and see the rdoc tasks in [rakelib/rdoc.rake](rakelib/rdoc.rake). 
+ +Previously we made some effort to move towards `yard` but that work was stopped (and the decision record can be found at [RFC: convert to use `yard` for documentation](https://github.com/sparklemotion/nokogiri/issues/1996)). + +Docstrings should be in `RDoc::Markup` format, though simple docstrings may be in Markdown (using `:markup: markdown`). + +If you submit pull requests that improve documentation, **I will happily merge them** and credit you in the CHANGELOG. + +Some guidelines (see [lib/nokogiri/xml/node.rb](lib/nokogiri/xml/node.rb) and [ext/nokogiri/xml/node.c](ext/nokogiri/xml/node.c) for examples): + +- Use `:call-seq:` to ... + - note the return type of the method whenever possible, e.g. `:call-seq: upcase(name) → String` + - name all the aliases of a method + - indicate block/yield usage of a method +- Briefly explain the purpose of the method, what it returns, and what side effects it has +- Use a `[Parameters]` definition to note the expected types of all the parameters as a bulleted list +- Use a `[Returns]` definition to note the return type +- Use a `[Yields]` definition to note the block parameters +- Use a `⚠` character to warn the user about tricky usage +- Use a `💡` character to call attention to important notes +- `See also:` should be used to call out related methods +- `Since` should be used to indicate the version in which the code was introduced +- Prefer to **show** nuanced behavior in code examples, rather than try to explain it in prose. + + +### Code + +I don't feel very strongly about code style, but this project follows [Shopify's Ruby Style Guide](https://shopify.github.io/ruby-style-guide/), and for C and Java code the project uses the `astyle` configuration laid out in `./rakelib/format.rake`. + +You can auto-format the C, Java, and Ruby code with `rake format`. + +There are some pending Rubocop rules in `.rubocop_todo.yml`. If you'd like to fix them up, I will happily merge your pull request. 
+ +No, I don't want to debate any of the style choices. + + +## How Continuous Integration ("CI") is configured + +The bulk of CI has been running in GitHub Actions since May 2021: https://github.com/sparklemotion/nokogiri/actions + +However, we also run tests against 32-bit windows (which aren't supported by GA as of this writing) in Appveyor: https://ci.appveyor.com/project/flavorjones/nokogiri + +A known hole in CI coverage is the lack of native gem tests for arm64-darwin. + + +### Coverage + +The `ci.yml` pipeline includes jobs to: + +- basic security sanity check and formatting check, using Rubocop +- fast feedback for obvious failures: run against system libraries on vanilla ubuntu +- run the Gumbo parser tests on ubuntu, macos, and windows +- run on all supported versions of CRuby: + - once with packaged libraries + - once with system libraries + - once on valgrind (to look for memory bugs) +- run the test suite looking for new memory leaks (using ruby_memcheck) +- run on JRuby +- run on TruffleRuby +- run on a Musl (Alpine) system: + - against system libraries + - with valgrind using packaged libraries +- run with libxml-ruby loaded (because this can interact with libxml2 in conflicting ways) + - against system libraries + - with valgrind using packaged libraries +- build a "ruby" platform gem + - install and test on linux, macos, and windows +- build native gems + - install and test against all supported versions of CRuby + - install and test on a variety of linux, macos, and windows systems +- build a jruby gem, install and test it + +The `upstream.yml` pipeline includes jobs to: + +- run against libxml2 and libxslt head (linux), including a valgrind check +- run against CRuby head (linux, windows, macos) including a valgrind check +- run against JRuby head +- run html5lib-tests from that project's `origin/master` + +The `downstream.yml` pipeline includes jobs to run notable downstream dependents against Nokogiri `main`.
+ +The `generate-ci-images.yml` pipeline builds some containers used by the other pipelines once a week. This is primarily an optimization to make sure system packages (like `libxml2-dev` and `valgrind`) are already installed. See `oci-images/nokogiri-test/` for details on what's in these containers. + + +### Valgrind and `ruby_memcheck` + +We rely heavily on Valgrind and [`ruby_memcheck`](https://github.com/Shopify/ruby_memcheck) to catch memory bugs by running in combination with every version of CRuby. + +We use suppressions primarily to quiet known small memory leaks or quirks of certain Ruby versions. See the files in the `/suppressions` directory and `/rakelib/test.rake` for more information. + + +### Benchmark / Performance tests + +A separate suite, `test:bench`, can be run to ensure a few performance expectations. As of 2022-02 this suite is small, but we can grow it over time. These tests are run in CI on CRuby and JRuby. + +These tests should use `Nokogiri::TestBenchmark` as the base class, and be in a file matching the glob `test/**/bench_*.rb`. + + +### Helpful hints when writing new CI jobs + +- Always checkout the source code **including submodules** (for the html5lib tests) +- When testing packaged libraries (not system libraries), cache either `ports/` (for compiled libraries) or `ports/archives/` (for just tarballs) + - note that `libgumbo` is built outside of `ports/` to allow us to do this caching safely + + +## Packaging releases + +As a prerequisite please make sure you have `docker` correctly installed, to build native (precompiled) gems. + +Run `scripts/build-gems` which will package gems for all supported platforms, and run some basic sanity tests on those packages using `scripts/test-gem-set`, `scripts/test-gem-file-contents`, and `scripts/test-gem-installation`. + +See [Making a release](#making-a-release) below for the checklist. 
+ + +## Other utilities + +`scripts/test-exported-symbols` checks the compiled `nokogiri.so` library for surprising exported symbols. This script likely only works on Linux, sorry. + +`scripts/test-nokogumbo-compatibility` is used by CI to ensure that Nokogumbo installs correctly against the currently-installed version of Nokogiri. Nokogumbo receives this extra care because it compiles against Nokogiri's and libxml2's header files, and makes assumptions about what symbols are exported by Nokogiri's extension library. + +`scripts/files-modified-by-open-prs` is a hack to see what files are being proposed to change in the set of open pull requests. This might be useful if you're thinking about radically changing a file, to be aware of what merge conflicts might result. This could probably be a rake task. + +There's a `Vagrantfile` in the project root which I've used once or twice to try to reproduce problems on non-Linux systems (like OpenBSD). It's not well-maintained so YMMV. + + +## Bumping Java dependencies + +Java dependencies, in the form of `.jar` files, are all vendored as part of the `java` platform gem. -Do not act entitled to service. Do not be rude. Do not use judgmental or foul language. +We use [`jar-dependencies`](https://github.com/mkristian/jar-dependencies) as a development dependency to manage the project's Java dependencies. Note, however, that we use our own fork of NekoDTD that lives at https://github.com/sparklemotion/nekodtd -The maintainers reserve the right to delete comments that are rude, or that contain foul language. The maintainers reserve the right to delete comments that they deem harassing or offensive.
+To modify or add a dependency, a few things need to be in sync: +- `nokogiri.gemspec`: `spec.requirements` need to specify the maven group ID, artifact ID, and version +- `nokogiri.gemspec`: `spec.files` need to include the jar files +- git: the jar files under `lib/nokogiri/jruby/` need to be committed to git +- `lib/nokogiri/jruby/nokogiri_jars.rb`: needs to include all the jars -## Issues +A quick summary of what this looks like for you, the developer: -Please read the ["Getting Help" tutorial](http://www.nokogiri.org/tutorials/getting_help.html) at the [nokogiri.org](http://nokogiri.org) site. +1. edit the `requirements` in the gemspec +2. run `bundle exec rake vendor_jars` which updates everything under `lib/nokogiri/jruby` +3. run `bundle exec rake check_manifest` and if necessary update the gemspec `files` +4. make sure to check everything under `lib/nokogiri/jruby` into git, including the jar files -If you're reporting an issue, it must contain: -* Example code that reproduces the **observed** behavior. -* An explanation of what the **expected** behavior is. +## Rake tasks -That's it. If you don't provide that information, we'll ask you for it, tag the story "needs more information", and then after a time will close it if the information isn't provided. +The `Rakefile` used to be a big fat mess. It's now decomposed into a small set of files in `/rakelib`. If you've got a new rake task you'd like to introduce, please consider whether it belongs in one of the existing concerns, or needs a new file. Please don't add it to `Rakefile` without compelling reasons. -## Pull Requests +## Making a release -Pull requests must always contain a test to prevent regressions. Preferably, the test should demonstrate the __intent__ of the code. +A quick checklist: -We may ask you for clarification if we don't understand the intent of the change. +- [ ] make sure CI is green!
+- [ ] update `CHANGELOG.md` and `lib/nokogiri/version/constant.rb` +- [ ] create a git tag +- [ ] run `scripts/build-gems` and make sure it completes and all the tests pass +- [ ] `for g in gems/*.gem ; do gem push $g ; done` +- [ ] create a release at https://github.com/sparklemotion/nokogiri/releases and provide sha2 checksums +- if security-related, + - [ ] publish a GHSA + - [ ] email ruby-security-ann@googlegroups.com and ruby-talk@ruby-lang.org + - [ ] submit a PR to https://github.com/rubysec/ruby-advisory-db +- [ ] update nokogiri.org +- [ ] bump `lib/nokogiri/version/constant.rb` to a prerelease version like `v1.14.0.dev` diff --git a/C_CODING_STYLE.rdoc b/C_CODING_STYLE.rdoc deleted file mode 100644 index 83f8182a9b..0000000000 --- a/C_CODING_STYLE.rdoc +++ /dev/null @@ -1,33 +0,0 @@ -= C/C++ mode style for Nokogiri - -Please don't propose commits that only change whitespace. However, if your -commit touches a function or section that is not using MRI Ruby conventions, -feel free to update whitespace in the surrounding code. - - -= WHITESPACE: - -* indent level: 2 -* indent type: Always spaces -* line Breaks: LF - -This style can be automatically applied by running: - - astyle --indent=spaces=2 --style=1tbs --keep-one-line-blocks $(ack -f --type=cpp --type=cc ext/nokogiri) - - -= FUNCTION DECLARATION: - -ANSI C style: - - type name(args) - { - declarations - - code - } - -= SOURCES: - -* <3<3<3 - diff --git a/Gemfile b/Gemfile index 0fbae5913b..0bd1bdc1a3 100644 --- a/Gemfile +++ b/Gemfile @@ -1,24 +1,44 @@ -# -*- ruby -*- - -# DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`. 
- -source "https://rubygems.org/" - -gem "mini_portile2", "~>2.4.0" - -gem "concourse", "~>0.24", :group => [:development, :test] -gem "hoe-bundler", "~>1.2", :group => [:development, :test] -gem "hoe-debugging", "~>2.0", :group => [:development, :test] -gem "hoe-gemspec", "~>1.0", :group => [:development, :test] -gem "hoe-git", "~>1.6", :group => [:development, :test] -gem "minitest", "~>5.8", :group => [:development, :test] -gem "racc", "~>1.4.14", :group => [:development, :test] -gem "rake", "~>12.0", :group => [:development, :test] -gem "rake-compiler", "~>1.0.3", :group => [:development, :test] -gem "rake-compiler-dock", "~>0.7.0", :group => [:development, :test] -gem "rexical", "~>1.0.5", :group => [:development, :test] -gem "simplecov", "~>0.16", :group => [:development, :test] -gem "rdoc", ">=4.0", "<7", :group => [:development, :test] -gem "hoe", "~>3.17", :group => [:development, :test] - -# vim: syntax=ruby +# frozen_string_literal: true + +source "https://rubygems.org" + +gemspec + +group :development do + # bootstrapping + gem "bundler", "~> 2.3" + gem "rake", "= 13.0.6" + + # building extensions + gem "rake-compiler", "= 1.2.1" + gem "rake-compiler-dock", "= 1.3.0" + + # documentation + gem "hoe-markdown", "= 1.4.0" + + # parser generator + gem "rexical", "= 1.0.7" + + # tests + gem "minitest", "5.17.0" + gem "minitest-reporters", "= 1.5.0" + gem "ruby_memcheck", "1.2.0" unless RUBY_PLATFORM == "java" + gem "simplecov", "= 0.21.2" + gem "rubyzip", "~> 2.3.2" + + # rubocop + if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION)) + gem "rubocop", "1.44.1" + gem "rubocop-minitest", "0.27.0" + gem "rubocop-performance", "1.15.2" + gem "rubocop-rake", "= 0.6.0" + gem "rubocop-shopify", "2.10.1" + end +end + +# If Psych doesn't build, you can disable this group locally by running +# `bundle config set --local without rdoc` +# Then re-run `bundle install`. 
+group :rdoc do + gem "rdoc", "6.5.0" +end diff --git a/LICENSE-DEPENDENCIES.md b/LICENSE-DEPENDENCIES.md index 336ab62c54..8dc5e857fc 100644 --- a/LICENSE-DEPENDENCIES.md +++ b/LICENSE-DEPENDENCIES.md @@ -1,266 +1,138 @@ -## Vendored Dependency Licenses +# Vendored Dependency Licenses -Nokogiri ships with some third party dependencies, which are listed -here along with their licenses. +Nokogiri ships with some third party dependencies, which are listed here along with their licenses. -Note that this document is broken into three sections, each of which -will apply to different platform releases of Nokogiri: +Note that this document is broken into multiple sections, each of which describes the dependencies of a different "platform release" of Nokogiri. -1. default platform release -2. `java` platform release -3. binary windows platform releases (`x86-mingw32` and `x64-mingw32`) + -It's encouraged for anyone consuming this file via license-tracking -software to understand which dependencies are used by your particular -software, so as not to misinterpret the contents of this file. + -In particular, I'm sure somebody's lawyer, somewhere, is going to -freak out that the LGPL appears in this file; and so I'd like to take -special note that the dependency covered by LGPL, `libiconv`, is only -being redistributed in the binary Windows platform release. It's not -present in any non-Windows releases. 
+- [Platform Releases](#platform-releases) + * [Default platform release ("ruby")](#default-platform-release-ruby) + * [Native LinuxⓇ platform releases ("x86_64-linux", "arm64-linux", "aarch64-linux", and "arm-linux")](#native-linux%E2%93%A1-platform-releases-x86_64-linux-arm64-linux-aarch64-linux-and-arm-linux) + * [Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin")](#native-darwin-macos%E2%93%A1-platform-releases-x86_64-darwin-and-arm64-darwin) + * [Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32")](#native-windows%E2%93%A1-platform-releases-x86-mingw32-and-x64-mingw32) + * [JavaⓇ (JRuby) platform release ("java")](#java%E2%93%A1-jruby-platform-release-java) +- [Appendix: Dependencies' License Texts](#appendix-dependencies-license-texts) + * [libgumbo](#libgumbo) + * [libxml2](#libxml2) + * [libxslt](#libxslt) + * [zlib](#zlib) + * [libiconv](#libiconv) + * [isorelax:isorelax](#isorelaxisorelax) + * [net.sf.saxon:Saxon-HE](#netsfsaxonsaxon-he) + * [net.sourceforge.htmlunit:neko-htmlunit](#netsourceforgehtmlunitneko-htmlunit) + * [nu.validator:jing](#nuvalidatorjing) + * [org.nokogiri:nekodtd](#orgnokogirinekodtd) + * [xalan:serializer and xalan:xalan](#xalanserializer-and-xalanxalan) + * [xerces:xercesImpl](#xercesxercesimpl) + * [xml-apis:xml-apis](#xml-apisxml-apis) ------ + -## default platform release +Anyone consuming this file via license-tracking software should endeavor to understand which gem file you're downloading and using, so as not to misinterpret the contents of this file and the licenses of the software being distributed. -### libxml2 +You can double-check the dependencies in your gem file by examining the output of `nokogiri -v` after installation, which will emit the complete set of libraries in use (for versions `>= 1.11.0.rc4`). 
-MIT +In particular, I'm sure somebody's lawyer, somewhere, is going to freak out that the LGPL appears in this file; and so I'd like to take special note that the dependency covered by LGPL, `libiconv`, is only being redistributed in the native Windows and native Darwin platform releases. It's not present in default, JavaⓇ, or native LinuxⓇ releases. -http://xmlsoft.org/ - Except where otherwise noted in the source code (e.g. the files hash.c, - list.c and the trio files, which are covered by a similar licence but - with different Copyright notices) all the files are: - - Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is fur- - nished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- - NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- +## Platform Releases -### libxslt +### Default platform release ("ruby") -MIT +The default platform release distributes the following dependencies in source form: -http://xmlsoft.org/libxslt/ +* [libxml2](#libxml2) +* [libxslt](#libxslt) +* [libgumbo](#libgumbo) - Licence for libxslt except libexslt - ---------------------------------------------------------------------- - Copyright (C) 2001-2002 Daniel Veillard. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is fur- - nished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- - NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- - NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - Except as contained in this notice, the name of Daniel Veillard shall not - be used in advertising or otherwise to promote the sale, use or other deal- - ings in this Software without prior written authorization from him. - - ---------------------------------------------------------------------- - - Licence for libexslt - ---------------------------------------------------------------------- - Copyright (C) 2001-2002 Thomas Broyer, Charlie Bozeman and Daniel Veillard. - All Rights Reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is fur- - nished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- - NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- - NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - Except as contained in this notice, the name of the authors shall not - be used in advertising or otherwise to promote the sale, use or other deal- - ings in this Software without prior written authorization from him. - ---------------------------------------------------------------------- - -## `java` platform release +This distribution can be identified by inspecting the included Gem::Specification, which will have the value "ruby" for its "platform" attribute. 
-### isorelax -MIT +### Native LinuxⓇ platform releases ("x86_64-linux", "arm64-linux", "aarch64-linux", and "arm-linux") -http://iso-relax.sourceforge.net/ +The native LinuxⓇ platform release distributes the following dependencies in source form: - Copyright (c) 2001-2002, SourceForge ISO-RELAX Project (ASAMI - Tomoharu, Daisuke Okajima, Kohsuke Kawaguchi, and MURATA Makoto) - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* [libxml2](#libxml2) +* [libxslt](#libxslt) +* [libgumbo](#libgumbo) +* [zlib](#zlib) +This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x86_64-linux" or "arm64-linux" for its "platform.cpu" attribute. 
-### jing -BSD-3-Clause +### Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin") -http://www.thaiopensource.com/relaxng/jing.html +The native Darwin platform release distributes the following dependencies in source form: - Copyright (c) 2001-2003 Thai Open Source Software Center Ltd - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the Thai Open Source Software Center Ltd nor - the names of its contributors may be used to endorse or promote - products derived from this software without specific prior - written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, - OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. 
+* [libxml2](#libxml2) +* [libxslt](#libxslt) +* [libgumbo](#libgumbo) +* [zlib](#zlib) +* [libiconv](#libiconv) - -### nekodtd +This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x86_64-darwin" or "arm64-darwin" for its "platform.cpu" attribute. Darwin is also known more familiarly as "OSX" or "macOSⓇ" and is the operating system for many AppleⓇ computers. -Apache 1.0-derived -https://people.apache.org/~andyc/neko/doc/dtd/ +### Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32") - The CyberNeko Software License, Version 1.0 - - (C) Copyright 2002-2005, Andy Clark. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - 3. The end-user documentation included with the redistribution, - if any, must include the following acknowledgment: - "This product includes software developed by Andy Clark." - Alternately, this acknowledgment may appear in the software itself, - if and wherever such third-party acknowledgments normally appear. - - 4. The names "CyberNeko" and "NekoHTML" must not be used to endorse - or promote products derived from this software without prior - written permission. For written permission, please contact - andyc@cyberneko.net. - - 5. Products derived from this software may not be called "CyberNeko", - nor may "CyberNeko" appear in their name, without prior written - permission of the author. 
- - THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS - BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, - OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ==================================================================== - - This license is based on the Apache Software License, version 1.1. - -### nekohtml +The native WindowsⓇ platform release distributes the following dependencies in source form: + +* [libxml2](#libxml2) +* [libxslt](#libxslt) +* [libgumbo](#libgumbo) +* [zlib](#zlib) +* [libiconv](#libiconv) + +This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x64-mingw32" or "x86-mingw32" for its "platform.cpu" attribute. 
+ + +### JavaⓇ (JRuby) platform release ("java") + +The Java platform release distributes the following dependencies as compiled jar files: + +* [isorelax:isorelax](#isorelaxisorelax) +* [net.sf.saxon:Saxon-HE](#netsfsaxonsaxon-he) +* [net.sourceforge.htmlunit:neko-htmlunit](#netsourceforgehtmlunitneko-htmlunit) +* [nu.validator:jing](#nuvalidatorjing) +* [org.nokogiri:nekodtd](#orgnokogirinekodtd) +* [xalan:serializer and xalan:xalan](#xalanserializer-and-xalanxalan) +* [xerces:xercesImpl](#xercesxercesimpl) +* [xml-apis:xml-apis](#xml-apisxml-apis) + +This distribution can be identified by inspecting the included Gem::Specification, which will have the value "java" for its "platform.os" attribute. + + +## Appendix: Dependencies' License Texts + +This section contains a subsection for each potentially-distributed dependency, which includes the name of the license and the license text. + +Please see previous sections to understand which of these potential dependencies is actually distributed in the gem file you're downloading and using. + + +### libgumbo Apache 2.0 -http://nekohtml.sourceforge.net/ +https://github.com/sparklemotion/nokogiri/blob/main/gumbo-parser/src/README.md + - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ - + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - + 1. Definitions. - + "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. - + "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. - + "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, @@ -268,24 +140,24 @@ http://nekohtml.sourceforge.net/ direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. - + "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. - + "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. - + "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. - + "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). - + "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications @@ -293,7 +165,7 @@ http://nekohtml.sourceforge.net/ of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. - + "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally @@ -307,18 +179,18 @@ http://nekohtml.sourceforge.net/ Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
- + "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. - + 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. - + 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable @@ -334,24 +206,24 @@ http://nekohtml.sourceforge.net/ or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. - + 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: - + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and - + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and - + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and - + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained @@ -368,14 +240,14 @@ http://nekohtml.sourceforge.net/ or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. - + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. - + 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of @@ -383,12 +255,12 @@ http://nekohtml.sourceforge.net/ Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. - + 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. - + 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, @@ -398,7 +270,7 @@ http://nekohtml.sourceforge.net/ PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. - + 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly @@ -410,7 +282,7 @@ http://nekohtml.sourceforge.net/ work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. - + 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, @@ -421,680 +293,101 @@ http://nekohtml.sourceforge.net/ defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - + END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -### xalan -Apache 2.0 -https://xml.apache.org/xalan-j/ +### libxml2 -covers xalan.jar and serializer.jar +MIT - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - -### xerces +http://xmlsoft.org/ -Apache 2.0 + Except where otherwise noted in the source code (e.g. the files hash.c, + list.c and the trio files, which are covered by a similar licence but + with different Copyright notices) all the files are: -https://xerces.apache.org/xerces2-j/ + Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - -### xml-apis + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is fur- + nished to do so, subject to the following conditions: -Apache 2.0 + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. -https://xerces.apache.org/xml-commons/ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- + NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. - Unless otherwise noted all files in XML Commons are covered under the - Apache License Version 2.0. Please read the LICENSE and NOTICE files. - - XML Commons contains some software and documentation that is covered - under a number of different licenses. 
This applies particularly to the - xml-commons/java/external/ directory. Most files under - xml-commons/java/external/ are covered under their respective - LICENSE.*.txt files; see the matching README.*.txt files for - descriptions. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - -## binary windows release - -NOTE: these libraries are redistributed ONLY with the binary -cross-compiled Windows platform version of Nokogiri, both x86-mingw32 -and x64-mingw32. +### libxslt + +MIT + +http://xmlsoft.org/libxslt/ + + Licence for libxslt except libexslt + ---------------------------------------------------------------------- + Copyright (C) 2001-2002 Daniel Veillard. All Rights Reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is fur- + nished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- + NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- + NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the name of Daniel Veillard shall not + be used in advertising or otherwise to promote the sale, use or other deal- + ings in this Software without prior written authorization from him. + + ---------------------------------------------------------------------- + + Licence for libexslt + ---------------------------------------------------------------------- + Copyright (C) 2001-2002 Thomas Broyer, Charlie Bozeman and Daniel Veillard. + All Rights Reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is fur- + nished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- + NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- + NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the name of the authors shall not + be used in advertising or otherwise to promote the sale, use or other deal- + ings in this Software without prior written authorization from him. + ---------------------------------------------------------------------- + ### zlib @@ -1103,15 +396,15 @@ zlib license http://www.zlib.net/zlib_license.html Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler - + This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. - + Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: - + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be @@ -1119,10 +412,10 @@ http://www.zlib.net/zlib_license.html 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. - + Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu - + ### libiconv @@ -1130,41 +423,41 @@ LGPL https://www.gnu.org/software/libiconv/ - GNU LIBRARY GENERAL PUBLIC LICENSE - Version 2, June 1991 - + GNU LIBRARY GENERAL PUBLIC LICENSE + Version 2, June 1991 + Copyright (C) 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - + [This is the first released version of the library GPL. It is numbered 2 because it goes with version 2 of the ordinary GPL.] - - Preamble - + + Preamble + The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. - + This license, the Library General Public License, applies to some specially designated Free Software Foundation software, and to any other libraries whose authors decide to use it. You can use it for your libraries, too. - + When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. 
- + To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library, or if you modify it. - + For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source @@ -1172,11 +465,11 @@ https://www.gnu.org/software/libiconv/ complete object files to the recipients so that they can relink them with the library, after making changes to the library and recompiling it. And you must show them these terms so they know their rights. - + Our method of protecting your rights has two steps: (1) copyright the library, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the library. - + Also, for each distributor's protection, we want to make certain that everyone understands that there is no warranty for this free library. If the library is modified by someone else and passed on, we @@ -1190,14 +483,14 @@ https://www.gnu.org/software/libiconv/ transforming the program into proprietary software. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. - + Most GNU software, including some libraries, is covered by the ordinary GNU General Public License, which was designed for utility programs. This license, the GNU Library General Public License, applies to certain designated libraries. This license is quite different from the ordinary one; be sure to read it in full, and don't assume that anything in it is the same as in the ordinary license. - + The reason we have a separate public license for some libraries is that they blur the distinction we usually make between modifying or adding to a program and simply using it. 
Linking a program with a library, without @@ -1206,12 +499,12 @@ https://www.gnu.org/software/libiconv/ a textual and legal sense, the linked executable is a combined work, a derivative of the original library, and the ordinary General Public License treats it as such. - + Because of this blurred distinction, using the ordinary General Public License for libraries did not effectively promote software sharing, because most developers did not use the libraries. We concluded that weaker conditions might promote sharing better. - + However, unrestricted linking of non-free programs would deprive the users of those programs of all benefit from the free status of the libraries themselves. This Library General Public License is intended to @@ -1221,29 +514,29 @@ https://www.gnu.org/software/libiconv/ this as regards changes in header files, but we have achieved it as regards changes in the actual functions of the Library.) The hope is that this will lead to faster development of free libraries. - + The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, while the latter only works together with the library. - + Note that it is possible for a library to be covered by the ordinary General Public License rather than by this special one. - GNU LIBRARY GENERAL PUBLIC LICENSE + GNU LIBRARY GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - + 0. This License Agreement applies to any software library which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Library General Public License (also called "this License"). Each licensee is addressed as "you". 
- + A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. - + The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under @@ -1251,13 +544,13 @@ https://www.gnu.org/software/libiconv/ portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) - + "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. - + Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from @@ -1265,7 +558,7 @@ https://www.gnu.org/software/libiconv/ on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. - + 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an @@ -1273,7 +566,7 @@ https://www.gnu.org/software/libiconv/ all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. - + You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 
@@ -1282,15 +575,15 @@ https://www.gnu.org/software/libiconv/ of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: - + a) The modified work must itself be a software library. - + b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. - + c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. - + d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility @@ -1298,14 +591,14 @@ https://www.gnu.org/software/libiconv/ in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. - + (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) - + These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in @@ -1316,17 +609,17 @@ https://www.gnu.org/software/libiconv/ this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
- + Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. - + In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. - + 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so @@ -1339,49 +632,49 @@ https://www.gnu.org/software/libiconv/ Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. - + This option is useful when you wish to copy part of the code of the Library into a program that is not a library. - + 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. - + If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. - + 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". 
Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. - + However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. - + When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. - + If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) - + Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, @@ -1393,7 +686,7 @@ https://www.gnu.org/software/libiconv/ under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. - + You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. 
If the work @@ -1401,7 +694,7 @@ https://www.gnu.org/software/libiconv/ copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: - + a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under @@ -1413,19 +706,19 @@ https://www.gnu.org/software/libiconv/ that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) - + b) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. - + c) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. - + d) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. - + For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, @@ -1434,7 +727,7 @@ https://www.gnu.org/software/libiconv/ components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. - + It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. 
Such a contradiction means you cannot @@ -1447,16 +740,16 @@ https://www.gnu.org/software/libiconv/ library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: - + a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. - + b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. - + 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or @@ -1464,7 +757,7 @@ https://www.gnu.org/software/libiconv/ rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. - + 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are @@ -1473,7 +766,7 @@ https://www.gnu.org/software/libiconv/ Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. - + 10. 
Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library @@ -1494,11 +787,11 @@ https://www.gnu.org/software/libiconv/ all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. - + If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. - + It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the @@ -1509,10 +802,10 @@ https://www.gnu.org/software/libiconv/ system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. - + This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. - + 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add @@ -1520,12 +813,12 @@ https://www.gnu.org/software/libiconv/ so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. - + 13. The Free Software Foundation may publish revised and/or new versions of the Library General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. 
- + Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and @@ -1542,9 +835,9 @@ https://www.gnu.org/software/libiconv/ decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. - - NO WARRANTY - + + NO WARRANTY + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR @@ -1554,7 +847,7 @@ https://www.gnu.org/software/libiconv/ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU @@ -1565,50 +858,1367 @@ https://www.gnu.org/software/libiconv/ FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest - possible use to the public, we recommend making it free software that - everyone can redistribute and change. You can do so by permitting - redistribution under these terms (or, alternatively, under the terms of the - ordinary General Public License). - - To apply these terms, attach the following notices to the library. 
It is - safest to attach them to the start of each source file to most effectively - convey the exclusion of warranty; and each file should have at least the - "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA - - Also add information on how to contact you by electronic and paper mail. - - You should also get your employer (if you work as a programmer) or your - school, if any, to sign a "copyright disclaimer" for the library, if - necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - , 1 April 1990 - Ty Coon, President of Vice - - That's all there is to it! 
+ + END OF TERMS AND CONDITIONS + + +### isorelax:isorelax + +MIT + +http://iso-relax.sourceforge.net/ + + Copyright (c) 2001-2002, SourceForge ISO-RELAX Project (ASAMI + Tomoharu, Daisuke Okajima, Kohsuke Kawaguchi, and MURATA Makoto) + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +### net.sf.saxon:Saxon-HE + +MPL 2.0 + +http://www.saxonica.com/ + + Mozilla Public License Version 2.0 + ================================== + + 1. Definitions + -------------- + + 1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + + 1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + + 1.3. "Contribution" + means Covered Software of a particular Contributor. + + 1.4. 
"Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + + 1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + + 1.6. "Executable Form" + means any form of the work other than Source Code Form. + + 1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + + 1.8. "License" + means this document. + + 1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + + 1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + + 1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + + 1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + + 1.13. 
"Source Code Form" + means the form of the work preferred for making modifications. + + 1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + + 2. License Grants and Conditions + -------------------------------- + + 2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + + (b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + + 2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution + become effective for each Contribution on the date the Contributor first + distributes such Contribution. + + 2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under + this License. No additional rights or licenses will be implied from the + distribution or licensing of Covered Software under this License. 
+ Notwithstanding Section 2.1(b) above, no patent license is granted by a + Contributor: + + (a) for any code that a Contributor has removed from Covered Software; + or + + (b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + (c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + + This License does not grant any rights in the trademarks, service marks, + or logos of any Contributor (except as may be necessary to comply with + the notice requirements in Section 3.4). + + 2.4. Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this + License (see Section 10.2) or under the terms of a Secondary License (if + permitted under the terms of Section 3.3). + + 2.5. Representation + + Each Contributor represents that the Contributor believes its + Contributions are its original creation(s) or it has sufficient rights + to grant the rights to its Contributions conveyed by this License. + + 2.6. Fair Use + + This License is not intended to limit any rights You have under + applicable copyright doctrines of fair use, fair dealing, or other + equivalents. + + 2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted + in Section 2.1. + + 3. Responsibilities + ------------------- + + 3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under + the terms of this License. You must inform recipients that the Source + Code Form of the Covered Software is governed by the terms of this + License, and how they can obtain a copy of this License. 
You may not + attempt to alter or restrict the recipients' rights in the Source Code + Form. + + 3.2. Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + (a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + + (b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + + 3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for + the Covered Software. If the Larger Work is a combination of Covered + Software with a work governed by one or more Secondary Licenses, and the + Covered Software is not Incompatible With Secondary Licenses, this + License permits You to additionally distribute such Covered Software + under the terms of such Secondary License(s), so that the recipient of + the Larger Work may, at their option, further distribute the Covered + Software under the terms of either this License or such Secondary + License(s). + + 3.4. Notices + + You may not remove or alter the substance of any license notices + (including copyright notices, patent notices, disclaimers of warranty, + or limitations of liability) contained within the Source Code Form of + the Covered Software, except that You may alter any license notices to + the extent required to remedy known factual inaccuracies. + + 3.5. 
Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on + behalf of any Contributor. You must make it absolutely clear that any + such warranty, support, indemnity, or liability obligation is offered by + You alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + + 4. Inability to Comply Due to Statute or Regulation + --------------------------------------------------- + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Software due to + statute, judicial order, or regulation then You must: (a) comply with + the terms of this License to the maximum extent possible; and (b) + describe the limitations and the code they affect. Such description must + be placed in a text file included with all distributions of the Covered + Software under this License. Except to the extent prohibited by statute + or regulation, such description must be sufficiently detailed for a + recipient of ordinary skill to be able to understand it. + + 5. Termination + -------------- + + 5.1. The rights granted under this License will terminate automatically + if You fail to comply with any of its terms. 
However, if You become + compliant, then the rights granted under this License from a particular + Contributor are reinstated (a) provisionally, unless and until such + Contributor explicitly and finally terminates Your grants, and (b) on an + ongoing basis, if such Contributor fails to notify You of the + non-compliance by some reasonable means prior to 60 days after You have + come back into compliance. Moreover, Your grants from a particular + Contributor are reinstated on an ongoing basis if such Contributor + notifies You of the non-compliance by some reasonable means, this is the + first time You have received notice of non-compliance with this License + from such Contributor, and You become compliant prior to 30 days after + Your receipt of the notice. + + 5.2. If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, + counter-claims, and cross-claims) alleging that a Contributor Version + directly or indirectly infringes any patent, then the rights granted to + You by any and all Contributors for the Covered Software under Section + 2.1 of this License shall terminate. + + 5.3. In the event of termination under Sections 5.1 or 5.2 above, all + end user license agreements (excluding distributors and resellers) which + have been validly granted by You or Your distributors under this License + prior to termination shall survive termination. + + ************************************************************************ + * * + * 6. Disclaimer of Warranty * + * ------------------------- * + * * + * Covered Software is provided under this License on an "as is" * + * basis, without warranty of any kind, either expressed, implied, or * + * statutory, including, without limitation, warranties that the * + * Covered Software is free of defects, merchantable, fit for a * + * particular purpose or non-infringing. 
The entire risk as to the * + * quality and performance of the Covered Software is with You. * + * Should any Covered Software prove defective in any respect, You * + * (not any Contributor) assume the cost of any necessary servicing, * + * repair, or correction. This disclaimer of warranty constitutes an * + * essential part of this License. No use of any Covered Software is * + * authorized under this License except under this disclaimer. * + * * + ************************************************************************ + + ************************************************************************ + * * + * 7. Limitation of Liability * + * -------------------------- * + * * + * Under no circumstances and under no legal theory, whether tort * + * (including negligence), contract, or otherwise, shall any * + * Contributor, or anyone who distributes Covered Software as * + * permitted above, be liable to You for any direct, indirect, * + * special, incidental, or consequential damages of any character * + * including, without limitation, damages for lost profits, loss of * + * goodwill, work stoppage, computer failure or malfunction, or any * + * and all other commercial damages or losses, even if such party * + * shall have been informed of the possibility of such damages. This * + * limitation of liability shall not apply to liability for death or * + * personal injury resulting from such party's negligence to the * + * extent applicable law prohibits such limitation. Some * + * jurisdictions do not allow the exclusion or limitation of * + * incidental or consequential damages, so this exclusion and * + * limitation may not apply to You. * + * * + ************************************************************************ + + 8. 
Litigation + ------------- + + Any litigation relating to this License may be brought only in the + courts of a jurisdiction where the defendant maintains its principal + place of business and such litigation shall be governed by laws of that + jurisdiction, without reference to its conflict-of-law provisions. + Nothing in this Section shall prevent a party's ability to bring + cross-claims or counter-claims. + + 9. Miscellaneous + ---------------- + + This License represents the complete agreement concerning the subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. Any law or regulation which provides + that the language of a contract shall be construed against the drafter + shall not be used to construe this License against a Contributor. + + 10. Versions of the License + --------------------------- + + 10.1. New Versions + + Mozilla Foundation is the license steward. Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + + 10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version + of the License under which You originally received the Covered Software, + or under the terms of any subsequent version published by the license + steward. + + 10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a + modified version of this License if you rename the license and remove + any references to the name of the license steward (except to note that + such modified license differs from this License). + + 10.4. 
Distributing Source Code Form that is Incompatible With Secondary + Licenses + + If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the + notice described in Exhibit B of this License must be attached. + + +### net.sourceforge.htmlunit:neko-htmlunit + +Apache 2.0 + +https://github.com/HtmlUnit/htmlunit-neko + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + +### nu.validator:jing + +BSD-3-Clause + +http://www.thaiopensource.com/relaxng/jing.html + + Copyright (c) 2001-2003 Thai Open Source Software Center Ltd + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the Thai Open Source Software Center Ltd nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +### org.nokogiri:nekodtd + +Apache 2.0 + +https://github.com/sparklemotion/nekodtd + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + +### xalan:serializer and xalan:xalan + +Apache 2.0 + +https://xml.apache.org/xalan-j/ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + +### xerces:xercesImpl + +Apache 2.0 + +https://xerces.apache.org/xerces2-j/ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + +### xml-apis:xml-apis + +Apache 2.0 + +https://xerces.apache.org/xml-commons/ + + Unless otherwise noted all files in XML Commons are covered under the + Apache License Version 2.0. Please read the LICENSE and NOTICE files. + + XML Commons contains some software and documentation that is covered + under a number of different licenses. This applies particularly to the + xml-commons/java/external/ directory. Most files under + xml-commons/java/external/ are covered under their respective + LICENSE.*.txt files; see the matching README.*.txt files for + descriptions. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/LICENSE.md b/LICENSE.md index 1632aee7af..b649dd875d 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ The MIT License -Copyright 2008 -- 2018 by Aaron Patterson, Mike Dalessio, Charles Nutter, Sergio Arbeo, Patrick Mahoney, Yoko Harada, Akinori MUSHA, John Shahid, Lars Kanis +Copyright 2008 -- 2023 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/Manifest.txt b/Manifest.txt deleted file mode 100644 index bb1360f924..0000000000 --- a/Manifest.txt +++ /dev/null @@ -1,239 +0,0 @@ -LICENSE-DEPENDENCIES.md -LICENSE.md -README.md -bin/nokogiri -dependencies.yml -ext/java/nokogiri/EncodingHandler.java -ext/java/nokogiri/HtmlDocument.java -ext/java/nokogiri/HtmlElementDescription.java -ext/java/nokogiri/HtmlEntityLookup.java -ext/java/nokogiri/HtmlSaxParserContext.java -ext/java/nokogiri/HtmlSaxPushParser.java -ext/java/nokogiri/NokogiriService.java -ext/java/nokogiri/XmlAttr.java -ext/java/nokogiri/XmlAttributeDecl.java -ext/java/nokogiri/XmlCdata.java -ext/java/nokogiri/XmlComment.java -ext/java/nokogiri/XmlDocument.java -ext/java/nokogiri/XmlDocumentFragment.java -ext/java/nokogiri/XmlDtd.java -ext/java/nokogiri/XmlElement.java -ext/java/nokogiri/XmlElementContent.java -ext/java/nokogiri/XmlElementDecl.java -ext/java/nokogiri/XmlEntityDecl.java -ext/java/nokogiri/XmlEntityReference.java -ext/java/nokogiri/XmlNamespace.java -ext/java/nokogiri/XmlNode.java -ext/java/nokogiri/XmlNodeSet.java -ext/java/nokogiri/XmlProcessingInstruction.java -ext/java/nokogiri/XmlReader.java -ext/java/nokogiri/XmlRelaxng.java -ext/java/nokogiri/XmlSaxParserContext.java -ext/java/nokogiri/XmlSaxPushParser.java -ext/java/nokogiri/XmlSchema.java -ext/java/nokogiri/XmlSyntaxError.java -ext/java/nokogiri/XmlText.java -ext/java/nokogiri/XmlXpathContext.java -ext/java/nokogiri/XsltStylesheet.java -ext/java/nokogiri/internals/ClosedStreamException.java 
-ext/java/nokogiri/internals/HtmlDomParserContext.java -ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java -ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java -ext/java/nokogiri/internals/NokogiriDomParser.java -ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java -ext/java/nokogiri/internals/NokogiriEntityResolver.java -ext/java/nokogiri/internals/NokogiriErrorHandler.java -ext/java/nokogiri/internals/NokogiriHandler.java -ext/java/nokogiri/internals/NokogiriHelpers.java -ext/java/nokogiri/internals/NokogiriNamespaceCache.java -ext/java/nokogiri/internals/NokogiriNamespaceContext.java -ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java -ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java -ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java -ext/java/nokogiri/internals/NokogiriXPathFunction.java -ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java -ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java -ext/java/nokogiri/internals/NokogiriXsltErrorListener.java -ext/java/nokogiri/internals/ParserContext.java -ext/java/nokogiri/internals/ReaderNode.java -ext/java/nokogiri/internals/SaveContextVisitor.java -ext/java/nokogiri/internals/SchemaErrorHandler.java -ext/java/nokogiri/internals/UncloseableInputStream.java -ext/java/nokogiri/internals/XalanDTMManagerPatch.java -ext/java/nokogiri/internals/XmlDeclHandler.java -ext/java/nokogiri/internals/XmlDomParserContext.java -ext/java/nokogiri/internals/XmlSaxParser.java -ext/java/nokogiri/internals/c14n/AttrCompare.java -ext/java/nokogiri/internals/c14n/C14nHelper.java -ext/java/nokogiri/internals/c14n/CanonicalFilter.java -ext/java/nokogiri/internals/c14n/CanonicalizationException.java -ext/java/nokogiri/internals/c14n/Canonicalizer.java -ext/java/nokogiri/internals/c14n/Canonicalizer11.java -ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java -ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java 
-ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java -ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java -ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java -ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java -ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java -ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java -ext/java/nokogiri/internals/c14n/CanonicalizerBase.java -ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java -ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java -ext/java/nokogiri/internals/c14n/Constants.java -ext/java/nokogiri/internals/c14n/ElementProxy.java -ext/java/nokogiri/internals/c14n/HelperNodeList.java -ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java -ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java -ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java -ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java -ext/java/nokogiri/internals/c14n/NodeFilter.java -ext/java/nokogiri/internals/c14n/UtfHelpper.java -ext/java/nokogiri/internals/c14n/XMLUtils.java -ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java -ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java -ext/nokogiri/depend -ext/nokogiri/extconf.rb -ext/nokogiri/html_document.c -ext/nokogiri/html_document.h -ext/nokogiri/html_element_description.c -ext/nokogiri/html_element_description.h -ext/nokogiri/html_entity_lookup.c -ext/nokogiri/html_entity_lookup.h -ext/nokogiri/html_sax_parser_context.c -ext/nokogiri/html_sax_parser_context.h -ext/nokogiri/html_sax_push_parser.c -ext/nokogiri/html_sax_push_parser.h -ext/nokogiri/nokogiri.c -ext/nokogiri/nokogiri.h -ext/nokogiri/xml_attr.c -ext/nokogiri/xml_attr.h -ext/nokogiri/xml_attribute_decl.c -ext/nokogiri/xml_attribute_decl.h -ext/nokogiri/xml_cdata.c -ext/nokogiri/xml_cdata.h -ext/nokogiri/xml_comment.c -ext/nokogiri/xml_comment.h -ext/nokogiri/xml_document.c 
-ext/nokogiri/xml_document.h -ext/nokogiri/xml_document_fragment.c -ext/nokogiri/xml_document_fragment.h -ext/nokogiri/xml_dtd.c -ext/nokogiri/xml_dtd.h -ext/nokogiri/xml_element_content.c -ext/nokogiri/xml_element_content.h -ext/nokogiri/xml_element_decl.c -ext/nokogiri/xml_element_decl.h -ext/nokogiri/xml_encoding_handler.c -ext/nokogiri/xml_encoding_handler.h -ext/nokogiri/xml_entity_decl.c -ext/nokogiri/xml_entity_decl.h -ext/nokogiri/xml_entity_reference.c -ext/nokogiri/xml_entity_reference.h -ext/nokogiri/xml_io.c -ext/nokogiri/xml_io.h -ext/nokogiri/xml_libxml2_hacks.c -ext/nokogiri/xml_libxml2_hacks.h -ext/nokogiri/xml_namespace.c -ext/nokogiri/xml_namespace.h -ext/nokogiri/xml_node.c -ext/nokogiri/xml_node.h -ext/nokogiri/xml_node_set.c -ext/nokogiri/xml_node_set.h -ext/nokogiri/xml_processing_instruction.c -ext/nokogiri/xml_processing_instruction.h -ext/nokogiri/xml_reader.c -ext/nokogiri/xml_reader.h -ext/nokogiri/xml_relax_ng.c -ext/nokogiri/xml_relax_ng.h -ext/nokogiri/xml_sax_parser.c -ext/nokogiri/xml_sax_parser.h -ext/nokogiri/xml_sax_parser_context.c -ext/nokogiri/xml_sax_parser_context.h -ext/nokogiri/xml_sax_push_parser.c -ext/nokogiri/xml_sax_push_parser.h -ext/nokogiri/xml_schema.c -ext/nokogiri/xml_schema.h -ext/nokogiri/xml_syntax_error.c -ext/nokogiri/xml_syntax_error.h -ext/nokogiri/xml_text.c -ext/nokogiri/xml_text.h -ext/nokogiri/xml_xpath_context.c -ext/nokogiri/xml_xpath_context.h -ext/nokogiri/xslt_stylesheet.c -ext/nokogiri/xslt_stylesheet.h -lib/isorelax.jar -lib/jing.jar -lib/nekodtd.jar -lib/nekohtml.jar -lib/nokogiri.rb -lib/nokogiri/css.rb -lib/nokogiri/css/node.rb -lib/nokogiri/css/parser.rb -lib/nokogiri/css/parser.y -lib/nokogiri/css/parser_extras.rb -lib/nokogiri/css/syntax_error.rb -lib/nokogiri/css/tokenizer.rb -lib/nokogiri/css/tokenizer.rex -lib/nokogiri/css/xpath_visitor.rb -lib/nokogiri/decorators/slop.rb -lib/nokogiri/html.rb -lib/nokogiri/html/builder.rb -lib/nokogiri/html/document.rb 
-lib/nokogiri/html/document_fragment.rb -lib/nokogiri/html/element_description.rb -lib/nokogiri/html/element_description_defaults.rb -lib/nokogiri/html/entity_lookup.rb -lib/nokogiri/html/sax/parser.rb -lib/nokogiri/html/sax/parser_context.rb -lib/nokogiri/html/sax/push_parser.rb -lib/nokogiri/syntax_error.rb -lib/nokogiri/version.rb -lib/nokogiri/xml.rb -lib/nokogiri/xml/attr.rb -lib/nokogiri/xml/attribute_decl.rb -lib/nokogiri/xml/builder.rb -lib/nokogiri/xml/cdata.rb -lib/nokogiri/xml/character_data.rb -lib/nokogiri/xml/document.rb -lib/nokogiri/xml/document_fragment.rb -lib/nokogiri/xml/dtd.rb -lib/nokogiri/xml/element_content.rb -lib/nokogiri/xml/element_decl.rb -lib/nokogiri/xml/entity_decl.rb -lib/nokogiri/xml/entity_reference.rb -lib/nokogiri/xml/namespace.rb -lib/nokogiri/xml/node.rb -lib/nokogiri/xml/node/save_options.rb -lib/nokogiri/xml/node_set.rb -lib/nokogiri/xml/notation.rb -lib/nokogiri/xml/parse_options.rb -lib/nokogiri/xml/pp.rb -lib/nokogiri/xml/pp/character_data.rb -lib/nokogiri/xml/pp/node.rb -lib/nokogiri/xml/processing_instruction.rb -lib/nokogiri/xml/reader.rb -lib/nokogiri/xml/relax_ng.rb -lib/nokogiri/xml/sax.rb -lib/nokogiri/xml/sax/document.rb -lib/nokogiri/xml/sax/parser.rb -lib/nokogiri/xml/sax/parser_context.rb -lib/nokogiri/xml/sax/push_parser.rb -lib/nokogiri/xml/schema.rb -lib/nokogiri/xml/searchable.rb -lib/nokogiri/xml/syntax_error.rb -lib/nokogiri/xml/text.rb -lib/nokogiri/xml/xpath.rb -lib/nokogiri/xml/xpath/syntax_error.rb -lib/nokogiri/xml/xpath_context.rb -lib/nokogiri/xslt.rb -lib/nokogiri/xslt/stylesheet.rb -lib/serializer.jar -lib/xalan.jar -lib/xercesImpl.jar -lib/xml-apis.jar -lib/xsd/xmlparser/nokogiri.rb -patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch -patches/libxml2/0002-Remove-script-macro-support.patch -patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch -patches/libxslt/0001-Fix-security-framework-bypass.patch diff --git a/README.md b/README.md index 
89d818f858..fc21be46bb 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,168 @@ +
+ # Nokogiri -## Description +Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2, libgumbo, and xerces. + +## Guiding Principles + +Some guiding principles Nokogiri tries to follow: -Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among -Nokogiri's many features is the ability to search documents via XPath -or CSS3 selectors. +- be secure-by-default by treating all documents as **untrusted** by default +- be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers -## Links +## Features Overview -* https://nokogiri.org -* [Installation Help](https://nokogiri.org/tutorials/installing_nokogiri.html) -* [Tutorials](https://nokogiri.org) -* [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet) -* [GitHub](https://github.com/sparklemotion/nokogiri) -* [Mailing List](https://groups.google.com/group/nokogiri-talk) -* [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri) +- DOM Parser for XML, HTML4, and HTML5 +- SAX Parser for XML and HTML4 +- Push Parser for XML and HTML4 +- Document search via XPath 1.0 +- Document search via CSS3 selectors, with some jquery-like extensions +- XSD Schema validation +- XSLT transformation +- "Builder" DSL for XML and HTML documents ## Status -[![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri) -[![Appveyor 
CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/master?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/master) -[![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri) -[![Test Coverage](https://api.codeclimate.com/v1/badges/59c67b0e8976027a45ad/test_coverage)](https://codeclimate.com/github/sparklemotion/nokogiri/test_coverage) +[![Github Actions CI](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml) +[![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main) [![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri) -[![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score.html?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver) -[![Tidelift dependencies](https://tidelift.com/badges/github/sparklemotion/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme) +[![SemVer compatibility](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores) +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5344/badge)](https://bestpractices.coreinfrastructure.org/projects/5344) +[![Tidelift 
dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme) -## Features -* XML/HTML DOM parser which handles broken HTML -* XML/HTML SAX parser -* XML/HTML Push parser -* XPath 1.0 support for document searching -* CSS3 selector support for document searching -* XML/HTML builder -* XSLT transformer +## Support, Getting Help, and Reporting Issues -Nokogiri parses and searches XML/HTML using native libraries (either C -or Java, depending on your Ruby), which means it's fast and -standards-compliant. +All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions). +### Reading -## Installation +Your first stops for learning more about Nokogiri should be: -If this doesn't work: +- [API Documentation](https://nokogiri.org/rdoc/index.html) +- [Tutorials](https://nokogiri.org/tutorials/toc.html) +- An excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet) -``` -gem install nokogiri -``` -then please start troubleshooting here: +### Ask For Help -> https://nokogiri.org/tutorials/installing_nokogiri.html +There are a few ways to ask exploratory questions: -There are currently 1,237 Stack Overflow questions about Nokogiri -installation. The vast majority of them are out of date and therefore -incorrect. __Please do not use Stack Overflow.__ +- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk +- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues -Instead, [tell us](https://nokogiri.org/tutorials/getting_help.html) -when the above instructions don't work for you. This allows us to both -help you directly and improve the documentation. +Please do not mail the maintainers at their personal addresses. 
-### Binary packages +### Report A Bug -Binary packages are available for some distributions. +The Nokogiri bug tracker is at https://github.com/sparklemotion/nokogiri/issues -* Debian: https://packages.debian.org/sid/ruby-nokogiri -* SuSE: https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/ -* Fedora: http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756 +Please use the "Bug Report" or "Installation Difficulties" templates. -## Support +### Security and Vulnerability Reporting -All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions). +Please report vulnerabilities at https://hackerone.com/nokogiri -* The Nokogiri mailing list is active: https://groups.google.com/group/nokogiri-talk -* The Nokogiri bug tracker is here: https://github.com/sparklemotion/nokogiri/issues -* Before filing a bug report, please read our submission guidelines: http://nokogiri.org/tutorials/getting_help.html -* The IRC channel is `#nokogiri` on freenode. -* The project's GitHub wiki has an excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet) which might be useful. +Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md) -Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often. - [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme +### Semantic Versioning Policy +Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). 
[![Dependabot's SemVer compatibility score for Nokogiri](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores) -## Security and Vulnerability Reporting +We bump `Major.Minor.Patch` versions following this guidance: -Please report vulnerabilities at https://hackerone.com/nokogiri +`Major`: (we've never done this) -Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md) +- Significant backwards-incompatible changes to the public API that would require rewriting existing application code. +- Some examples of backwards-incompatible changes we might someday consider for a Major release are at [`ROADMAP.md`](ROADMAP.md). + +`Minor`: + +- Features and bugfixes. +- Updating packaged libraries for non-security-related reasons. +- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api). +- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry. + +`Patch`: + +- Bugfixes. +- Security updates. +- Updating packaged libraries for security-related reasons. 
+ + +### Sponsorship + +You can help sponsor the maintainers of this software through one of these organizations: + +- [github.com/sponsors/flavorjones](https://github.com/sponsors/flavorjones) +- [opencollective.com/nokogiri](https://opencollective.com/nokogiri) +- [tidelift.com/subscription/pkg/rubygems-nokogiri](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme) + + +## Installation + +Requirements: + +- Ruby >= 2.7 +- JRuby >= 9.4.0.0 + + +### Native Gems: Faster, more reliable installation + +"Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries, or for system dependencies to exist. This results in **much faster installation** and **more reliable installation**, which as you probably know are the biggest headaches for Nokogiri users. + +### Supported Platforms + +Nokogiri ships pre-compiled, "native" gems for the following platforms: + +- Linux: + - `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`) + - `aarch64-linux` and `arm-linux` (req: `glibc >= 2.29`) + - Note that musl platforms like Alpine **are** supported +- Darwin/MacOS: `x86_64-darwin` and `arm64-darwin` +- Windows: `x86-mingw32`, `x64-mingw32`, and `x64-mingw-ucrt` +- Java: any platform running JRuby 9.4 or higher + +To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`. + +If you're on a supported platform, either `gem install` or `bundle install` should install a native gem without any additional action on your part. 
This installation should only take a few seconds, and your output should look something like: + +``` sh +$ gem install nokogiri +Fetching nokogiri-1.11.0-x86_64-linux.gem +Successfully installed nokogiri-1.11.0-x86_64-linux +1 gem installed +``` + + +### Other Installation Options + +Because Nokogiri is a C extension, it requires that you have a C compiler toolchain, Ruby development header files, and some system dependencies installed. +The following may work for you if you have an appropriately-configured system: -## Synopsis +``` bash +gem install nokogiri +``` + +If you have any issues, please visit [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for more complete instructions and troubleshooting. + + +## How To Use Nokogiri -Nokogiri is a large library, but here is example usage for parsing and examining a document: +Nokogiri is a large library, and so it's challenging to briefly summarize it. We've tried to provide long, real-world examples at [Tutorials](https://nokogiri.org/tutorials/toc.html). + +### Parsing and Querying + +Here is example usage for parsing and querying a document: ```ruby #! /usr/bin/env ruby @@ -108,51 +171,26 @@ require 'nokogiri' require 'open-uri' # Fetch and parse HTML document -doc = Nokogiri::HTML(open('https://nokogiri.org/tutorials/installing_nokogiri.html')) +doc = Nokogiri::HTML(URI.open('https://nokogiri.org/tutorials/installing_nokogiri.html')) -puts "### Search for nodes by css" +# Search for nodes by css doc.css('nav ul.menu li a', 'article h2').each do |link| puts link.content end -puts "### Search for nodes by xpath" +# Search for nodes by xpath doc.xpath('//nav//ul//li/a', '//article//h2').each do |link| puts link.content end -puts "### Or mix and match." +# Or mix and match doc.search('nav ul.menu li a', '//article//h2').each do |link| puts link.content end ``` -## Requirements - -* Ruby 2.3.0 or higher, including any development packages necessary - to compile native extensions. 
- -* In Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the - gem, but if you want to use the system versions: - - * First, check out [the long list](http://www.xmlsoft.org/news.html) - of fixes and changes between releases before deciding to use any - version older than is bundled with Nokogiri. - - * At install time, set the environment variable - `NOKOGIRI_USE_SYSTEM_LIBRARIES` or else use the - `--use-system-libraries` argument. (See - https://nokogiri.org/tutorials/installing_nokogiri.html#install-with-system-libraries - for specifics.) - - * libxml2 >=2.6.21 with iconv support - (libxml2-dev/-devel is also required) - - * libxslt, built with and supported by the given libxml2 - (libxslt-dev/-devel is also required) - - -## Encoding +### Encoding Strings are always stored as UTF-8 internally. Methods that return text values will always return UTF-8 encoded strings. Methods that @@ -178,12 +216,41 @@ explicitly setting the encoding to EUC-JP on the parser: ``` -## Development +## Technical Overview + +### Guiding Principles + +As noted above, two guiding principles of the software are: + +- be secure-by-default by treating all documents as **untrusted** by default +- be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers + +Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example). 
-```bash - bundle install - bundle exec rake compile test -``` + +### CRuby + +The Ruby (a.k.a., CRuby, MRI, YARV) implementation is a C extension that depends on libxml2 and libxslt (which in turn depend on zlib and possibly libiconv). + +These dependencies are met by default by Nokogiri's packaged versions of the libxml2 and libxslt source code, but a configuration option `--use-system-libraries` is provided to allow specification of alternative library locations. See [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for full documentation. + +We provide native gems by pre-compiling libxml2 and libxslt (and potentially zlib and libiconv) and packaging them into the gem file. In this case, no compilation is necessary at installation time, which leads to faster and more reliable installation. + +See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems. + + +### JRuby + +The Java (a.k.a. JRuby) implementation is a Java extension that depends primarily on Xerces and NekoHTML for parsing, though additional dependencies are on `isorelax`, `nekodtd`, `jing`, `serializer`, `xalan-j`, and `xml-apis`. + +These dependencies are provided by pre-compiled jar files packaged in the `java` platform gem. + +See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems. + + +## Contributing + +See [`CONTRIBUTING.md`](CONTRIBUTING.md) for an intro guide to developing Nokogiri. ## Code of Conduct @@ -196,3 +263,25 @@ We've adopted the Contributor Covenant code of conduct, which you can read in fu This project is licensed under the terms of the MIT license. See this license at [`LICENSE.md`](LICENSE.md). + + +### Dependencies + +Some additional libraries may be distributed with your version of Nokogiri. 
Please see [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for a discussion of the variations as well as the licenses thereof. + + +## Authors + +- Mike Dalessio +- Aaron Patterson +- Yoko Harada +- Akinori MUSHA +- John Shahid +- Karol Bucek +- Sam Ruby +- Craig Barnes +- Stephen Checkoway +- Lars Kanis +- Sergio Arbeo +- Timothy Elliott +- Nobuyoshi Nakada diff --git a/ROADMAP.md b/ROADMAP.md index 4fa34a02da..af307133d7 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2,10 +2,10 @@ ## overhaul serialize/pretty printing API -* https://github.com/sparklemotion/nokogiri/issues/530 +* [#530](https://github.com/sparklemotion/nokogiri/issues/530) XHTML formatting can't be turned off -* https://github.com/sparklemotion/nokogiri/issues/415 +* [#415](https://github.com/sparklemotion/nokogiri/issues/415) XML formatting should be no formatting @@ -16,36 +16,36 @@ ## Node should not be Enumerable; and should have a better attributes API -* https://github.com/sparklemotion/nokogiri/issues/679 +* [#679](https://github.com/sparklemotion/nokogiri/issues/679) Mixing in Enumerable has some unintended consequences; plus we want to improve the attributes API * Some ideas for a better attributes API? 
- * (closed) https://github.com/sparklemotion/nokogiri/issues/666 - * https://github.com/sparklemotion/nokogiri/issues/765 + * (closed) [#666](https://github.com/sparklemotion/nokogiri/issues/666) + * [#765](https://github.com/sparklemotion/nokogiri/issues/765) ## improve CSS query parsing -* https://github.com/sparklemotion/nokogiri/issues/528 +* [#528](https://github.com/sparklemotion/nokogiri/issues/528) support `:not()` with a nontrivial argument, like `:not(div p.c)` -* https://github.com/sparklemotion/nokogiri/issues/451 +* [#451](https://github.com/sparklemotion/nokogiri/issues/451) chained :not pseudoselectors * better jQuery selector and CSS pseudo-selector support: - * https://github.com/sparklemotion/nokogiri/issues/621 - * https://github.com/sparklemotion/nokogiri/issues/342 - * https://github.com/sparklemotion/nokogiri/issues/628 - * https://github.com/sparklemotion/nokogiri/issues/652 - * https://github.com/sparklemotion/nokogiri/issues/688 + * [#621](https://github.com/sparklemotion/nokogiri/issues/621) + * [#342](https://github.com/sparklemotion/nokogiri/issues/342) + * [#628](https://github.com/sparklemotion/nokogiri/issues/628) + * [#652](https://github.com/sparklemotion/nokogiri/issues/652) + * [#688](https://github.com/sparklemotion/nokogiri/issues/688) -* https://github.com/sparklemotion/nokogiri/issues/394 +* [#394](https://github.com/sparklemotion/nokogiri/issues/394) nth-of-type is wrong, and possibly other selectors as well -* https://github.com/sparklemotion/nokogiri/issues/309 +* [#309](https://github.com/sparklemotion/nokogiri/issues/309) incorrect query being executed -* https://github.com/sparklemotion/nokogiri/issues/350 +* [#350](https://github.com/sparklemotion/nokogiri/issues/350) :has is wrong? @@ -53,24 +53,24 @@ * there are a few tickets about searches not working properly if you use or do not use the context node as part of the search. 
- - https://github.com/sparklemotion/nokogiri/issues/213 - - https://github.com/sparklemotion/nokogiri/issues/370 - - https://github.com/sparklemotion/nokogiri/issues/454 - - https://github.com/sparklemotion/nokogiri/issues/572 + - [#213](https://github.com/sparklemotion/nokogiri/issues/213) + - [#370](https://github.com/sparklemotion/nokogiri/issues/370) + - [#454](https://github.com/sparklemotion/nokogiri/issues/454) + - [#572](https://github.com/sparklemotion/nokogiri/issues/572) could we fix this by making DocumentFragment be a subclass of NodeSet? ## Better Syntax for custom XPath function handler -* https://github.com/sparklemotion/nokogiri/pull/464 +* [PR#464](https://github.com/sparklemotion/nokogiri/issues/464) ## Better Syntax around Node#xpath and NodeSet#xpath * look at those methods, and use of Node#extract_params in Node#{css,search} - * we should standardize on a hash of options for these and other calls + * we should standardize on a hash of options for these and other calls * what should NodeSet#xpath return? - * https://github.com/sparklemotion/nokogiri/issues/656 + * [#656](https://github.com/sparklemotion/nokogiri/issues/656) ## Encoding @@ -105,7 +105,23 @@ proper convention. `collect_namespaces` is returning a hash, which means it can't return namespaces with the same prefix. See this issue for background: -> https://github.com/sparklemotion/nokogiri/issues/885 +> [#885](https://github.com/sparklemotion/nokogiri/issues/885) Do we care? This seems like a useless method, but then again I hate XML, so what do I know? + + +## Overhaul `ParseOptions` + +Currently we mirror libxml2's parse options, and then retrofit those options on top of Xerces-J for JRuby. + +* I'd like to identify which options work across both parsers, +* And overhaul the parse methods so that these options are easier to use. 
+ +By "easier to use" I mean: + +* it's unwieldy to create a block to set/unset parse options +* it's unwieldy to create a constant like `MY_PARSE_OPTIONS = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::RECOVER ...` +* some options are named dangerously poorly, like `NOENT` which [does the opposite of what it says](https://github.com/sparklemotion/nokogiri/issues/1582#issuecomment-562180275) +* semantically some options should be set/unset together, specifically "this is a trusted document" or "this is an untrusted document" should flip the senses of `NONET` and `NOENT` and `DTDLOAD` together. +* we need the ability to invent new parse options, like the one suggested in [#1582](https://github.com/sparklemotion/nokogiri/issues/1582) that would allow local entities but not external entities. diff --git a/Rakefile b/Rakefile index 7526d4df7b..1638c8625a 100644 --- a/Rakefile +++ b/Rakefile @@ -1,336 +1,10 @@ -# -*- ruby -*- -require 'rubygems' -require 'shellwords' +# frozen_string_literal: true -gem 'hoe' -require 'hoe' -Hoe.plugin :debugging -Hoe.plugin :git -Hoe.plugin :gemspec -Hoe.plugin :bundler +# +# Tasks are all loaded from `rakelib/*.rake`. +# You may want to use `rake -T` to see what's available. +# +require "bundler" +NOKOGIRI_SPEC = Bundler.load_gemspec("nokogiri.gemspec") -GENERATED_PARSER = "lib/nokogiri/css/parser.rb" -GENERATED_TOKENIZER = "lib/nokogiri/css/tokenizer.rb" - -def java? - /java/ === RUBY_PLATFORM -end - -ENV['LANG'] = "en_US.UTF-8" # UBUNTU 10.04, Y U NO DEFAULT TO UTF-8? 
- -CrossRuby = Struct.new(:version, :host) { - def ver - @ver ||= version[/\A[^-]+/] - end - - def minor_ver - @minor_ver ||= ver[/\A\d\.\d(?=\.)/] - end - - def api_ver_suffix - case minor_ver - when nil - raise "unsupported version: #{ver}" - else - minor_ver.delete('.') << '0' - end - end - - def platform - @platform ||= - case host - when /\Ax86_64-/ - 'x64-mingw32' - when /\Ai[3-6]86-/ - 'x86-mingw32' - else - raise "unsupported host: #{host}" - end - end - - def tool(name) - (@binutils_prefix ||= - case platform - when 'x64-mingw32' - 'x86_64-w64-mingw32-' - when 'x86-mingw32' - 'i686-w64-mingw32-' - end) + name - end - - def target - case platform - when 'x64-mingw32' - 'pei-x86-64' - when 'x86-mingw32' - 'pei-i386' - end - end - - def libruby_dll - case platform - when 'x64-mingw32' - "x64-msvcrt-ruby#{api_ver_suffix}.dll" - when 'x86-mingw32' - "msvcrt-ruby#{api_ver_suffix}.dll" - end - end - - def dlls - [ - 'kernel32.dll', - 'msvcrt.dll', - 'ws2_32.dll', - *(case - when ver >= '2.0.0' - 'user32.dll' - end), - libruby_dll - ] - end -} - -CROSS_RUBIES = File.read('.cross_rubies').lines.flat_map { |line| - case line - when /\A([^#]+):([^#]+)/ - CrossRuby.new($1, $2) - else - [] - end -} - -ENV['RUBY_CC_VERSION'] ||= CROSS_RUBIES.map(&:ver).uniq.join(":") - -HOE = Hoe.spec 'nokogiri' do - developer 'Aaron Patterson', 'aaronp@rubyforge.org' - developer 'Mike Dalessio', 'mike.dalessio@gmail.com' - developer 'Yoko Harada', 'yokolet@gmail.com' - developer 'Tim Elliott', 'tle@holymonkey.com' - developer 'Akinori MUSHA', 'knu@idaemons.org' - developer 'John Shahid', 'jvshahid@gmail.com' - developer 'Lars Kanis', 'lars@greiz-reinsdorf.de' - - license "MIT" - - self.readme_file = "README.md" - self.history_file = "CHANGELOG.md" - - self.extra_rdoc_files = FileList['ext/nokogiri/*.c'] - - self.clean_globs += [ - 'nokogiri.gemspec', - 'lib/nokogiri/nokogiri.{bundle,jar,rb,so}', - 'lib/nokogiri/[0-9].[0-9]', - 'concourse/images/*.generated' - ] - self.clean_globs += 
Dir.glob("ports/*").reject { |d| d =~ %r{/archives$} } - - unless java? - self.extra_deps += [ - ["mini_portile2", "~> 2.4.0"], # keep version in sync with extconf.rb - ] - end - - self.extra_dev_deps += [ - ["concourse", "~> 0.24"], - ["hoe-bundler", "~> 1.2"], - ["hoe-debugging", "~> 2.0"], - ["hoe-gemspec", "~> 1.0"], - ["hoe-git", "~> 1.6"], - ["minitest", "~> 5.8"], - ["racc", "~> 1.4.14"], - ["rake", "~> 12.0"], - ["rake-compiler", "~> 1.0.3"], - ["rake-compiler-dock", "~> 0.7.0"], - ["rexical", "~> 1.0.5"], - ["simplecov", "~> 0.16"], - ] - - self.spec_extras = { - :extensions => ["ext/nokogiri/extconf.rb"], - :required_ruby_version => '>= 2.3.0' - } - - self.testlib = :minitest - self.test_prelude = 'require "helper"' # ensure simplecov gets loaded before anything else -end - -# ---------------------------------------- - -def add_file_to_gem relative_source_path - dest_path = File.join(gem_build_path, relative_source_path) - dest_dir = File.dirname(dest_path) - - mkdir_p dest_dir unless Dir.exist?(dest_dir) - rm_f dest_path if File.exist?(dest_path) - safe_ln relative_source_path, dest_path - - HOE.spec.files << relative_source_path -end - -def gem_build_path - File.join 'pkg', HOE.spec.full_name -end - -if java? - # TODO: clean this section up. 
- require "rake/javaextensiontask" - Rake::JavaExtensionTask.new("nokogiri", HOE.spec) do |ext| - jruby_home = RbConfig::CONFIG['prefix'] - ext.ext_dir = 'ext/java' - ext.lib_dir = 'lib/nokogiri' - ext.source_version = '1.6' - ext.target_version = '1.6' - jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar'] - ext.classpath = jars.map { |x| File.expand_path x }.join ':' - ext.debug = true if ENV['JAVA_DEBUG'] - end - - task gem_build_path => [:compile] do - add_file_to_gem 'lib/nokogiri/nokogiri.jar' - end -else - begin - require 'rake/extensioncompiler' - # Ensure mingw compiler is installed - Rake::ExtensionCompiler.mingw_host - mingw_available = true - rescue - mingw_available = false - end - require "rake/extensiontask" - - HOE.spec.files.reject! { |f| f =~ %r{\.(java|jar)$} } - - dependencies = YAML.load_file("dependencies.yml") - - task gem_build_path do - %w[libxml2 libxslt].each do |lib| - version = dependencies[lib]["version"] - archive = File.join("ports", "archives", "#{lib}-#{version}.tar.gz") - add_file_to_gem archive - patchesdir = File.join("patches", lib) - patches = `#{['git', 'ls-files', patchesdir].shelljoin}`.split("\n").grep(/\.patch\z/) - patches.each { |patch| - add_file_to_gem patch - } - (untracked = Dir[File.join(patchesdir, '*.patch')] - patches).empty? or - at_exit { - untracked.each { |patch| - puts "** WARNING: untracked patch file not added to gem: #{patch}" - } - } - end - end - - Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext| - ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact) - ext.config_options << ENV['EXTOPTS'] - if mingw_available - ext.cross_compile = true - ext.cross_platform = CROSS_RUBIES.map(&:platform).uniq - ext.cross_config_options << "--enable-cross-build" - ext.cross_compiling do |spec| - libs = dependencies.map { |name, dep| "#{name}-#{dep["version"]}" }.join(', ') - - spec.post_install_message = <<-EOS -Nokogiri is built with the packaged libraries: #{libs}. 
- EOS - spec.files.reject! { |path| File.fnmatch?('ports/*', path) } - end - end - end -end - -# ---------------------------------------- - -desc "Generate css/parser.rb and css/tokenizer.rex" -task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER] -task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec") -[:compile, :check_manifest].each do |task_name| - Rake::Task[task_name].prerequisites << GENERATED_PARSER - Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER -end - -file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t| - sh "racc -l -o #{t.name} #{t.prerequisites.first}" -end - -file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t| - sh "rex --independent -o #{t.name} #{t.prerequisites.first}" -end - -# ---------------------------------------- - -desc "set environment variables to build and/or test with debug options" -task :debug do - ENV['NOKOGIRI_DEBUG'] = "true" - ENV['CFLAGS'] ||= "" - ENV['CFLAGS'] += " -DDEBUG" -end - -task :java_debug do - ENV['JRUBY_OPTS'] = "#{ENV['JRUBY_OPTS']} --debug --dev" - ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if ENV['JAVA_DEBUG'] -end -Rake::Task[:test].prerequisites << :java_debug - -if Hoe.plugins.include?(:debugging) - ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name| - Rake::Task["test:#{task_name}"].prerequisites << :compile - end -end - -require 'concourse' -Concourse.new("nokogiri", fly_target: "ci") do |c| - c.add_pipeline "nokogiri", "nokogiri.yml" - c.add_pipeline "nokogiri-pr", "nokogiri-pr.yml" -end - -# ---------------------------------------- - -def verify_dll(dll, cross_ruby) - dll_imports = cross_ruby.dlls - dump = `#{['env', 'LANG=C', cross_ruby.tool('objdump'), '-p', dll].shelljoin}` - raise "unexpected file format for generated dll #{dll}" unless /file format #{Regexp.quote(cross_ruby.target)}\s/ === dump - raise "export function Init_nokogiri not in dll #{dll}" unless /Table.*\sInit_nokogiri\s/mi 
=== dump - - # Verify that the expected DLL dependencies match the actual dependencies - # and that no further dependencies exist. - dll_imports_is = dump.scan(/DLL Name: (.*)$/).map(&:first).map(&:downcase).uniq - if dll_imports_is.sort != dll_imports.sort - raise "unexpected dll imports #{dll_imports_is.inspect} in #{dll}" - end - puts "#{dll}: Looks good!" -end - -task :cross do - rake_compiler_config_path = File.expand_path("~/.rake-compiler/config.yml") - unless File.exists? rake_compiler_config_path - raise "rake-compiler has not installed any cross rubies. Use rake-compiler-dock or 'rake gem:windows' for building binary windows gems." - end - - CROSS_RUBIES.each do |cross_ruby| - task "tmp/#{cross_ruby.platform}/nokogiri/#{cross_ruby.ver}/nokogiri.so" do |t| - # To reduce the gem file size strip mingw32 dlls before packaging - sh [cross_ruby.tool('strip'), '-S', t.name].shelljoin - verify_dll t.name, cross_ruby - end - end -end - -desc "build a windows gem without all the ceremony" -task "gem:windows" do - require "rake_compiler_dock" - RakeCompilerDock.sh "bundle && rake cross native gem MAKE='nice make -j`nproc`' RUBY_CC_VERSION=#{ENV['RUBY_CC_VERSION']}" -end - -desc "build a jruby gem with docker" -task "gem:jruby" do - require "rake_compiler_dock" - RakeCompilerDock.sh "bundle && rake java gem", rubyvm: 'jruby' -end - -require_relative "tasks/docker" - -# vim: syntax=Ruby +task default: [:rubocop, :gumbo, :compile, :test] diff --git a/STANDARD_RESPONSES.md b/STANDARD_RESPONSES.md deleted file mode 100644 index 2c8b0cea03..0000000000 --- a/STANDARD_RESPONSES.md +++ /dev/null @@ -1,47 +0,0 @@ -# Standard Responses to Requests - -These responses are needed often enough that I figured, let's just -check them in for future reference and use. - - -# Not enough information to help - -Hello! - -Thanks for asking this question! However, without more information, -Team Nokogiri cannot reproduce your issue, and so we cannot offer much -help. 
- -Please provide us with: - -* A self-contained script (one that we can run without modification, - and preferably without making external network connections). - -* Please note that you need to include the XML/HTML that you are - operating on. - -* The output of `nokogiri -v`, which will provide details about your - platform and versions of ruby, libxml2 and nokogiri. - -For more information about requesting help or reporting bugs, please -take a look at http://bit.ly/nokohelp - -Thank you so much! - - -# Not a bug - -Hello! - -Thanks for asking this question! Your request for assistance using -Nokogiri will not go unanswered! - -However, Nokogiri's Github Issues is reserved for reporting bugs or -submitting patches. If you ask your question on the mailing list, Team -Nokogiri promises someone will provide you with an answer in a timely -manner. - -If you'd like to read up on Team Nokogiri's rationale for this policy, -please go to http://bit.ly/nokohelp. - -Thank you so much for understanding! And thank you for using Nokogiri. diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 0000000000..87056691e3 --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +# -*- mode: ruby -*- +# vi: set ft=ruby : + +Box = Struct.new(:shortname, :name, :provision) + +# Every Vagrant development environment requires a box. You can search for +# boxes at https://vagrantcloud.com/search. 
+boxen = [] +boxen << Box.new("openbsd", "generic/openbsd6", <<~EOF) + # install rvm + pkg_add gnupg-2.2.12p0 + gpg2 --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB + curl -sSL https://get.rvm.io | bash -s stable + source /etc/profile.d/rvm.sh + usermod -G rvm vagrant + + # install ruby and build-essentials + rvm install ruby-2.7 +EOF +boxen << Box.new("bionic32", "mkorenkov/ubuntu-bionic32", <<~EOF) + export DEBIAN_FRONTEND=noninteractive + apt-get update + apt-get install -y apt-utils + apt-get install -y libxslt-dev libxml2-dev pkg-config + apt-get install -y ruby ruby-dev bundler git +EOF +boxen << Box.new("freebsd", "freebsd/FreeBSD-13.0-CURRENT", <<~EOF) + pkg install rbenv ruby-build +EOF + +Vagrant.configure("2") do |config| + boxen.each do |box| + config.vm.define(box.shortname) do |config| + config.vm.box = box.name + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + # config.vm.synced_folder "../data", "/vagrant_data" + + config.vm.provider("virtualbox") do |vb| + vb.customize(["modifyvm", :id, "--cpus", 2]) + vb.customize(["modifyvm", :id, "--memory", 1024]) + end + + config.vm.synced_folder(".", "/nokogiri") + + if box.provision + config.vm.provision("shell", inline: box.provision) + end + end + end + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. + # Example for VirtualBox: + # + # config.vm.provider "virtualbox" do |vb| + # # Display the VirtualBox GUI when booting the machine + # vb.gui = true + # + # # Customize the amount of memory on the VM: + # vb.memory = "1024" + # end + # + # View the documentation for the provider you are using for more + # information on available options. 
+ + # Enable provisioning with a shell script. Additional provisioners such as + # Ansible, Chef, Docker, Puppet and Salt are also available. Please see the + # documentation for more information about their specific syntax and use. + # config.vm.provision "shell", inline: <<-SHELL + # apt-get update + # apt-get install -y apache2 + # SHELL +end diff --git a/Y_U_NO_GEMSPEC.md b/Y_U_NO_GEMSPEC.md deleted file mode 100644 index 710e2ae25b..0000000000 --- a/Y_U_NO_GEMSPEC.md +++ /dev/null @@ -1,155 +0,0 @@ -(note: this was originally a blog post published at http://blog.flavorjon.es/2012/03/y-u-no-gemspec.html) - -## tl;dr - -1. Team Nokogiri are not 10-foot-tall code-crunching robots, so `master` is usually unstable. -2. Unstable code can corrupt your data and crash your application, which would make everybody look bad. -3. Therefore, the _risk_ associated with using unstable code is severe; for you _and_ for Team Nokogiri. -4. The absence of a gemspec is a risk mitigation tactic. -5. You can always ask for an RC release. - - -## Why Isn't There a Gemspec!? - -OHAI! Thank you for asking this question! - -Team Nokogiri gets asked this pretty frequently. Just a sample from -the historical record: - -* [Issue #274](https://github.com/sparklemotion/nokogiri/issues/274) -* [Issue #371](https://github.com/sparklemotion/nokogiri/issues/371) -* [A commit removing nokogiri.gemspec](https://github.com/sparklemotion/nokogiri/commit/7f17a643a05ca381d65131515b54d4a3a61ca2e1#commitcomment-667477) -* [A nokogiri-talk thread](http://groups.google.com/group/nokogiri-talk/browse_thread/thread/4706b002e492d23f) -* [Another nokogiri-talk thread](http://groups.google.com/group/nokogiri-talk/browse_thread/thread/0b201bb80ea3eea0) - -Sometimes people imply that we've forgotten, or that we don't know how to -properly manage our codebase. Those people are super fun to respond -to! 
- -We've gone back and forth a couple of times over the past few years, -but the current policy of Team Nokogiri is to **not** provide a -gemspec in the Github repo. This is a conscious choice, not an -oversight. - - -## But You Didn't Answer the Question! - -Ah, I was hoping you wouldn't notice. Well, OK, let's do this, if -you're serious about it. - -I'd like to start by talking about _risk_. Specifically, the risk -associated with using a known-unstable version of Nokogiri. - - -### Risk - -One common way to evaluate the _risk_ of an incident is: - - risk = probability x impact - -You can read more about this on [the internets](http://en.wikipedia.org/wiki/Risk_Matrix). - -The _risk_ associated with a Nokogiri bug could be loosely defined by -answering the questions: - -* "How likely is it that a bug exists?" (probability) -* "How severe will the consequences of a bug be?" (impact) - - -### Probability - -The `master` branch should be considered unstable. Team Nokogiri are -not 10-foot-tall code-crunching robots; we are humans. We make -mistakes, and as a result, any arbitrary commit on `master` is likely -to contain bugs. - -Just as an example, Nokogiri `master` was unstable for about five -months between November 2011 and March 2012. It was unstable not -because we were sloppy, or didn't care, but because the fixes were -hard and unobvious. - -When we release Nokogiri, we test for memory leaks and invalid memory -access on all kinds of platforms with many flavors of Ruby and lots of -versions of libxml2. Because these tests are time-consuming, we don't -run them on every commit. We run them often when preparing a release. - -If we're releasing Nokogiri, it means we think it's rock solid. - -And if we're not releasing it, it means there are probably bugs. - - -### Impact - -Nokogiri is a gem with native extensions. 
This means it's not pure -Ruby -- there's C or Java code being compiled and run, which means -that there's always a chance that the gem will crash your application, -or worse. Possible outcomes include: - -* leaking memory -* corrupting data -* making benign code crash (due to memory corruption) - -So, then, a bug in a native extension can have much worse downside -than you might think. It's not just going to do something unexpected; -it's possibly going to do terrible, awful things to your application -and data. - -**Nobody** wants that to happen. Especially Team Nokogiri. - - -### Risk, Redux - -So, if you accept the equation - - risk = probability x impact - -and you believe me when I say that: - -* the probablility of a bug in unreleased code is high, and -* the impact of a bug is likely to be severe, - -then you should easily see that the _risk_ associated with a bug in -Nokogiri is quite high. - -Part of Team Nokogiri's job is to try to mitigate this risk. We have a -number of tactics that we use to accomplish this: - -* we respond quickly to bug reports, particularly when they are possible memory issues -* we review each others' commits -* we have a thorough test suite, and we test-drive new features -* we discuss code design and issues on a core developer mailing list -* we use valgrind to test for memory issues (leaks and invalid - access) on multiple combinations of OS, libxml2 and Ruby -* we package release candidates, and encourage devs to use them -* **we do NOT commit a gemspec in our git repository** - -Yes, that's right, the absence of a gemspec is a risk mitigation -tactic. Not only does Team Nokogiri not want to imply support for -`master`, we want to **actively discourage** people from using -it. Because it's not stable. - - -## But I Want to Do It Anyway - -Another option, is to email the [nokogiri-talk -list](http://groups.google.com/group/nokogiri-talk) and ask for a -release candidate to be built. 
We're pretty accommodating if there's a -bugfix that's a blocker for you. And if we can't release an RC, we'll -tell you why. - -And in the end, nothing is stopping you from cloning the repo and -generating a private gemspec. This is an extra step or two, but it has -the benefit of making sure developers have thought through the costs -and risks involved; and it tends to select for developers who know -what they're doing. - - -## In Conclusion - -Team Nokogiri takes stability very seriously. We want everybody who -uses Nokogiri to have a pleasant experience. And so we want to make -sure that you're using the best software we can make. - -Please keep in mind that we're trying very hard to do the right thing -for all Nokogiri users out there in Rubyland. Nokogiri loves you very -much, and we hope you love it back. diff --git a/adr/2022-12-darwin-symbol-resolution.md b/adr/2022-12-darwin-symbol-resolution.md new file mode 100644 index 0000000000..06cde6472c --- /dev/null +++ b/adr/2022-12-darwin-symbol-resolution.md @@ -0,0 +1,53 @@ + +# 2022-12 Hide libxml2 and libxslt symbols on Darwin in Ruby 3.2 native gem + +## Status + +Accepted, but reversible if an alternative technical solution can be found. + + +## Context + +In the final days of shipping Nokogiri v1.14.0 with native (precompiled) support for Ruby 3.2, we're struggling a bit with symbol resolution. + +Ruby 3.2, when compiling on Darwin, uses the `-bundle_loader` linker flag to resolve symbols against the Ruby executable as if it were a shared library. (This means that, when running a Ruby compiled with the `--enable-shared` flag, that the extension will fail to resolve Ruby symbols like `rb_cObject`.) + +We can work around that with the `-flat_namespace` linker flag, which mimics the behavior we already see on Linux and allows us to resolve these symbols at runtime. 
But for reasons I don't fully understand, many Rubies on Darwin seem to load the libxml2 and libxslt dylibs that ship with XCode commandline tools ("CLT"), and so _every_ libxml2 symbol is a collision and resolves to the _wrong_ libxml2 (not the version we've patched and statically linked into the extension). + +To work around this last problem, the best solution we know of right now seems to be to avoid exporting those symbols by using the `-load_hidden` flag (or a similar mechanism, there are several we could choose from). + + +## Decision + +Nokogiri v1.14.0's precompiled native gem for Darwin (MacOS) Ruby 3.2 will be built with: + +- the `-flat_namespace` flag to ensure the extension can be used by both `--enable-shared` and `--disable-shared` Rubies, +- and the `-load_hidden` flag for both `libxml2` and `libxslt` to avoid accidentally resolving to non-vendored versions of those libraries + + +## Consequences + +This would prevent accidental symbol collisions such as the https://github.com/sparklemotion/nokogiri/pull/2106 on Linux, and would ensure that we always pull in the desired version of libxml2, avoiding problems like the ones we're currently experiencing with Ruby 3.2 (see https://github.com/rake-compiler/rake-compiler-dock/issues/87 for extended discussion and more links). + +This would also, however, prevent a small but non-zero number of downstream gems from integrating with Nokogiri's C API, or the C API of libxml2, libxslt, or libgumbo. A notable gem that did this was https://github.com/rubys/nokogumbo (now merged into Nokogiri itself). Another notable gem that I know that does this is `nokogiri-xmlsec` (and the various forks of it, the most popular seems to be https://github.com/instructure/nokogiri-xmlsec-instructure). So this may prevent experimentation and innovation (see Nokogumbo) as well as putting hurdles in front of useful integrations like xmlsec. 
+ + +## Alternatives considered + +__Remove the `-bundle_loader` flag from the link line.__ Although this works, it feels a bit like fighting the toolchain and the Ruby core team. It's a bit more complicated of a solution, it's harder for me to reason about, and I'm not positive we won't discover some weird side effect later on. + +__Fully hide all symbols everywhere__ is taking the chosen solution to the extreme, and may be what we decide to do in the future (see [RFC: Stop exporting symbols · Discussion #2746 · sparklemotion/nokogiri](https://github.com/sparklemotion/nokogiri/discussions/2746)). For now, though, I'd like to keep our options open and not break compatibility completely in this v1.14.0 release. By only doing this where we're forced to, we have a chance to learn about how the API is being used, and also buy some time to hear feedback and to find an alternative solution. + +__Stop precompiling__ or __Stop vendoring libraries__ should always be options we consider, because offering native gems and vendoring libraries introduces complexity. However, I covered many of the reasons I think it's good for Nokogiri to do this in [my RubyConf 2021 talk titled "Building Native Extensions. This Could Take A While..."](https://www.youtube.com/watch?v=jtpOci5o50g) and those reasons are still valid, notably our ability to patch libxml2 for performance (see [#2144](https://github.com/sparklemotion/nokogiri/pull/2144)), functional (see [#2403](https://github.com/sparklemotion/nokogiri/pull/2403)), or security (see [#2294](https://github.com/sparklemotion/nokogiri/pull/2294)) reasons. 
+ + +## References + +- Ruby commit introducing `-bundle_loader`: https://github.com/ruby/ruby/commit/50d81bf +- The PR implementing this decision is [dep: add ruby 3.2 support by flavorjones · Pull Request #2732 · sparklemotion/nokogiri](https://github.com/sparklemotion/nokogiri/pull/2732) +- Future symbol visibility decision will be made at [RFC: Stop exporting symbols · Discussion #2746 · sparklemotion/nokogiri](https://github.com/sparklemotion/nokogiri/discussions/2746) +- Background context and solution details at + - [[Ruby 3.2] having runtime issues on darwin · Issue #87 · rake-compiler/rake-compiler-dock](https://github.com/rake-compiler/rake-compiler-dock/issues/87) + - https://github.com/stevecheckoway/bundle_test + - [explore whether \`-load\_hidden\` will work around flat namespace by flavorjones · Pull Request #1 · stevecheckoway/bundle\_test](https://github.com/stevecheckoway/bundle_test/pull/1) +- [Video from Apple explaining the Darwin toolchain changes](https://developer.apple.com/videos/play/wwdc2022/110362/) diff --git a/appveyor.yml b/appveyor.yml index 188ad24307..15a26bf37d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,19 @@ +image: Visual Studio 2019 + +branches: + only: + - main + +skip_branch_with_pr: true + +clone_depth: 1 + +cache: + - vendor/bundle + - ports/archives + install: + - IF DEFINED INSTALL_PACKAGES ( ridk exec pacman --noconfirm --needed --sync %INSTALL_PACKAGES% ) - SET PATH=C:\ruby%ruby_version%\bin;%PATH% - ps: | if ($env:ruby_version -like "*head*") { @@ -8,8 +23,9 @@ install: - ruby --version - gem --version - gem install bundler --conservative + - git submodule update --init + - bundle config --local path vendor/bundle - bundle install - - IF DEFINED INSTALL_PACKAGES ( ridk exec pacman --noconfirm --needed --sync %INSTALL_PACKAGES% ) build: off @@ -18,16 +34,14 @@ test_script: environment: matrix: - - ruby_version: head-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" + - ruby_version: 31 + INSTALL_PACKAGES: 
"mingw-w64-i686-libyaml" + - ruby_version: 31 + INSTALL_PACKAGES: "mingw-w64-i686-libxslt mingw-w64-i686-libyaml" EXTCONF_PARAMS: "--use-system-libraries" - - ruby_version: 26 - - ruby_version: 25-x64 - - ruby_version: 24 - INSTALL_PACKAGES: "mingw-w64-i686-libxslt" - EXTCONF_PARAMS: "--use-system-libraries" - - ruby_version: 23-x64 -matrix: - allow_failures: - - ruby_version: head + - ruby_version: 27 + INSTALL_PACKAGES: "mingw-w64-i686-libyaml" + - ruby_version: 27 + INSTALL_PACKAGES: "mingw-w64-i686-libxslt mingw-w64-i686-libyaml" + EXTCONF_PARAMS: "--use-system-libraries" diff --git a/bin/nokogiri b/bin/nokogiri index 60b1eb3cea..04a5ceae19 100755 --- a/bin/nokogiri +++ b/bin/nokogiri @@ -1,61 +1,77 @@ #!/usr/bin/env ruby -require 'optparse' -require 'open-uri' -require 'uri' -require 'rubygems' -require 'nokogiri' -autoload :IRB, 'irb' +# frozen_string_literal: true + +require "optparse" +require "open-uri" +require "uri" +require "rubygems" +require "nokogiri" +autoload :IRB, "irb" parse_class = Nokogiri encoding = nil # This module provides some tunables with the nokogiri CLI for use in # your ~/.nokogirirc. -module Nokogiri::CLI - class << self - # Specify the console engine, defaulted to IRB. - # - # call-seq: - # require 'pry' - # Nokogiri::CLI.console = Pry - attr_writer :console - - def console - case @console - when Symbol - Kernel.const_get(@console) - else - @console +module Nokogiri + module CLI + class << self + # Specify the console engine, defaulted to IRB. 
+ # + # call-seq: + # require 'pry' + # Nokogiri::CLI.console = Pry + attr_writer :console + + def console + case @console + when Symbol + Kernel.const_get(@console) + else + @console + end end + + attr_accessor :rcfile end - attr_accessor :rcfile + self.rcfile = File.expand_path("~/.nokogirirc") + self.console = :IRB end +end - self.rcfile = File.expand_path('~/.nokogirirc') - self.console = :IRB +def safe_read(uri_or_path) + uri = URI.parse(uri_or_path) + case uri + when URI::HTTP + uri.read + when URI::File + File.read(uri.path) + else + File.read(uri_or_path) + end end opts = OptionParser.new do |opts| opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser" - opts.define_head "Usage: nokogiri [options]" - opts.separator "" - opts.separator "Examples:" - opts.separator " nokogiri https://www.ruby-lang.org/" - opts.separator " nokogiri ./public/index.html" - opts.separator " curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'" - opts.separator "" - opts.separator "Options:" + opts.define_head("Usage: nokogiri [options]") + opts.separator("") + opts.separator("Examples:") + opts.separator(" nokogiri https://www.ruby-lang.org/") + opts.separator(" nokogiri ./public/index.html") + opts.separator(" curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'") + opts.separator("") + opts.separator("Options:") opts.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v| - parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v] + parse_class = { xml: Nokogiri::XML, html: Nokogiri::HTML }[v] end opts.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v| Nokogiri::CLI.rcfile = v end - opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || 'none'})") do |v| + opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || "none"})") do |v| encoding = v end @@ -64,7 +80,7 @@ opts = OptionParser.new do |opts| end 
opts.on("--rng ", "Validate using this rng file.") do |v| - @rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)} + @rng = Nokogiri::XML::RelaxNG(safe_read(v)) end opts.on_tail("-?", "--help", "Show this message") do @@ -90,15 +106,10 @@ if File.file?(Nokogiri::CLI.rcfile) load Nokogiri::CLI.rcfile end -if url || $stdin.tty? - case uri = (URI(url) rescue url) - when URI::HTTP - @doc = parse_class.parse(uri.read, url, encoding) - else - @doc = parse_class.parse(open(url).read, nil, encoding) - end +@doc = if url || $stdin.tty? + parse_class.parse(safe_read(url), url, encoding) else - @doc = parse_class.parse($stdin, nil, encoding) + parse_class.parse($stdin, nil, encoding) end $_ = @doc @@ -107,12 +118,14 @@ if @rng @rng.validate(@doc).each do |error| puts error.message end -else - if @script - eval @script, binding, '
' - else - puts "Your document is stored in @doc..." - Nokogiri::CLI.console.start +elsif @script + begin + eval(@script, binding, "
") # rubocop:disable Security/Eval + rescue Exception => e # rubocop:disable Lint/RescueException + warn("ERROR: Exception raised while evaluating '#{@script}'") + raise e end +else + puts "Your document is stored in @doc..." + Nokogiri::CLI.console.start end - diff --git a/build_all b/build_all deleted file mode 100755 index f7f6e99792..0000000000 --- a/build_all +++ /dev/null @@ -1,24 +0,0 @@ -#! /usr/bin/env bash -# -# script to build gems for all relevant platforms -# -set -o errexit - -rm -rf tmp pkg gems -mkdir -p gems - -# MRI et al (standard gem) -bundle exec rake clean -bundle exec rake compile test -bundle exec rake gem -cp -v pkg/nokogiri*.gem gems - -# jruby -bundle exec rake clean -bundle exec rake gem:jruby -cp -v pkg/nokogiri*java.gem gems - -# windows (x86-mingw32 and x64-mingw32) -bundle exec rake clean -bundle exec rake gem:windows -cp -v pkg/nokogiri*{x86,x64}-mingw32*.gem gems diff --git a/concourse/TODO.md b/concourse/TODO.md deleted file mode 100644 index 30e7af6019..0000000000 --- a/concourse/TODO.md +++ /dev/null @@ -1,48 +0,0 @@ -# nokogiri concourse to-do - -## concourse.yml - -* [x] real ssl cert -* [x] github authentication -* [x] bastion host -* [x] upgrade bbl - -## nokogiri.yml - -* [x] test using system libraries -* [x] handle pull requests -* [x] run windows tests under devkit -* [ ] osx - * system - * system-homebrew - * vendored -* [ ] build an rc gem and upload to rubygems [→ rubygems resource] - * should always check manifest -* install gem and test: - * [ ] osx - * [ ] linux (system) - * [ ] linux (vendored) - * [ ] linux (vendored, --disable-static) - * [ ] OpenSuse with site_config (lib64, #1562) - * [ ] windows (fat binary) - * [ ] windows (devkit) -* notifications on failure / success - * [x] irc [→ irc resource] - -## other projects - -* [x] pipeline: mini_portile [→ bosh release] -* [x] pipeline: chromedriver-helper -* [x] bosh release for windows worker config: - * [ ] ruby of all supported versions - * [ ] devkit 
installed in all rubies - * [ ] cmake -* [x] resource: irc -* [ ] resource: rubygems - -## nokogiri stretch goals - -* [ ] get openbsd / freebsd / etc. people to donate worker machines -* [ ] use an S3 bucket for sub-artifacts: - * source tarballs - * compiled .dlls diff --git a/concourse/common_anchors.yml b/concourse/common_anchors.yml deleted file mode 100644 index ee4a092c15..0000000000 --- a/concourse/common_anchors.yml +++ /dev/null @@ -1,3 +0,0 @@ - notify_failure_to_irc: ¬ify_failure_to_irc - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build failed ($BUILD_URL)"} diff --git a/concourse/common_prelude.rb b/concourse/common_prelude.rb deleted file mode 100644 index dcae14a7a9..0000000000 --- a/concourse/common_prelude.rb +++ /dev/null @@ -1,8 +0,0 @@ -require 'json' - -$common_ignore_paths = [ - "CHANGELOG.md", - "README.md", - "concourse/**", - "suppressions/**", -].to_json diff --git a/concourse/common_resource_types.yml b/concourse/common_resource_types.yml deleted file mode 100644 index 54f1679843..0000000000 --- a/concourse/common_resource_types.yml +++ /dev/null @@ -1,4 +0,0 @@ - - name: irc-notification - type: docker-image - source: - repository: flavorjones/irc-notification-resource diff --git a/concourse/common_resources.yml b/concourse/common_resources.yml deleted file mode 100644 index 95016f27c0..0000000000 --- a/concourse/common_resources.yml +++ /dev/null @@ -1,16 +0,0 @@ - - name: ci - type: git - icon: "settings" - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: master - disable_ci_skip: true # always get the latest pipeline configuration - - name: nokogiri-irc - type: irc-notification - icon: "bell" - source: - server: chat.freenode.net - port: 7070 - channel: "#nokogiri" - user: {{nokobot-irc-username}} - password: {{nokobot-irc-password}} diff --git a/concourse/images/Dockerfile.jruby.erb b/concourse/images/Dockerfile.jruby.erb deleted file mode 100644 index 49c8ac2ae9..0000000000 
--- a/concourse/images/Dockerfile.jruby.erb +++ /dev/null @@ -1,7 +0,0 @@ -FROM jruby:<%= version %>-jdk - -<%= File.read "java-opts.step" %> - -<%= File.read "update-bundler.step" %> - -<%= File.read "bundle-install.step" %> diff --git a/concourse/images/Dockerfile.mri.erb b/concourse/images/Dockerfile.mri.erb deleted file mode 100644 index e93e24f9be..0000000000 --- a/concourse/images/Dockerfile.mri.erb +++ /dev/null @@ -1,11 +0,0 @@ -FROM ruby:<%= version %> - -<%= File.read "debian-prelude.step" %> - -<%= File.read "debian-valgrind.step" %> - -<%= File.read "debian-libxml-et-al.step" %> - -<%= File.read "update-bundler.step" %> - -<%= File.read "bundle-install.step" %> diff --git a/concourse/images/Dockerfile.xenial.erb b/concourse/images/Dockerfile.xenial.erb deleted file mode 100644 index ee6f2b235c..0000000000 --- a/concourse/images/Dockerfile.xenial.erb +++ /dev/null @@ -1,9 +0,0 @@ -FROM ubuntu:xenial - -<%= File.read "debian-prelude.step" %> - -<%= File.read "debian-libxml-et-al.step" %> - -<%= File.read "debian-ruby.step" %> - -<%= File.read "bundle-install.step" %> diff --git a/concourse/images/bundle-install.step b/concourse/images/bundle-install.step deleted file mode 100644 index 70c1cf4e6b..0000000000 --- a/concourse/images/bundle-install.step +++ /dev/null @@ -1,7 +0,0 @@ -# -*- dockerfile -*- - -COPY Gemfile nokogiri/ -COPY Gemfile.lock nokogiri/ - -RUN gem install bundler -RUN cd nokogiri && bundle install diff --git a/concourse/images/debian-libxml-et-al.step b/concourse/images/debian-libxml-et-al.step deleted file mode 100644 index 5f73f12f3b..0000000000 --- a/concourse/images/debian-libxml-et-al.step +++ /dev/null @@ -1,3 +0,0 @@ -# -*- dockerfile -*- - -RUN apt-get install -y libxslt-dev libxml2-dev pkg-config diff --git a/concourse/images/java-opts.step b/concourse/images/java-opts.step deleted file mode 100644 index cc7d0c7747..0000000000 --- a/concourse/images/java-opts.step +++ /dev/null @@ -1,4 +0,0 @@ -# -*- dockerfile -*- - -# 
https://github.com/docker-library/openjdk/issues/32 -ENV JAVA_OPTS="-Dfile.encoding=UTF8" diff --git a/concourse/nokogiri-pr.yml b/concourse/nokogiri-pr.yml deleted file mode 100644 index f407049394..0000000000 --- a/concourse/nokogiri-pr.yml +++ /dev/null @@ -1,344 +0,0 @@ -% require "common_prelude.rb" - -resource_types: -<%= erbify_file "common_resource_types.yml" -%> - - name: pull-request - type: docker-image - source: - repository: jtarchie/pr - - -resources: -<%= erbify_file "common_resources.yml" -%> - - name: nokogiri-pr - type: pull-request - icon: "github-circle" - source: - repo: sparklemotion/nokogiri - access_token: {{github-repo-status-access-token}} - ignore_paths: <%= $common_ignore_paths %> - - -anchors: -<%= erbify_file "common_anchors.yml" -%> - notify_failure_to_pr: ¬ify_failure_to_pr - put: nokogiri-pr - params: {path: nokogiri-pr, status: failure} - - -jobs: - - name: pr-pending - public: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - - put: nokogiri-pr - params: {path: nokogiri-pr, status: pending} - - -% Concourse.production_rubies.each do |ruby_version| - - name: ruby-<%= ruby_version %>-system - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every -% if ruby_version == Concourse.production_rubies.last - passed: [pr-pending] -% else - passed: ["ruby-<%= Concourse.production_rubies.last %>-system"] -% end - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t -% if ruby_version == Concourse.production_rubies.last - CC_TEST_REPORTER_ID: {{code_climate_reporter_id_nokogiri}} -% end - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - 
- - name: ruby-<%= ruby_version %>-vendored - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-<%= ruby_version %>-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-<%= ruby_version %>-valgrind - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-<%= ruby_version %>-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } -% end - - -% RUBIES[:jruby].each_with_index do |jruby_version, j| - - name: jruby-<%= jruby_version %> - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every -% if j > 0 - passed: ["jruby-<%= RUBIES[:jruby][j-1] %>"] -% else - passed: ["ruby-<%= Concourse.production_rubies.last %>-system"] -% end - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= jruby_version %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } -% end - - - name: gate - public: true - plan: - - get: 
nokogiri-pr - trigger: true - version: every - passed: -% Concourse.production_rubies.each do |ruby_version| - - "ruby-<%= ruby_version %>-valgrind" -% end - - "jruby-<%= RUBIES[:jruby].last %>" - - - - name: gem-test - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - name: gem-test-java - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= RUBIES[:jruby].last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= RUBIES[:jruby].last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems 
- run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-vanilla-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: xenial} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - TEST_WITH_APT_REPO_RUBY: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-libxmlruby-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-libxmlruby-valgrind - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: pr-success - public: true 
- disable_manual_trigger: true - plan: - - get: nokogiri-pr - trigger: true - version: every - passed: - - ruby-vanilla-system - - ruby-libxmlruby-system - - ruby-libxmlruby-valgrind - - gem-test - - gem-test-java - - put: nokogiri-pr - params: {path: nokogiri-pr, status: success} - - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build passed ($BUILD_URL)"} diff --git a/concourse/nokogiri-pr.yml.generated b/concourse/nokogiri-pr.yml.generated deleted file mode 100644 index d474cd5d7f..0000000000 --- a/concourse/nokogiri-pr.yml.generated +++ /dev/null @@ -1,594 +0,0 @@ - -resource_types: - - name: irc-notification - type: docker-image - source: - repository: flavorjones/irc-notification-resource - - name: pull-request - type: docker-image - source: - repository: jtarchie/pr - - -resources: - - name: ci - type: git - icon: "settings" - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: master - disable_ci_skip: true # always get the latest pipeline configuration - - name: nokogiri-irc - type: irc-notification - icon: "bell" - source: - server: chat.freenode.net - port: 7070 - channel: "#nokogiri" - user: {{nokobot-irc-username}} - password: {{nokobot-irc-password}} - - name: nokogiri-pr - type: pull-request - icon: "github-circle" - source: - repo: sparklemotion/nokogiri - access_token: {{github-repo-status-access-token}} - ignore_paths: ["CHANGELOG.md","README.md","concourse/**","suppressions/**"] - - -anchors: - notify_failure_to_irc: ¬ify_failure_to_irc - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build failed ($BUILD_URL)"} - notify_failure_to_pr: ¬ify_failure_to_pr - put: nokogiri-pr - params: {path: nokogiri-pr, status: failure} - - -jobs: - - name: pr-pending - public: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - - put: nokogiri-pr - params: {path: nokogiri-pr, status: pending} - - - - name: ruby-2.3-system - public: true - 
serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.3-vendored - public: true - serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.3-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.3-valgrind - public: true - serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.3-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - name: ruby-2.4-system - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri-pr - path: 
nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.4-vendored - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.4-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.4-valgrind - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.4-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - name: ruby-2.5-system - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.5-vendored - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: 
["ruby-2.5-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.5-valgrind - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.5-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - name: ruby-2.6-system - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: [pr-pending] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - CC_TEST_REPORTER_ID: {{code_climate_reporter_id_nokogiri}} - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.6-vendored - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - 
on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-2.6-valgrind - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: jruby-9.1 - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.1"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - name: jruby-9.2 - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["jruby-9.1"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - name: gate - public: true - plan: - - get: nokogiri-pr - trigger: true - version: every - passed: - - "ruby-2.3-valgrind" - - "ruby-2.4-valgrind" - - "ruby-2.5-valgrind" - - "ruby-2.6-valgrind" - - "jruby-9.2" - - - - name: gem-test - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: 
every - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - name: gem-test-java - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-vanilla-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: 
"flavorjones/nokogiri-test", tag: xenial} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - TEST_WITH_APT_REPO_RUBY: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-libxmlruby-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: ruby-libxmlruby-valgrind - public: true - serial: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: { aggregate: [*notify_failure_to_irc, *notify_failure_to_pr] } - - - - name: pr-success - public: true - disable_manual_trigger: true - plan: - - get: nokogiri-pr - trigger: true - version: every - passed: - - ruby-vanilla-system - - ruby-libxmlruby-system - - ruby-libxmlruby-valgrind - - gem-test - - gem-test-java - - put: nokogiri-pr - params: {path: nokogiri-pr, status: success} - - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build passed ($BUILD_URL)"} diff --git a/concourse/nokogiri.yml b/concourse/nokogiri.yml deleted file mode 100644 
index 4b5f2f1d93..0000000000 --- a/concourse/nokogiri.yml +++ /dev/null @@ -1,302 +0,0 @@ -% require "common_prelude.rb" - -resource_types: -<%= erbify_file "common_resource_types.yml" -%> - - -resources: -<%= erbify_file "common_resources.yml" -%> - - name: nokogiri - type: git - icon: "github-circle" - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: master - ignore_paths: <%= $common_ignore_paths %> - - -anchors: -<%= erbify_file "common_anchors.yml" -%> - - -jobs: -% RUBIES[:mri].each do |ruby_version| - - name: ruby-<%= ruby_version %>-system - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t -% if ruby_version == Concourse.production_rubies.last - CC_TEST_REPORTER_ID: {{code_climate_reporter_id_nokogiri}} - GIT_BRANCH: master -% end - run: - path: ci/concourse/tasks/rake-test/run.sh -% if Concourse.production_rubies.include? ruby_version - on_failure: *notify_failure_to_irc -% end - - - name: ruby-<%= ruby_version %>-vendored - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-<%= ruby_version %>-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh -% if Concourse.production_rubies.include? 
ruby_version - on_failure: *notify_failure_to_irc -% end - - - name: ruby-<%= ruby_version %>-valgrind - public: true - serial_groups: ["ruby-<%= ruby_version %>"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-<%= ruby_version %>-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= ruby_version %>"} - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh -% if Concourse.production_rubies.include? ruby_version - on_failure: *notify_failure_to_irc -% end -% end - - -% RUBIES[:jruby].each_with_index do |jruby_version, j| - - name: jruby-<%= jruby_version %> - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri - trigger: true -% if j > 0 - passed: ["jruby-<%= RUBIES[:jruby][j-1] %>"] -% end - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= jruby_version %>"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc -% end - - - - name: gate - public: true - plan: - - get: nokogiri - trigger: true - passed: -% Concourse.production_rubies.each do |ruby_version| - - "ruby-<%= ruby_version %>-valgrind" -% end - - "jruby-<%= RUBIES[:jruby].last %>" - - - - name: ruby-vanilla-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: xenial} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - TEST_WITH_APT_REPO_RUBY: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: ruby-libxmlruby-system - 
public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: ruby-libxmlruby-valgrind - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: gem-test - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - on_failure: *notify_failure_to_irc - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-<%= Concourse.production_rubies.last %>"} - inputs: - - name: ci - - name: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: *notify_failure_to_irc - - - name: gem-test-java - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - 
passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= RUBIES[:jruby].last %>"} - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - on_failure: *notify_failure_to_irc - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-<%= RUBIES[:jruby].last %>"} - inputs: - - name: ci - - name: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: *notify_failure_to_irc - - - - name: build-success - public: true - disable_manual_trigger: true - plan: - - get: nokogiri - trigger: true - version: every - passed: - - ruby-vanilla-system - - ruby-libxmlruby-system - - ruby-libxmlruby-valgrind - - gem-test - - gem-test-java - - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build passed ($BUILD_URL)"} diff --git a/concourse/nokogiri.yml.generated b/concourse/nokogiri.yml.generated deleted file mode 100644 index 5cea73d377..0000000000 --- a/concourse/nokogiri.yml.generated +++ /dev/null @@ -1,522 +0,0 @@ - -resource_types: - - name: irc-notification - type: docker-image - source: - repository: flavorjones/irc-notification-resource - - -resources: - - name: ci - type: git - icon: "settings" - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: master - disable_ci_skip: true # always get the latest pipeline configuration - - name: nokogiri-irc - type: irc-notification - icon: "bell" - source: - server: chat.freenode.net - port: 7070 - channel: "#nokogiri" - user: {{nokobot-irc-username}} - password: {{nokobot-irc-password}} - - name: nokogiri - type: git - icon: "github-circle" - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: master - ignore_paths: 
["CHANGELOG.md","README.md","concourse/**","suppressions/**"] - - -anchors: - notify_failure_to_irc: ¬ify_failure_to_irc - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build failed ($BUILD_URL)"} - - -jobs: - - name: ruby-2.3-system - public: true - serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.3-vendored - public: true - serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.3-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.3-valgrind - public: true - serial_groups: ["ruby-2.3"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.3-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.3"} - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - name: ruby-2.4-system - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - 
path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.4-vendored - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.4-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.4-valgrind - public: true - serial_groups: ["ruby-2.4"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.4-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.4"} - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - name: ruby-2.5-system - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.5-vendored - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.5-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.5-valgrind - public: true - serial_groups: ["ruby-2.5"] - plan: - - get: ci - 
- get: nokogiri - trigger: true - passed: ["ruby-2.5-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.5"} - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - name: ruby-2.6-system - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - CC_TEST_REPORTER_ID: {{code_climate_reporter_id_nokogiri}} - GIT_BRANCH: master - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.6-vendored - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.6-system"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - name: ruby-2.6-valgrind - public: true - serial_groups: ["ruby-2.6"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["ruby-2.6-vendored"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: jruby-9.1 - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rake-test - config: - 
platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.1"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - name: jruby-9.2 - public: true - serial_groups: ["jruby"] - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["jruby-9.1"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: gate - public: true - plan: - - get: nokogiri - trigger: true - passed: - - "ruby-2.3-valgrind" - - "ruby-2.4-valgrind" - - "ruby-2.5-valgrind" - - "ruby-2.6-valgrind" - - "jruby-9.2" - - - - name: ruby-vanilla-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: xenial} - inputs: - - name: ci - - name: nokogiri - params: - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - TEST_WITH_APT_REPO_RUBY: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: ruby-libxmlruby-system - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - NOKOGIRI_USE_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: ruby-libxmlruby-valgrind - public: true - serial: true - plan: - - get: ci - - get: nokogiri - 
trigger: true - passed: ["gate"] - - task: rake-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - on_failure: *notify_failure_to_irc - - - - name: gem-test - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - on_failure: *notify_failure_to_irc - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "mri-2.6"} - inputs: - - name: ci - - name: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: *notify_failure_to_irc - - - name: gem-test-java - public: true - serial: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["gate"] - - task: gem-build - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - on_failure: *notify_failure_to_irc - - task: gem-install-and-test - config: - platform: linux - image_resource: - type: docker-image - source: {repository: "flavorjones/nokogiri-test", tag: "jruby-9.2"} - inputs: - - name: ci - - name: nokogiri - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - on_failure: *notify_failure_to_irc - - - - name: build-success - public: true - 
disable_manual_trigger: true - plan: - - get: nokogiri - trigger: true - version: every - passed: - - ruby-vanilla-system - - ruby-libxmlruby-system - - ruby-libxmlruby-valgrind - - gem-test - - gem-test-java - - put: nokogiri-irc - params: {message: "($BUILD_PIPELINE_NAME/$BUILD_JOB_NAME) The build passed ($BUILD_URL)"} diff --git a/concourse/shared/code-climate.sh b/concourse/shared/code-climate.sh deleted file mode 100644 index 48c283259d..0000000000 --- a/concourse/shared/code-climate.sh +++ /dev/null @@ -1,45 +0,0 @@ -# -# Source this file to have access to two functions: -# -# code-climate-setup -# -# * downloads the CC CLI -# * sets up CC environment variables -# * invokes CC's `before-build` -# -# -# code-climate-shipit -# -# * invokes CC's `after-build` -# -# Note that the env var CC_TEST_REPORTER_ID will need to be set. You -# can find this on your Code Climate project's "Repo Settings" page. -# - -CC_CLI_URI="https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64" -CC_CLI=$(basename ${CC_CLI_URI}) - -function code-climate-setup { - if [ -z "${CC_TEST_REPORTER_ID:-}" ] ; then - echo "WARNING: code-climate-setup: CC_TEST_REPORTER_ID is not set, skipping." - return - fi - - wget --no-verbose ${CC_CLI_URI} - chmod +x ${CC_CLI} - - export CI_NAME="concourse" - - ./${CC_CLI} env - ./${CC_CLI} before-build -} - -function code-climate-shipit { - if [ -z "${CC_TEST_REPORTER_ID:-}" ] ; then - echo "WARNING: code-climate-shipit: CC_TEST_REPORTER_ID is not set, skipping." - return - fi - - # let's remove the `|| true` once all pull requests from pre-simplecov are cleared out - ./${CC_CLI} after-build || true -} diff --git a/concourse/tasks/gem-test/gem-build-java.sh b/concourse/tasks/gem-test/gem-build-java.sh deleted file mode 100755 index 2436d719e2..0000000000 --- a/concourse/tasks/gem-test/gem-build-java.sh +++ /dev/null @@ -1,22 +0,0 @@ -#! 
/usr/bin/env bash - -set -e -x -u - -pushd nokogiri - - OUTPUT_DIR="../gems" - - # inputs from a real git resource will contain this dir, but we may - # run this task via `fly execute` and so we need to do this to avoid - # cleanup, see extconf.rb do_clean - mkdir -p .git - - bundle install --local || bundle install - - bundle exec rake java gem - - mkdir -p ${OUTPUT_DIR} - cp -v pkg/nokogiri*java.gem ${OUTPUT_DIR} - sha256sum ${OUTPUT_DIR}/* - -popd diff --git a/concourse/tasks/gem-test/gem-build.sh b/concourse/tasks/gem-test/gem-build.sh deleted file mode 100755 index f388cead40..0000000000 --- a/concourse/tasks/gem-test/gem-build.sh +++ /dev/null @@ -1,25 +0,0 @@ -#! /usr/bin/env bash - -set -e -x -u - -pushd nokogiri - - OUTPUT_DIR="../gems" - - # inputs from a real git resource will contain this dir, but we may - # run this task via `fly execute` and so we need to do this to avoid - # cleanup, see extconf.rb do_clean - mkdir -p .git - - bundle install --local || bundle install - - # TODO we're only compiling so that we retrieve libxml2/libxslt - # tarballs, we can do better a couple of different ways - bundle exec rake clean compile - bundle exec rake gem - - mkdir -p ${OUTPUT_DIR} - cp -v pkg/nokogiri*.gem ${OUTPUT_DIR} - sha256sum ${OUTPUT_DIR}/* - -popd diff --git a/concourse/tasks/gem-test/gem-install-and-test.sh b/concourse/tasks/gem-test/gem-install-and-test.sh deleted file mode 100755 index 521067c97d..0000000000 --- a/concourse/tasks/gem-test/gem-install-and-test.sh +++ /dev/null @@ -1,27 +0,0 @@ -#! 
/usr/bin/env bash - -set -e -x -u - -pushd gems - - gemfile=$(ls *.gem | head -n1) - sha256sum ${gemfile} - gem install ${gemfile} - gem list -d nokogiri - nokogiri -v - -popd - -pushd nokogiri - - export BUNDLE_GEMFILE=$(pwd)/Gemfile - bundle -v - bundle config - - bundle add nokogiri --skip-install - bundle install --local || bundle install - bundle show nokogiri - - bundle exec rake test - -popd diff --git a/concourse/tasks/rake-test/run.ps1 b/concourse/tasks/rake-test/run.ps1 deleted file mode 100644 index 473f3eb725..0000000000 --- a/concourse/tasks/rake-test/run.ps1 +++ /dev/null @@ -1,11 +0,0 @@ -. "c:\var\vcap\packages\windows-ruby-dev-tools\prelude.ps1" - -$env:RUBYOPT = "-rdevkit" - -push-location nokogiri - - system-cmd "gem install bundler" - system-cmd "bundle install" - system-cmd "bundle exec rake compile test" - -pop-location diff --git a/concourse/tasks/rake-test/run.sh b/concourse/tasks/rake-test/run.sh deleted file mode 100755 index 2a27928d46..0000000000 --- a/concourse/tasks/rake-test/run.sh +++ /dev/null @@ -1,60 +0,0 @@ -#! /usr/bin/env bash - -set -e -x -u - -source "$(dirname "$0")/../../shared/code-climate.sh" - -VERSION_INFO=$(ruby -v) -RUBY_ENGINE=$(cut -d" " -f1 <<< "${VERSION_INFO}") -RUBY_VERSION=$(cut -d" " -f2 <<< "${VERSION_INFO}") - -FROZEN_STRING_REF="53f9b66" - -function mri-24-or-greater { - if [[ $RUBY_ENGINE != "ruby" ]] ; then - return 1 - fi - - if echo $RUBY_VERSION | grep "^[0-2]\.[0-3]\." 
> /dev/null ; then - return 1 - fi - - return 0 -} - -function commit-is-post-frozen-string-support { - if git merge-base --is-ancestor ${FROZEN_STRING_REF} HEAD ; then - return 0 - fi - return 1 -} - -pushd nokogiri - - test_task="test" - - bundle install --local || bundle install - bundle exec rake generate # do this before setting frozen string option, because racc isn't compatible with frozen string literals yet - - if mri-24-or-greater && commit-is-post-frozen-string-support ; then - export RUBYOPT="--enable-frozen-string-literal --debug=frozen-string-literal" - fi - - if [[ ${TEST_WITH_VALGRIND:-} != "" ]] ; then - test_task="test:valgrind" # override - # export TESTOPTS="-v" # see more verbose output to help narrow down warnings - - # always use the CI suppressions if they exist - if [[ -d ../ci/suppressions ]] ; then - rm -rf suppressions - cp -var ../ci/suppressions . - fi - fi - - code-climate-setup - - bundle exec rake compile ${test_task} - - code-climate-shipit - -popd diff --git a/dependencies.yml b/dependencies.yml index 533bfd2631..98eccc8984 100644 --- a/dependencies.yml +++ b/dependencies.yml @@ -1,72 +1,41 @@ libxml2: - version: "2.9.9" - sha256: "94fb70890143e3c6549f265cee93ec064c80a84c42ad0f23e85ee1fd6540a871" - # manually verified checksum: - # - # $ gpg --verify libxml2-2.9.9.tar.gz.asc ports/archives/libxml2-2.9.9.tar.gz - # gpg: Signature made Thu 03 Jan 2019 01:14:47 PM EST - # gpg: using RSA key 15588B26596BEA5D - # gpg: Good signature from "Daniel Veillard (Red Hat work email) " [unknown] - # gpg: aka "Daniel Veillard " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. 
- # Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F - # Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D - # - # using this pgp signature: - # - # -----BEGIN PGP SIGNATURE----- - # - # iQEbBAABAgAGBQJcLlEXAAoJEBVYiyZZa+pd1B8H93xeCYNBLx+eX0xe3qS3ReS/ - # YstjkXKUkmDQYwqQ/9Knmv1P6NX64hQL5E1pZX5sXp36giwXXJ5tCK72VRzektzU - # Kpo+M1/QA9feZQs1GmyKaXYzNwTSJnsdKA9nWqTHZ3bzfdhFSZ0czo94vgY/cz5z - # 9P3FIgeldj1vi8p2rjXbArMFQyaxHnve9LdxI8hbudNSeUw/FEV6mjtXrlZ7MXqn - # hmAkah2JwktOStF5tIlddCRqZeUPUX5flBxT95gfskXXlGEhaoGMXcC3izqqJyV2 - # sx5nY7fnXdkwfYsgRUXYWmDmbs8DnFjXH9lux9O4OWglLonaRoAqFPcOzE3aCw== - # =4qWg - # -----END PGP SIGNATURE----- - # + version: "2.10.3" + sha256: "5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c" + # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.10/libxml2-2.10.3.sha256sum libxslt: - version: "1.1.33" - sha256: "8e36605144409df979cab43d835002f63988f3dc94d5d3537c12796db90e38c8" - # manually verified checksum: - # - # $ gpg --verify libxslt-1.1.33.tar.gz.asc ports/archives/libxslt-1.1.33.tar.gz - # gpg: Signature made Thu 03 Jan 2019 01:30:49 PM EST - # gpg: using RSA key 15588B26596BEA5D - # gpg: Good signature from "Daniel Veillard (Red Hat work email) " [unknown] - # gpg: aka "Daniel Veillard " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. 
- # Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F - # Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D - # - # using this pgp signature: - # - # -----BEGIN PGP SIGNATURE----- - # - # iQEcBAABAgAGBQJcLlTZAAoJEBVYiyZZa+pd9NkIAIf6ei2iSpR/0QOyS71esDq8 - # 407PcUXd/yUjDANm4Uvm7kKK+SbbfBxFIPva4g984Noe1zYMfjK3u3iNs6jykySf - # mN5eo2wNCxsZnqjbnsLgQvn5VCQpPInTddTuGUxgqJyvnR7p785L1oA2EStSPMP4 - # BGZ9dZGlbreK35WzgrhUi0VN5egJW2fpMsw7rTPvfwK+90gXL0DEm8v3WlA7fCDL - # QsvuPm7jPOXxdt5bYrVP8wpNMTJIGqV6jxh7Vvl6kiGLldUjCyoCh0AGXLror0Gs - # sAMlRKJNodpcCYkIWxzjLt74sUciKNrPLHZlXJcclZMONen1GWnVDcv83Tt9n6w= - # =iAm8 - # -----END PGP SIGNATURE----- - # + version: "1.1.37" + sha256: "3a4b27dc8027ccd6146725950336f1ec520928f320f144eb5fa7990ae6123ab4" + # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.37.sha256sum zlib: - version: "1.2.11" - sha256: "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1" + version: "1.2.13" + sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30" # SHA-256 hash provided on http://zlib.net/ libiconv: - version: "1.15" - sha256: "ccf536620a45458d26ba83887a983b96827001e92a13847b45e4925cc8913178" - # gpg: Signature made Fri Feb 3 00:38:12 2017 CET - # gpg: using RSA key 4F494A942E4616C2 - # gpg: Good signature from "Bruno Haible (Open Source Development) " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. 
- # Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2 + version: "1.17" + sha256: "8f74213b56238c85a50a5329f77e06198771e70dd9a739779f4c02f65d971313" + # signature verified by following this path: + # - release announced at https://savannah.gnu.org/forum/forum.php?forum_id=10175 + # - which links to https://savannah.gnu.org/users/haible as the releaser + # - which links to https://savannah.gnu.org/people/viewgpg.php?user_id=1871 as the gpg key + # + # So: + # - wget -q -O - https://savannah.gnu.org/people/viewgpg.php?user_id=1871 | gpg --import + # gpg: key F5BE8B267C6A406D: 1 signature not checked due to a missing key + # gpg: key F5BE8B267C6A406D: public key "Bruno Haible (Open Source Development) " imported + # gpg: Total number processed: 1 + # gpg: imported: 1 + # gpg: marginals needed: 3 completes needed: 1 trust model: pgp + # gpg: depth: 0 valid: 4 signed: 0 trust: 0-, 0q, 0n, 0m, 0f, 4u + # gpg: next trustdb check due at 2024-05-09 + # - gpg --verify libiconv-1.17.tar.gz.sig ports/archives/libiconv-1.17.tar.gz + # gpg: Signature made Sun 15 May 2022 11:26:42 AM EDT + # gpg: using RSA key 9001B85AF9E1B83DF1BDA942F5BE8B267C6A406D + # gpg: Good signature from "Bruno Haible (Open Source Development) " [unknown] + # gpg: WARNING: This key is not certified with a trusted signature! + # gpg: There is no indication that the signature belongs to the owner. + # Primary key fingerprint: 9001 B85A F9E1 B83D F1BD A942 F5BE 8B26 7C6A 406D + # + # And this sha256sum is calculated from that verified tarball. 
diff --git a/ext/java/nokogiri/EncodingHandler.java b/ext/java/nokogiri/EncodingHandler.java deleted file mode 100644 index 4adce465b3..0000000000 --- a/ext/java/nokogiri/EncodingHandler.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package nokogiri; - -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; - -import java.util.HashMap; - -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.RubyObject; -import org.jruby.anno.JRubyClass; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Stub class to satisfy unit tests. I'm not sure where this class is - * meant to be used. As coded it won't really interact with any other - * classes and will have no effect on character encodings reported by - * documents being parsed. - * - * @author Patrick Mahoney - */ -@JRubyClass(name="Nokogiri::EncodingHandler") -public class EncodingHandler extends RubyObject { - protected static HashMap map = new HashMap(); - static { - addInitial(); - } - - protected String name; - - protected static void addInitial() { - map.put("UTF-8", "UTF-8"); - } - - public EncodingHandler(Ruby ruby, RubyClass klass, String value) { - super(ruby, klass); - name = value; - } - - @JRubyMethod(name="[]", meta=true) - public static IRubyObject get(ThreadContext context, - IRubyObject _klass, - IRubyObject keyObj) { - Ruby ruby = context.getRuntime(); - String key = keyObj.toString(); - String value = map.get(key); - if (value == null) - return ruby.getNil(); - - return new EncodingHandler( - ruby, - getNokogiriClass(ruby, "Nokogiri::EncodingHandler"), - value); - } - - @JRubyMethod(meta=true) - public static IRubyObject delete(ThreadContext context, - IRubyObject _klass, - IRubyObject keyObj) { - String key = keyObj.toString(); - String value = map.remove(key); - if (value == null) - return context.getRuntime().getNil(); - return context.getRuntime().newString(value); - } - - @JRubyMethod(name="clear_aliases!", meta=true) - public static IRubyObject clear_aliases(ThreadContext context, - IRubyObject _klass) { - map.clear(); - addInitial(); - return context.getRuntime().getNil(); - } - - @JRubyMethod(meta=true) - 
public static IRubyObject alias(ThreadContext context, - IRubyObject _klass, - IRubyObject orig, - IRubyObject alias) { - String value = map.get(orig.toString()); - if (value != null) - map.put(alias.toString(), value); - - return context.getRuntime().getNil(); - } - - @JRubyMethod - public IRubyObject name(ThreadContext context) { - return context.getRuntime().newString(name); - } -} diff --git a/ext/java/nokogiri/Html4Document.java b/ext/java/nokogiri/Html4Document.java new file mode 100644 index 0000000000..b0de1adf1e --- /dev/null +++ b/ext/java/nokogiri/Html4Document.java @@ -0,0 +1,157 @@ +package nokogiri; + +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Helpers; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Attr; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import nokogiri.internals.HtmlDomParserContext; + +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; + +/** + * Class for Nokogiri::HTML4::Document. 
+ * + * @author sergio + * @author Yoko Harada + */ +@JRubyClass(name = "Nokogiri::HTML4::Document", parent = "Nokogiri::XML::Document") +public class Html4Document extends XmlDocument +{ + private static final long serialVersionUID = 1L; + + private static final String DEFAULT_CONTENT_TYPE = "html"; + private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN"; + private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd"; + + private String parsed_encoding = null; + + public + Html4Document(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } + + public + Html4Document(Ruby runtime, Document document) + { + this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document); + } + + public + Html4Document(Ruby ruby, RubyClass klazz, Document doc) + { + super(ruby, klazz, doc); + } + + @JRubyMethod(name = "new", meta = true, rest = true, required = 0) + public static IRubyObject + rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) + { + final Ruby runtime = context.runtime; + Html4Document htmlDocument; + try { + Document docNode = createNewDocument(runtime); + htmlDocument = (Html4Document) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz); + htmlDocument.setDocumentNode(context.runtime, docNode); + } catch (Exception ex) { + throw asRuntimeError(runtime, "couldn't create document: ", ex); + } + + Helpers.invoke(context, htmlDocument, "initialize", args); + + return htmlDocument; + } + + public IRubyObject + getInternalSubset(ThreadContext context) + { + IRubyObject internalSubset = super.getInternalSubset(context); + + // html documents are expected to have a default internal subset + // the default values are the same ones used when the following + // feature is turned on + // "http://cyberneko.org/html/features/insert-doctype" + // the reason we don't turn it on, is because it overrides the document's + // declared doctype declaration. 
+ + if (internalSubset.isNil()) { + internalSubset = XmlDtd.newEmpty(context.getRuntime(), + getDocument(), + context.getRuntime().newString(DEFAULT_CONTENT_TYPE), + context.getRuntime().newString(DEFAULT_PUBLIC_ID), + context.getRuntime().newString(DEFAULT_SYTEM_ID)); + setInternalSubset(internalSubset); + } + + return internalSubset; + } + + @Override + void + init(Ruby runtime, Document document) + { + stabilizeTextContent(document); + document.normalize(); + setInstanceVariable("@decorators", runtime.getNil()); + if (document.getDocumentElement() != null) { + stabilizeAttrs(document.getDocumentElement()); + } + } + + private static void + stabilizeAttrs(Node node) + { + if (node.hasAttributes()) { + NamedNodeMap nodeMap = node.getAttributes(); + for (int i = 0; i < nodeMap.getLength(); i++) { + Node n = nodeMap.item(i); + if (n instanceof Attr) { + stabilizeAttr((Attr) n); + } + } + } + NodeList children = node.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + stabilizeAttrs(children.item(i)); + } + } + + public void + setParsedEncoding(String encoding) + { + parsed_encoding = encoding; + } + + public String + getPraedEncoding() + { + return parsed_encoding; + } + + @JRubyMethod(meta = true, required = 4) + public static IRubyObject + read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) + { + HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]); + ctx.setIOInputSource(context, args[0], args[1]); + return ctx.parse(context, (RubyClass) klass, args[1]); + } + + @JRubyMethod(meta = true, required = 4) + public static IRubyObject + read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) + { + HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]); + ctx.setStringInputSource(context, args[0], args[1]); + return ctx.parse(context, (RubyClass) klass, args[1]); + } +} diff --git a/ext/java/nokogiri/Html4ElementDescription.java 
b/ext/java/nokogiri/Html4ElementDescription.java new file mode 100644 index 0000000000..8613245914 --- /dev/null +++ b/ext/java/nokogiri/Html4ElementDescription.java @@ -0,0 +1,133 @@ +package nokogiri; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import net.sourceforge.htmlunit.cyberneko.HTMLElements; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; + +/** + * Class for Nokogiri::HTML4::ElementDescription. + * + * @author Patrick Mahoney + */ +@JRubyClass(name = "Nokogiri::HTML4::ElementDescription") +public class Html4ElementDescription extends RubyObject +{ + private static final long serialVersionUID = 1L; + private static final HTMLElements htmlElements_ = new HTMLElements(); + + /** + * Stores memoized hash of element -> list of valid subelements. + */ + static protected Map> subElements; + static + { + Map> _subElements = + new HashMap>(); + subElements = Collections.synchronizedMap(_subElements); + } + + protected HTMLElements.Element element; + + public + Html4ElementDescription(Ruby runtime, RubyClass rubyClass) + { + super(runtime, rubyClass); + } + + /** + * Lookup the list of sub elements of code. If not + * already stored, iterate through all elements to find valid + * subelements; save this list and return it. + */ + protected static List + findSubElements(HTMLElements.Element elem) + { + List subs = subElements.get(elem.code); + + if (subs == null) { + subs = new ArrayList(); + + /* + * A bit of a hack. NekoHtml source code shows that + * UNKNOWN is the highest value element. We cannot access + * the list of elements directly because it's protected. 
+ */ + for (short c = 0; c < HTMLElements.UNKNOWN; c++) { + HTMLElements.Element maybe_sub = htmlElements_.getElement(c); + if (maybe_sub != null && maybe_sub.isParent(elem)) { + subs.add(maybe_sub.name); + } + } + + subElements.put(elem.code, subs); + } + + return subs; + } + + @JRubyMethod(name = "[]", meta = true) + public static IRubyObject + get(ThreadContext context, + IRubyObject klazz, IRubyObject name) + { + + // nekohtml will return an element even for invalid names, which breaks `test_fetch_nonexistent' + // see getElement() in HTMLElements.java + HTMLElements.Element elem = htmlElements_.getElement(name.asJavaString(), htmlElements_.NO_SUCH_ELEMENT); + if (elem == htmlElements_.NO_SUCH_ELEMENT) { + return context.nil; + } + + Html4ElementDescription desc = + new Html4ElementDescription(context.getRuntime(), (RubyClass)klazz); + desc.element = elem; + return desc; + } + + @JRubyMethod() + public IRubyObject + name(ThreadContext context) + { + return context.getRuntime().newString(element.name.toLowerCase()); + } + + @JRubyMethod(name = "inline?") + public IRubyObject + inline_eh(ThreadContext context) + { + return context.getRuntime().newBoolean(element.isInline()); + } + + @JRubyMethod(name = "empty?") + public IRubyObject + empty_eh(ThreadContext context) + { + return context.getRuntime().newBoolean(element.isEmpty()); + } + + @JRubyMethod() + public IRubyObject + sub_elements(ThreadContext context) + { + Ruby ruby = context.getRuntime(); + List subs = findSubElements(element); + IRubyObject[] ary = new IRubyObject[subs.size()]; + for (int i = 0; i < subs.size(); ++i) { + ary[i] = ruby.newString(subs.get(i)); + } + + return ruby.newArray(ary); + } + +} diff --git a/ext/java/nokogiri/Html4EntityLookup.java b/ext/java/nokogiri/Html4EntityLookup.java new file mode 100644 index 0000000000..5d98f1f8c9 --- /dev/null +++ b/ext/java/nokogiri/Html4EntityLookup.java @@ -0,0 +1,63 @@ +package nokogiri; + +import static org.jruby.runtime.Helpers.invoke; + +import 
net.sourceforge.htmlunit.cyberneko.HTMLEntitiesParser; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; + +/** + * Class for Nokogiri::HTML4::EntityLookup. + * + * @author Patrick Mahoney + */ +@JRubyClass(name = "Nokogiri::HTML4::EntityLookup") +public class Html4EntityLookup extends RubyObject +{ + private static final long serialVersionUID = 1L; + + public + Html4EntityLookup(Ruby runtime, RubyClass rubyClass) + { + super(runtime, rubyClass); + } + + /** + * Looks up an HTML entity key. + * + * The description is a bit lacking. + */ + @JRubyMethod() + public IRubyObject + get(ThreadContext context, IRubyObject key) + { + Ruby ruby = context.getRuntime(); + String name = key.toString(); + + HTMLEntitiesParser parser = new HTMLEntitiesParser(); + for (int j = 0 ; j < name.length() ; j++) { + if (!parser.parse(name.charAt(j))) { + break; + } + } + String match = parser.getMatch(); + + if (match == null) { return ruby.getNil(); } + + int val = match.charAt(0); + + IRubyObject edClass = + ruby.getClassFromPath("Nokogiri::HTML4::EntityDescription"); + IRubyObject edObj = invoke(context, edClass, "new", + ruby.newFixnum(val), ruby.newString(name), + ruby.newString(name + " entity")); + + return edObj; + } + +} diff --git a/ext/java/nokogiri/Html4SaxParserContext.java b/ext/java/nokogiri/Html4SaxParserContext.java new file mode 100644 index 0000000000..ed34e5b313 --- /dev/null +++ b/ext/java/nokogiri/Html4SaxParserContext.java @@ -0,0 +1,289 @@ +package nokogiri; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import 
org.apache.xerces.parsers.AbstractSAXParser; +import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyFixnum; +import org.jruby.RubyString; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.xml.sax.SAXException; + +import nokogiri.internals.NokogiriHandler; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + +/** + * Class for Nokogiri::HTML4::SAX::ParserContext. + * + * @author serabe + * @author Patrick Mahoney + * @author Yoko Harada + */ +@JRubyClass(name = "Nokogiri::HTML4::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext") +public class Html4SaxParserContext extends XmlSaxParserContext +{ + private static final long serialVersionUID = 1L; + + static Html4SaxParserContext + newInstance(final Ruby runtime, final RubyClass klazz) + { + Html4SaxParserContext instance = new Html4SaxParserContext(runtime, klazz); + instance.initialize(runtime); + return instance; + } + + public + Html4SaxParserContext(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + @Override + protected AbstractSAXParser + createParser() throws SAXException + { + SAXParser parser = new SAXParser(); + + try { + parser.setProperty( + "http://cyberneko.org/html/properties/names/elems", "lower"); + parser.setProperty( + "http://cyberneko.org/html/properties/names/attrs", "lower"); + + // NekoHTML should not try to guess the encoding based on the meta + // tags or other information in the document. This is already + // handled by the EncodingReader. 
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true); + return parser; + } catch (SAXException ex) { + throw new SAXException( + "Problem while creating HTML4 SAX Parser: " + ex.toString()); + } + } + + @JRubyMethod(name = "memory", meta = true) + public static IRubyObject + parse_memory(ThreadContext context, + IRubyObject klazz, + IRubyObject data, + IRubyObject encoding) + { + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz); + String javaEncoding = findEncodingName(context, encoding); + if (javaEncoding != null) { + CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding); + ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes()); + ctx.setInputSource(istream); + ctx.getInputSource().setEncoding(javaEncoding); + } + return ctx; + } + + public enum EncodingType { + NONE(0, "NONE"), + UTF_8(1, "UTF-8"), + UTF16LE(2, "UTF16LE"), + UTF16BE(3, "UTF16BE"), + UCS4LE(4, "UCS4LE"), + UCS4BE(5, "UCS4BE"), + EBCDIC(6, "EBCDIC"), + UCS4_2143(7, "ICS4-2143"), + UCS4_3412(8, "UCS4-3412"), + UCS2(9, "UCS2"), + ISO_8859_1(10, "ISO-8859-1"), + ISO_8859_2(11, "ISO-8859-2"), + ISO_8859_3(12, "ISO-8859-3"), + ISO_8859_4(13, "ISO-8859-4"), + ISO_8859_5(14, "ISO-8859-5"), + ISO_8859_6(15, "ISO-8859-6"), + ISO_8859_7(16, "ISO-8859-7"), + ISO_8859_8(17, "ISO-8859-8"), + ISO_8859_9(18, "ISO-8859-9"), + ISO_2022_JP(19, "ISO-2022-JP"), + SHIFT_JIS(20, "SHIFT-JIS"), + EUC_JP(21, "EUC-JP"), + ASCII(22, "ASCII"); + + private final int value; + private final String name; + + EncodingType(int value, String name) + { + this.value = value; + this.name = name; + } + + public int getValue() + { + return value; + } + + public String toString() + { + return name; + } + + private static transient EncodingType[] values; + + // NOTE: assuming ordinal == value + static EncodingType get(final int ordinal) + { + EncodingType[] values = 
EncodingType.values; + if (values == null) { + values = EncodingType.values(); + EncodingType.values = values; + } + if (ordinal >= 0 && ordinal < values.length) { + return values[ordinal]; + } + return null; + } + + } + + private static String + findEncodingName(final int value) + { + EncodingType type = EncodingType.get(value); + if (type == null) { return null; } + assert type.value == value; + return type.name; + } + + private static String + findEncodingName(ThreadContext context, IRubyObject encoding) + { + String rubyEncoding = null; + if (encoding instanceof RubyString) { + rubyEncoding = rubyStringToString((RubyString) encoding); + } else if (encoding instanceof RubyFixnum) { + rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding)); + } + if (rubyEncoding == null) { return null; } + try { + return Charset.forName(rubyEncoding).displayName(); + } catch (UnsupportedCharsetException e) { + throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported"); + } catch (IllegalCharsetNameException e) { + throw context.getRuntime().newEncodingError(e.getMessage()); + } + } + + private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", + Pattern.CASE_INSENSITIVE); + + private static CharSequence + applyEncoding(final String input, final String enc) + { + int start_pos = 0; + int end_pos = 0; + if (containsIgnoreCase(input, "charset")) { + Matcher m = CHARSET_PATTERN.matcher(input); + while (m.find()) { + start_pos = m.start(); + end_pos = m.end(); + } + } + if (start_pos != end_pos) { + return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc); + } + return input; + } + + private static boolean + containsIgnoreCase(final String str, final String sub) + { + final int len = sub.length(); + final int max = str.length() - len; + + if (len == 0) { return true; } + final char c0Lower = Character.toLowerCase(sub.charAt(0)); + final char c0Upper = 
Character.toUpperCase(sub.charAt(0)); + + for (int i = 0; i <= max; i++) { + final char ch = str.charAt(i); + if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) { + continue; // first char doesn't match + } + + if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) { + return true; + } + } + return false; + } + + @JRubyMethod(name = "file", meta = true) + public static IRubyObject + parse_file(ThreadContext context, + IRubyObject klass, + IRubyObject data, + IRubyObject encoding) + { + if (!(data instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + if (!(encoding instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); + ctx.setInputSourceFile(context, data); + String javaEncoding = findEncodingName(context, encoding); + if (javaEncoding != null) { + ctx.getInputSource().setEncoding(javaEncoding); + } + return ctx; + } + + @JRubyMethod(name = "io", meta = true) + public static IRubyObject + parse_io(ThreadContext context, + IRubyObject klass, + IRubyObject data, + IRubyObject encoding) + { + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); + } + + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); + ctx.setIOInputSource(context, data, context.nil); + String javaEncoding = findEncodingName(context, encoding); + if (javaEncoding != null) { + ctx.getInputSource().setEncoding(javaEncoding); + } + return ctx; + } + + /** + * Create a new parser context that will read from a raw input stream. + * Meant to be run in a separate thread by Html4SaxPushParser. 
+ */ + static Html4SaxParserContext + parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) + { + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(runtime, klass); + ctx.setInputSource(stream); + return ctx; + } + + @Override + protected void + preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) + { + // this function is meant to be empty. It overrides the one in XmlSaxParserContext + } + +} diff --git a/ext/java/nokogiri/Html4SaxPushParser.java b/ext/java/nokogiri/Html4SaxPushParser.java new file mode 100644 index 0000000000..c338fd3696 --- /dev/null +++ b/ext/java/nokogiri/Html4SaxPushParser.java @@ -0,0 +1,213 @@ +package nokogiri; + +import nokogiri.internals.ClosedStreamException; +import nokogiri.internals.NokogiriBlockingQueueInputStream; +import nokogiri.internals.NokogiriHelpers; +import nokogiri.internals.ParserContext; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.*; + +import static nokogiri.XmlSaxPushParser.terminateExecution; +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; + +/** + * Class for Nokogiri::HTML4::SAX::PushParser + * + * @author + * @author Piotr Szmielew - based on Nokogiri::XML::SAX::PushParser + */ +@JRubyClass(name = "Nokogiri::HTML4::SAX::PushParser") +public class Html4SaxPushParser extends RubyObject +{ + private static final long serialVersionUID = 1L; + + ParserContext.Options options; + IRubyObject saxParser; + + NokogiriBlockingQueueInputStream stream; + + private ParserTask parserTask = null; + private FutureTask futureTask = null; + private ExecutorService executor = null; + + public + 
Html4SaxPushParser(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + @SuppressWarnings("deprecation") + @Override + public void + finalize() + { + try { + terminateImpl(); + } catch (Exception e) { /* ignored */ } + } + + @JRubyMethod + public IRubyObject + initialize_native(final ThreadContext context, + IRubyObject saxParser, + IRubyObject fileName, + IRubyObject encoding) + { + // NOTE: Silently skips provided encoding + options = new ParserContext.Options(0); + this.saxParser = saxParser; + return this; + } + + private transient IRubyObject parse_options; + + private IRubyObject + parse_options(final ThreadContext context) + { + if (parse_options == null) { + parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new"); + } + return parse_options; + } + + @JRubyMethod(name = "options") + public IRubyObject + getOptions(ThreadContext context) + { + return invoke(context, parse_options(context), "options"); + } + + @JRubyMethod(name = "options=") + public IRubyObject + setOptions(ThreadContext context, IRubyObject opts) + { + invoke(context, parse_options(context), "options=", opts); + options = new ParserContext.Options(opts.convertToInteger().getLongValue()); + return getOptions(context); + } + + @JRubyMethod + public IRubyObject + native_write(ThreadContext context, IRubyObject chunk, IRubyObject isLast) + { + try { + initialize_task(context); + } catch (IOException e) { + throw context.getRuntime().newRuntimeError(e.getMessage()); + } + final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); + if (data == null) { + terminateTask(context.runtime); + throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML4::SyntaxError + } + + int errorCount0 = parserTask.getErrorCount(); + + if (isLast.isTrue()) { + IRubyObject document = invoke(context, this, "document"); + invoke(context, document, "end_document"); + terminateTask(context.runtime); + } 
else { + try { + Future task = stream.addChunk(data); + task.get(); + } catch (ClosedStreamException ex) { + // this means the stream is closed, ignore this exception + } catch (Exception e) { + throw context.runtime.newRuntimeError(e.getMessage()); + } + + } + + if (!options.recover && parserTask.getErrorCount() > errorCount0) { + terminateTask(context.runtime); + throw parserTask.getLastError().toThrowable(); + } + + return this; + } + + @SuppressWarnings("unchecked") + private void + initialize_task(ThreadContext context) throws IOException + { + if (futureTask == null || stream == null) { + stream = new NokogiriBlockingQueueInputStream(); + + assert saxParser != null : "saxParser null"; + parserTask = new ParserTask(context, saxParser, stream); + futureTask = new FutureTask((Callable) parserTask); + executor = Executors.newSingleThreadExecutor(new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setName("Html4SaxPushParser"); + t.setDaemon(true); + return t; + } + }); + executor.submit(futureTask); + } + } + + private void + terminateTask(final Ruby runtime) + { + if (executor == null) { return; } + + try { + terminateImpl(); + } catch (InterruptedException e) { + throw runtime.newRuntimeError(e.toString()); + } catch (Exception e) { + throw runtime.newRuntimeError(e.toString()); + } + } + + private synchronized void + terminateImpl() throws InterruptedException, ExecutionException + { + terminateExecution(executor, stream, futureTask); + + executor = null; + stream = null; + futureTask = null; + } + + private static Html4SaxParserContext + parse(final Ruby runtime, final InputStream stream) + { + RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML4::SAX::ParserContext"); + return Html4SaxParserContext.parse_stream(runtime, klazz, stream); + } + + static class ParserTask extends XmlSaxPushParser.ParserTask /* */ + { + + private + ParserTask(ThreadContext context, IRubyObject handler, InputStream 
stream) + { + super(context, handler, parse(context.runtime, stream), stream); + } + + @Override + public Html4SaxParserContext + call() throws Exception + { + return (Html4SaxParserContext) super.call(); + } + + } + +} diff --git a/ext/java/nokogiri/HtmlDocument.java b/ext/java/nokogiri/HtmlDocument.java deleted file mode 100644 index ff5245434f..0000000000 --- a/ext/java/nokogiri/HtmlDocument.java +++ /dev/null @@ -1,190 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package nokogiri; - -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.anno.JRubyClass; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.Arity; -import org.jruby.runtime.Helpers; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Attr; -import org.w3c.dom.Document; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -import nokogiri.internals.HtmlDomParserContext; - -/** - * Class for Nokogiri::HTML::Document. - * - * @author sergio - * @author Yoko Harada - */ -@JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document") -public class HtmlDocument extends XmlDocument { - private static final String DEFAULT_CONTENT_TYPE = "html"; - private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN"; - private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd"; - - private String parsed_encoding = null; - - public HtmlDocument(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - public HtmlDocument(Ruby ruby, RubyClass klazz, Document doc) { - super(ruby, klazz, doc); - } - - @JRubyMethod(name="new", meta = true, rest = true, required=0) - public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, - IRubyObject[] args) { - HtmlDocument htmlDocument; - try { - Document docNode = createNewDocument(); - htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass) klazz); - htmlDocument.setDocumentNode(context, docNode); - } catch (Exception ex) { - throw context.getRuntime().newRuntimeError("couldn't create document: " + ex); - } - - Helpers.invoke(context, htmlDocument, "initialize", args); - - return htmlDocument; - } - - public IRubyObject getInternalSubset(ThreadContext context) { - IRubyObject internalSubset = super.getInternalSubset(context); - - // html documents are expected to have a default 
internal subset - // the default values are the same ones used when the following - // feature is turned on - // "http://cyberneko.org/html/features/insert-doctype" - // the reason we don't turn it on, is because it overrides the document's - // declared doctype declaration. - - if (internalSubset.isNil()) { - internalSubset = XmlDtd.newEmpty(context.getRuntime(), - getDocument(), - context.getRuntime().newString(DEFAULT_CONTENT_TYPE), - context.getRuntime().newString(DEFAULT_PUBLIC_ID), - context.getRuntime().newString(DEFAULT_SYTEM_ID)); - setInternalSubset(internalSubset); - } - - return internalSubset; - } - - public static IRubyObject do_parse(ThreadContext context, - IRubyObject klass, - IRubyObject[] args) { - Ruby ruby = context.getRuntime(); - Arity.checkArgumentCount(ruby, args, 4, 4); - HtmlDomParserContext ctx = - new HtmlDomParserContext(ruby, args[2], args[3]); - ctx.setInputSource(context, args[0], args[1]); - return ctx.parse(context, klass, args[1]); - } - - public void setDocumentNode(ThreadContext context, Node node) { - super.setNode(context, node); - Ruby runtime = context.getRuntime(); - if (node != null) { - Document document = (Document)node; - document.normalize(); - stabilzeAttrValue(document.getDocumentElement()); - } - setInstanceVariable("@decorators", runtime.getNil()); - } - - private void stabilzeAttrValue(Node node) { - if (node == null) return; - if (node.hasAttributes()) { - NamedNodeMap nodeMap = node.getAttributes(); - for (int i=0; i - */ -@JRubyClass(name="Nokogiri::HTML::ElementDescription") -public class HtmlElementDescription extends RubyObject { - - /** - * Stores memoized hash of element -> list of valid subelements. 
- */ - static protected Map> subElements; - static { - Map> _subElements = - new HashMap>(); - subElements = Collections.synchronizedMap(_subElements); - } - - protected HTMLElements.Element element; - - public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) { - super(runtime, rubyClass); - } - - /** - * Lookup the list of sub elements of code. If not - * already stored, iterate through all elements to find valid - * subelements; save this list and return it. - */ - protected static List findSubElements(HTMLElements.Element elem) { - List subs = subElements.get(elem.code); - - if (subs == null) { - subs = new ArrayList(); - - /* - * A bit of a hack. NekoHtml source code shows that - * UNKNOWN is the highest value element. We cannot access - * the list of elements directly because it's protected. - */ - for (short c = 0; c < HTMLElements.UNKNOWN; c++) { - HTMLElements.Element maybe_sub = - HTMLElements.getElement(c); - if (maybe_sub.isParent(elem)) { - subs.add(maybe_sub.name); - } - } - - subElements.put(elem.code, subs); - } - - return subs; - } - - @JRubyMethod(name="[]", meta=true) - public static IRubyObject get(ThreadContext context, - IRubyObject klazz, IRubyObject name) { - - // nekohtml will return an element even for invalid names, see - // http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514 - // which breaks `test_fetch_nonexistent' - HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT); - if (elem == HTMLElements.NO_SUCH_ELEMENT) - return context.nil; - - HtmlElementDescription desc = - new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); - desc.element = elem; - return desc; - } - - @JRubyMethod() - public IRubyObject name(ThreadContext context) { - return context.getRuntime().newString(element.name.toLowerCase()); - } - - @JRubyMethod(name="inline?") - public IRubyObject inline_eh(ThreadContext context) { - return 
context.getRuntime().newBoolean(element.isInline()); - } - - @JRubyMethod(name="empty?") - public IRubyObject empty_eh(ThreadContext context) { - return context.getRuntime().newBoolean(element.isEmpty()); - } - - @JRubyMethod() - public IRubyObject sub_elements(ThreadContext context) { - Ruby ruby = context.getRuntime(); - List subs = findSubElements(element); - IRubyObject[] ary = new IRubyObject[subs.size()]; - for (int i = 0; i < subs.size(); ++i) { - ary[i] = ruby.newString(subs.get(i)); - } - - return ruby.newArray(ary); - } - -} diff --git a/ext/java/nokogiri/HtmlEntityLookup.java b/ext/java/nokogiri/HtmlEntityLookup.java deleted file mode 100644 index 20ab5fd33d..0000000000 --- a/ext/java/nokogiri/HtmlEntityLookup.java +++ /dev/null @@ -1,79 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package nokogiri; - -import static org.jruby.runtime.Helpers.invoke; - -import org.cyberneko.html.HTMLEntities; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.RubyObject; -import org.jruby.anno.JRubyClass; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Class for Nokogiri::HTML::EntityLookup. - * - * @author Patrick Mahoney - */ -@JRubyClass(name="Nokogiri::HTML::EntityLookup") -public class HtmlEntityLookup extends RubyObject { - - public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) { - super(runtime, rubyClass); - } - - /** - * Looks up an HTML entity key. - * - * The description is a bit lacking. 
- */ - @JRubyMethod() - public IRubyObject get(ThreadContext context, IRubyObject key) { - Ruby ruby = context.getRuntime(); - String name = key.toString(); - int val = HTMLEntities.get(name); - if (val == -1) return ruby.getNil(); - - IRubyObject edClass = - ruby.getClassFromPath("Nokogiri::HTML::EntityDescription"); - IRubyObject edObj = invoke(context, edClass, "new", - ruby.newFixnum(val), ruby.newString(name), - ruby.newString(name + " entity")); - - return edObj; - } - -} diff --git a/ext/java/nokogiri/HtmlSaxParserContext.java b/ext/java/nokogiri/HtmlSaxParserContext.java deleted file mode 100644 index 2fa4ee71c5..0000000000 --- a/ext/java/nokogiri/HtmlSaxParserContext.java +++ /dev/null @@ -1,252 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package nokogiri; - -import static nokogiri.internals.NokogiriHelpers.rubyStringToString; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.EnumSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import nokogiri.internals.NokogiriHandler; - -import org.apache.xerces.parsers.AbstractSAXParser; -import org.cyberneko.html.parsers.SAXParser; -import org.jruby.*; -import org.jruby.anno.JRubyClass; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.xml.sax.SAXException; - -/** - * Class for Nokogiri::HTML::SAX::ParserContext. 
- * - * @author serabe - * @author Patrick Mahoney - * @author Yoko Harada - */ - -@JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext") -public class HtmlSaxParserContext extends XmlSaxParserContext { - - public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - @Override - protected AbstractSAXParser createParser() throws SAXException { - SAXParser parser = new SAXParser(); - - try{ - parser.setProperty( - "http://cyberneko.org/html/properties/names/elems", "lower"); - parser.setProperty( - "http://cyberneko.org/html/properties/names/attrs", "lower"); - return parser; - } catch(SAXException ex) { - throw new SAXException( - "Problem while creating HTML SAX Parser: " + ex.toString()); - } - } - - @JRubyMethod(name="memory", meta=true) - public static IRubyObject parse_memory(ThreadContext context, - IRubyObject klazz, - IRubyObject data, - IRubyObject encoding) { - HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz); - ctx.initialize(context.getRuntime()); - String javaEncoding = findEncoding(context, encoding); - if (javaEncoding != null) { - String input = applyEncoding(rubyStringToString(data), javaEncoding); - ByteArrayInputStream istream = new ByteArrayInputStream(input.getBytes()); - ctx.setInputSource(istream); - ctx.getInputSource().setEncoding(javaEncoding); - } - return ctx; - } - - public enum EncodingType { - NONE(0, "NONE"), - UTF_8(1, "UTF-8"), - UTF16LE(2, "UTF16LE"), - UTF16BE(3, "UTF16BE"), - UCS4LE(4, "UCS4LE"), - UCS4BE(5, "UCS4BE"), - EBCDIC(6, "EBCDIC"), - UCS4_2143(7, "ICS4-2143"), - UCS4_3412(8, "UCS4-3412"), - UCS2(9, "UCS2"), - ISO_8859_1(10, "ISO-8859-1"), - ISO_8859_2(11, "ISO-8859-2"), - ISO_8859_3(12, "ISO-8859-3"), - ISO_8859_4(13, "ISO-8859-4"), - ISO_8859_5(14, "ISO-8859-5"), - ISO_8859_6(15, "ISO-8859-6"), - ISO_8859_7(16, "ISO-8859-7"), - ISO_8859_8(17, 
"ISO-8859-8"), - ISO_8859_9(18, "ISO-8859-9"), - ISO_2022_JP(19, "ISO-2022-JP"), - SHIFT_JIS(20, "SHIFT-JIS"), - EUC_JP(21, "EUC-JP"), - ASCII(22, "ASCII"); - - private final int value; - private final String name; - - EncodingType(int value, String name) { - this.value = value; - this.name = name; - } - - public int getValue() { - return value; - } - - public String toString() { - return name; - } - } - - private static String findName(final int value) { - for (EncodingType type : EncodingType.values()) { - if (type.getValue() == value) return type.toString(); - } - return null; - } - - private static String findEncoding(ThreadContext context, IRubyObject encoding) { - String rubyEncoding = null; - if (encoding instanceof RubyString) { - rubyEncoding = rubyStringToString(encoding); - } - else if (encoding instanceof RubyFixnum) { - int value = RubyFixnum.fix2int((RubyFixnum) encoding); - rubyEncoding = findName(value); - } - if (rubyEncoding == null) return null; - try { - return Charset.forName(rubyEncoding).displayName(); - } - catch (UnsupportedCharsetException e) { - throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported"); - } - catch (IllegalCharsetNameException e) { - throw context.getRuntime().newInvalidEncoding(e.getMessage()); - } - } - - private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+"); - - private static String applyEncoding(String input, String enc) { - String str = input.toLowerCase(); - int start_pos = 0; - int end_pos = 0; - if (input.contains("meta") && input.contains("charset")) { - Matcher m = CHARSET_PATTERN.matcher(str); - while (m.find()) { - start_pos = m.start(); - end_pos = m.end(); - } - } - if (start_pos != end_pos) { - String substr = input.substring(start_pos, end_pos); - input = input.replace(substr, "charset=" + enc); - } - return input; - } - - @JRubyMethod(name="file", meta=true) - public static IRubyObject parse_file(ThreadContext context, 
- IRubyObject klazz, - IRubyObject data, - IRubyObject encoding) { - HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz); - ctx.initialize(context.getRuntime()); - ctx.setInputSourceFile(context, data); - String javaEncoding = findEncoding(context, encoding); - if (javaEncoding != null) { - ctx.getInputSource().setEncoding(javaEncoding); - } - return ctx; - } - - @JRubyMethod(name="io", meta=true) - public static IRubyObject parse_io(ThreadContext context, - IRubyObject klazz, - IRubyObject data, - IRubyObject encoding) { - HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz); - ctx.initialize(context.getRuntime()); - ctx.setInputSource(context, data, context.getRuntime().getNil()); - String javaEncoding = findEncoding(context, encoding); - if (javaEncoding != null) { - ctx.getInputSource().setEncoding(javaEncoding); - } - return ctx; - } - - /** - * Create a new parser context that will read from a raw input stream. - * Meant to be run in a separate thread by HtmlSaxPushParser. 
- */ - static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) { - HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz); - ctx.initialize(runtime); - ctx.setInputSource(stream); - return ctx; - } - - @Override - protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) { - // final String path = "Nokogiri::XML::FragmentHandler"; - // final String docFrag = - // "http://cyberneko.org/html/features/balance-tags/document-fragment"; - // RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); - // IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document"); - // RubyModule mod = runtime.getClassFromPath(path); - // try { - // if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod)) - // parser.setFeature(docFrag, true); - // } catch (Exception e) { - // // ignore - // } - } - -} diff --git a/ext/java/nokogiri/HtmlSaxPushParser.java b/ext/java/nokogiri/HtmlSaxPushParser.java deleted file mode 100644 index ae30a86f6d..0000000000 --- a/ext/java/nokogiri/HtmlSaxPushParser.java +++ /dev/null @@ -1,222 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following 
conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package nokogiri; - -import static nokogiri.XmlSaxPushParser.terminateExecution; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.IOException; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - -import nokogiri.internals.*; - -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.RubyObject; -import org.jruby.anno.JRubyClass; -import org.jruby.anno.JRubyMethod; -import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Class for Nokogiri::HTML::SAX::PushParser - * - * @author - * @author Piotr Szmielew - based on Nokogiri::XML::SAX::PushParser - */ -@JRubyClass(name="Nokogiri::HTML::SAX::PushParser") -public class HtmlSaxPushParser extends RubyObject { - ParserContext.Options options; - IRubyObject saxParser; - - NokogiriBlockingQueueInputStream stream; - - private ParserTask parserTask = null; - private FutureTask futureTask = null; 
- private ExecutorService executor = null; - - public HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - @Override - public void finalize() { - try { - terminateImpl(); - } - catch (Exception e) { /* ignored */ } - } - - @JRubyMethod - public IRubyObject initialize_native(final ThreadContext context, - IRubyObject saxParser, - IRubyObject fileName, - IRubyObject encoding) { - // NOTE: Silently skips provided encoding - options = new ParserContext.Options(0); - this.saxParser = saxParser; - return this; - } - - private transient IRubyObject parse_options; - - private IRubyObject parse_options(final ThreadContext context) { - if (parse_options == null) { - parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new"); - } - return parse_options; - } - - @JRubyMethod(name="options") - public IRubyObject getOptions(ThreadContext context) { - return invoke(context, parse_options(context), "options"); - } - - @JRubyMethod(name="options=") - public IRubyObject setOptions(ThreadContext context, IRubyObject opts) { - invoke(context, parse_options(context), "options=", opts); - options = new ParserContext.Options(opts.convertToInteger().getLongValue()); - return getOptions(context); - } - - @JRubyMethod - public IRubyObject native_write(ThreadContext context, IRubyObject chunk, IRubyObject isLast) { - try { - initialize_task(context); - } catch (IOException e) { - throw context.getRuntime().newRuntimeError(e.getMessage()); - } - final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); - if (data == null) { - terminateTask(context.runtime); - throw new RaiseException(XmlSyntaxError.createHTMLSyntaxError(context.runtime)); // Nokogiri::HTML::SyntaxError - } - - int errorCount0 = parserTask.getErrorCount(); - - if (isLast.isTrue()) { - IRubyObject document = invoke(context, this, "document"); - invoke(context, document, "end_document"); - terminateTask(context.runtime); - } else { 
- try { - Future task = stream.addChunk(data); - task.get(); - } - catch (ClosedStreamException ex) { - // this means the stream is closed, ignore this exception - } - catch (Exception e) { - throw context.runtime.newRuntimeError(e.getMessage()); - } - - } - - if (!options.recover && parserTask.getErrorCount() > errorCount0) { - terminateTask(context.runtime); - throw parserTask.getLastError(); - } - - return this; - } - - @SuppressWarnings("unchecked") - private void initialize_task(ThreadContext context) throws IOException { - if (futureTask == null || stream == null) { - stream = new NokogiriBlockingQueueInputStream(); - - assert saxParser != null : "saxParser null"; - parserTask = new ParserTask(context, saxParser, stream); - futureTask = new FutureTask((Callable) parserTask); - executor = Executors.newSingleThreadExecutor(new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - Thread t = new Thread(r); - t.setName("HtmlSaxPushParser"); - t.setDaemon(true); - return t; - } - }); - executor.submit(futureTask); - } - } - - private void terminateTask(final Ruby runtime) { - if (executor == null) return; - - try { - terminateImpl(); - } - catch (InterruptedException e) { - throw runtime.newRuntimeError(e.toString()); - } - catch (Exception e) { - throw runtime.newRuntimeError(e.toString()); - } - } - - private synchronized void terminateImpl() throws InterruptedException, ExecutionException { - terminateExecution(executor, stream, futureTask); - - executor = null; stream = null; futureTask = null; - } - - private static HtmlSaxParserContext parse(final Ruby runtime, final InputStream stream) { - RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML::SAX::ParserContext"); - return HtmlSaxParserContext.parse_stream(runtime, klazz, stream); - } - - static class ParserTask extends XmlSaxPushParser.ParserTask /* */ { - - private ParserTask(ThreadContext context, IRubyObject handler, InputStream stream) { - super(context, handler, 
parse(context.runtime, stream), stream); - } - - @Override - public HtmlSaxParserContext call() throws Exception { - return (HtmlSaxParserContext) super.call(); - } - - } - -} diff --git a/ext/java/nokogiri/NokogiriService.java b/ext/java/nokogiri/NokogiriService.java index 6b40e61260..a5d5b462ba 100644 --- a/ext/java/nokogiri/NokogiriService.java +++ b/ext/java/nokogiri/NokogiriService.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import java.util.Collections; @@ -48,550 +16,598 @@ /** * Class to provide Nokogiri. 
This class is used to make "require 'nokogiri'" work * in JRuby. Also, this class holds a Ruby type cache and allocators of Ruby types. - * + * * @author headius * @author Yoko Harada */ -public class NokogiriService implements BasicLibraryService { - public boolean basicLoad(Ruby ruby) { - init(ruby); - return true; +public class NokogiriService implements BasicLibraryService +{ + public boolean + basicLoad(Ruby ruby) + { + init(ruby); + return true; + } + + @SuppressWarnings("unchecked") + public static Map + getNokogiriClassCache(Ruby ruby) + { + return (Map) ruby.getModule("Nokogiri").getInternalVariable("cache"); + } + + private static Map + populateNokogiriClassCahce(Ruby ruby) + { + Map nokogiriClassCache = new HashMap(); + nokogiriClassCache.put("Nokogiri::HTML4::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::Document")); + nokogiriClassCache.put("Nokogiri::HTML4::ElementDescription", + (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::ElementDescription")); + nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr")); + nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document")); + nokogiriClassCache.put("Nokogiri::XML::DocumentFragment", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DocumentFragment")); + nokogiriClassCache.put("Nokogiri::XML::DTD", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD")); + nokogiriClassCache.put("Nokogiri::XML::Text", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Text")); + nokogiriClassCache.put("Nokogiri::XML::Comment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Comment")); + nokogiriClassCache.put("Nokogiri::XML::Element", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element")); + nokogiriClassCache.put("Nokogiri::XML::ElementContent", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementContent")); + nokogiriClassCache.put("Nokogiri::XML::ElementDecl", 
(RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementDecl")); + nokogiriClassCache.put("Nokogiri::XML::EntityDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl")); + nokogiriClassCache.put("Nokogiri::XML::EntityReference", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityReference")); + nokogiriClassCache.put("Nokogiri::XML::ProcessingInstruction", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ProcessingInstruction")); + nokogiriClassCache.put("Nokogiri::XML::CDATA", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::CDATA")); + nokogiriClassCache.put("Nokogiri::XML::Node", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Node")); + nokogiriClassCache.put("Nokogiri::XML::NodeSet", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::NodeSet")); + nokogiriClassCache.put("Nokogiri::XML::Namespace", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Namespace")); + nokogiriClassCache.put("Nokogiri::XML::SyntaxError", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SyntaxError")); + nokogiriClassCache.put("Nokogiri::XML::Reader", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Reader")); + nokogiriClassCache.put("Nokogiri::XML::RelaxNG", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::RelaxNG")); + nokogiriClassCache.put("Nokogiri::XML::Schema", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Schema")); + nokogiriClassCache.put("Nokogiri::XML::XPathContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::XPathContext")); + nokogiriClassCache.put("Nokogiri::XML::AttributeDecl", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::AttributeDecl")); + nokogiriClassCache.put("Nokogiri::XML::SAX::ParserContext", + (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SAX::ParserContext")); + return Collections.unmodifiableMap(nokogiriClassCache); + } + + private void + init(Ruby ruby) + { + RubyModule nokogiri = ruby.defineModule("Nokogiri"); + RubyModule xmlModule = nokogiri.defineModuleUnder("XML"); + RubyModule xmlSaxModule = 
xmlModule.defineModuleUnder("SAX"); + RubyModule htmlModule = nokogiri.defineModuleUnder("HTML4"); + RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX"); + RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT"); + + createSyntaxErrors(ruby, nokogiri, xmlModule); + RubyClass xmlNode = createXmlModule(ruby, xmlModule); + createHtmlModule(ruby, htmlModule); + createDocuments(ruby, xmlModule, htmlModule, xmlNode); + createSaxModule(ruby, xmlSaxModule, htmlSaxModule); + createXsltModule(ruby, xsltModule); + nokogiri.setInternalVariable("cache", populateNokogiriClassCahce(ruby)); + } + + private void + createSyntaxErrors(Ruby ruby, RubyModule nokogiri, RubyModule xmlModule) + { + RubyClass syntaxError = nokogiri.defineClassUnder("SyntaxError", ruby.getStandardError(), + ruby.getStandardError().getAllocator()); + RubyClass xmlSyntaxError = xmlModule.defineClassUnder("SyntaxError", syntaxError, XML_SYNTAXERROR_ALLOCATOR); + xmlSyntaxError.defineAnnotatedMethods(XmlSyntaxError.class); + } + + private RubyClass + createXmlModule(Ruby ruby, RubyModule xmlModule) + { + RubyClass node = xmlModule.defineClassUnder("Node", ruby.getObject(), XML_NODE_ALLOCATOR); + node.defineAnnotatedMethods(XmlNode.class); + + RubyClass attr = xmlModule.defineClassUnder("Attr", node, XML_ATTR_ALLOCATOR); + attr.defineAnnotatedMethods(XmlAttr.class); + + RubyClass attrDecl = xmlModule.defineClassUnder("AttributeDecl", node, XML_ATTRIBUTE_DECL_ALLOCATOR); + attrDecl.defineAnnotatedMethods(XmlAttributeDecl.class); + + RubyClass characterData = xmlModule.defineClassUnder("CharacterData", node, null); + + RubyClass comment = xmlModule.defineClassUnder("Comment", characterData, XML_COMMENT_ALLOCATOR); + comment.defineAnnotatedMethods(XmlComment.class); + + RubyClass text = xmlModule.defineClassUnder("Text", characterData, XML_TEXT_ALLOCATOR); + text.defineAnnotatedMethods(XmlText.class); + + RubyModule cdata = xmlModule.defineClassUnder("CDATA", text, XML_CDATA_ALLOCATOR); + 
cdata.defineAnnotatedMethods(XmlCdata.class); + + RubyClass dtd = xmlModule.defineClassUnder("DTD", node, XML_DTD_ALLOCATOR); + dtd.defineAnnotatedMethods(XmlDtd.class); + + RubyClass documentFragment = xmlModule.defineClassUnder("DocumentFragment", node, XML_DOCUMENT_FRAGMENT_ALLOCATOR); + documentFragment.defineAnnotatedMethods(XmlDocumentFragment.class); + + RubyClass element = xmlModule.defineClassUnder("Element", node, XML_ELEMENT_ALLOCATOR); + element.defineAnnotatedMethods(XmlElement.class); + + RubyClass elementContent = xmlModule.defineClassUnder("ElementContent", ruby.getObject(), + XML_ELEMENT_CONTENT_ALLOCATOR); + elementContent.defineAnnotatedMethods(XmlElementContent.class); + + RubyClass elementDecl = xmlModule.defineClassUnder("ElementDecl", node, XML_ELEMENT_DECL_ALLOCATOR); + elementDecl.defineAnnotatedMethods(XmlElementDecl.class); + + RubyClass entityDecl = xmlModule.defineClassUnder("EntityDecl", node, XML_ENTITY_DECL_ALLOCATOR); + entityDecl.defineAnnotatedMethods(XmlEntityDecl.class); + + entityDecl.defineConstant("INTERNAL_GENERAL", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL)); + entityDecl.defineConstant("EXTERNAL_GENERAL_PARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_PARSED)); + entityDecl.defineConstant("EXTERNAL_GENERAL_UNPARSED", RubyFixnum.newFixnum(ruby, + XmlEntityDecl.EXTERNAL_GENERAL_UNPARSED)); + entityDecl.defineConstant("INTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PARAMETER)); + entityDecl.defineConstant("EXTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_PARAMETER)); + entityDecl.defineConstant("INTERNAL_PREDEFINED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PREDEFINED)); + + RubyClass entref = xmlModule.defineClassUnder("EntityReference", node, XML_ENTITY_REFERENCE_ALLOCATOR); + entref.defineAnnotatedMethods(XmlEntityReference.class); + + RubyClass namespace = xmlModule.defineClassUnder("Namespace", ruby.getObject(), 
XML_NAMESPACE_ALLOCATOR); + namespace.defineAnnotatedMethods(XmlNamespace.class); + + RubyClass nodeSet = xmlModule.defineClassUnder("NodeSet", ruby.getObject(), XML_NODESET_ALLOCATOR); + nodeSet.defineAnnotatedMethods(XmlNodeSet.class); + + RubyClass pi = xmlModule.defineClassUnder("ProcessingInstruction", node, XML_PROCESSING_INSTRUCTION_ALLOCATOR); + pi.defineAnnotatedMethods(XmlProcessingInstruction.class); + + RubyClass reader = xmlModule.defineClassUnder("Reader", ruby.getObject(), XML_READER_ALLOCATOR); + reader.defineAnnotatedMethods(XmlReader.class); + + RubyClass schema = xmlModule.defineClassUnder("Schema", ruby.getObject(), XML_SCHEMA_ALLOCATOR); + schema.defineAnnotatedMethods(XmlSchema.class); + + RubyClass relaxng = xmlModule.defineClassUnder("RelaxNG", schema, XML_RELAXNG_ALLOCATOR); + relaxng.defineAnnotatedMethods(XmlRelaxng.class); + + RubyClass xpathContext = xmlModule.defineClassUnder("XPathContext", ruby.getObject(), XML_XPATHCONTEXT_ALLOCATOR); + xpathContext.defineAnnotatedMethods(XmlXpathContext.class); + + return node; + } + + private void + createHtmlModule(Ruby ruby, RubyModule htmlModule) + { + RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(), + HTML_ELEMENT_DESCRIPTION_ALLOCATOR); + htmlElemDesc.defineAnnotatedMethods(Html4ElementDescription.class); + + RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(), + HTML_ENTITY_LOOKUP_ALLOCATOR); + htmlEntityLookup.defineAnnotatedMethods(Html4EntityLookup.class); + } + + private void + createDocuments(Ruby ruby, RubyModule xmlModule, RubyModule htmlModule, RubyClass node) + { + RubyClass xmlDocument = xmlModule.defineClassUnder("Document", node, XML_DOCUMENT_ALLOCATOR); + xmlDocument.defineAnnotatedMethods(XmlDocument.class); + + //RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document); + RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR); + 
htmlDocument.defineAnnotatedMethods(Html4Document.class); + } + + private void + createSaxModule(Ruby ruby, RubyModule xmlSaxModule, RubyModule htmlSaxModule) + { + RubyClass xmlSaxParserContext = xmlSaxModule.defineClassUnder("ParserContext", ruby.getObject(), + XML_SAXPARSER_CONTEXT_ALLOCATOR); + xmlSaxParserContext.defineAnnotatedMethods(XmlSaxParserContext.class); + + RubyClass xmlSaxPushParser = xmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), XML_SAXPUSHPARSER_ALLOCATOR); + xmlSaxPushParser.defineAnnotatedMethods(XmlSaxPushParser.class); + + RubyClass htmlSaxPushParser = htmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), + HTML_SAXPUSHPARSER_ALLOCATOR); + htmlSaxPushParser.defineAnnotatedMethods(Html4SaxPushParser.class); + + RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext, + HTML_SAXPARSER_CONTEXT_ALLOCATOR); + htmlSaxParserContext.defineAnnotatedMethods(Html4SaxParserContext.class); + } + + private void + createXsltModule(Ruby ruby, RubyModule xsltModule) + { + RubyClass stylesheet = xsltModule.defineClassUnder("Stylesheet", ruby.getObject(), XSLT_STYLESHEET_ALLOCATOR); + stylesheet.defineAnnotatedMethods(XsltStylesheet.class); + xsltModule.defineAnnotatedMethod(XsltStylesheet.class, "register"); + } + + public static final ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() + { + private Html4Document htmlDocument = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (htmlDocument == null) { htmlDocument = new Html4Document(runtime, klazz); } + try { + Html4Document clone = (Html4Document) htmlDocument.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new Html4Document(runtime, klazz); + } } + }; + + private static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() + { + private Html4SaxParserContext htmlSaxParserContext = null; + public IRubyObject allocate(Ruby 
runtime, RubyClass klazz) { + if (htmlSaxParserContext == null) { htmlSaxParserContext = new Html4SaxParserContext(runtime, klazz); } + try { + Html4SaxParserContext clone = (Html4SaxParserContext) htmlSaxParserContext.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new Html4SaxParserContext(runtime, klazz); + } + } + }; - public static Map getNokogiriClassCache(Ruby ruby) { - return (Map) ruby.getModule("Nokogiri").getInternalVariable("cache"); + private static ObjectAllocator HTML_ELEMENT_DESCRIPTION_ALLOCATOR = + new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new Html4ElementDescription(runtime, klazz); } + }; - private static Map populateNokogiriClassCahce(Ruby ruby) { - Map nokogiriClassCache = new HashMap(); - nokogiriClassCache.put("Nokogiri::EncodingHandler", (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler")); - nokogiriClassCache.put("Nokogiri::HTML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::Document")); - nokogiriClassCache.put("Nokogiri::HTML::ElementDescription", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::ElementDescription")); - nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr")); - nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document")); - nokogiriClassCache.put("Nokogiri::XML::DocumentFragment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DocumentFragment")); - nokogiriClassCache.put("Nokogiri::XML::DTD", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD")); - nokogiriClassCache.put("Nokogiri::XML::Text", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Text")); - nokogiriClassCache.put("Nokogiri::XML::Comment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Comment")); - nokogiriClassCache.put("Nokogiri::XML::Element", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element")); - 
nokogiriClassCache.put("Nokogiri::XML::ElementContent", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementContent")); - nokogiriClassCache.put("Nokogiri::XML::ElementDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementDecl")); - nokogiriClassCache.put("Nokogiri::XML::EntityDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl")); - nokogiriClassCache.put("Nokogiri::XML::EntityReference", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityReference")); - nokogiriClassCache.put("Nokogiri::XML::ProcessingInstruction", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ProcessingInstruction")); - nokogiriClassCache.put("Nokogiri::XML::CDATA", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::CDATA")); - nokogiriClassCache.put("Nokogiri::XML::Node", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Node")); - nokogiriClassCache.put("Nokogiri::XML::NodeSet", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::NodeSet")); - nokogiriClassCache.put("Nokogiri::XML::Namespace", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Namespace")); - nokogiriClassCache.put("Nokogiri::XML::SyntaxError", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SyntaxError")); - nokogiriClassCache.put("Nokogiri::XML::Reader", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Reader")); - nokogiriClassCache.put("Nokogiri::XML::RelaxNG", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::RelaxNG")); - nokogiriClassCache.put("Nokogiri::XML::Schema", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Schema")); - nokogiriClassCache.put("Nokogiri::XML::XPathContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::XPathContext")); - nokogiriClassCache.put("Nokogiri::XML::AttributeDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::AttributeDecl")); - nokogiriClassCache.put("Nokogiri::XML::SAX::ParserContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SAX::ParserContext")); - return Collections.unmodifiableMap(nokogiriClassCache); + private static ObjectAllocator 
HTML_ENTITY_LOOKUP_ALLOCATOR = + new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new Html4EntityLookup(runtime, klazz); + } + }; + + public static final ObjectAllocator XML_ATTR_ALLOCATOR = new ObjectAllocator() + { + private XmlAttr xmlAttr = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlAttr == null) { xmlAttr = new XmlAttr(runtime, klazz); } + try { + XmlAttr clone = (XmlAttr) xmlAttr.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlAttr(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_CDATA_ALLOCATOR = new ObjectAllocator() + { + private XmlCdata xmlCdata = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlCdata == null) { xmlCdata = new XmlCdata(runtime, klazz); } + try { + XmlCdata clone = (XmlCdata) xmlCdata.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlCdata(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_COMMENT_ALLOCATOR = new ObjectAllocator() + { + private XmlComment xmlComment = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlComment == null) { xmlComment = new XmlComment(runtime, klazz); } + try { + XmlComment clone = (XmlComment) xmlComment.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlComment(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DOCUMENT_ALLOCATOR = new ObjectAllocator() + { + private XmlDocument xmlDocument = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDocument == null) { xmlDocument = new XmlDocument(runtime, klazz); } + try { + XmlDocument clone = (XmlDocument) xmlDocument.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new 
XmlDocument(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DOCUMENT_FRAGMENT_ALLOCATOR = new ObjectAllocator() + { + private XmlDocumentFragment xmlDocumentFragment = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDocumentFragment == null) { xmlDocumentFragment = new XmlDocumentFragment(runtime, klazz); } + try { + XmlDocumentFragment clone = (XmlDocumentFragment)xmlDocumentFragment.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDocumentFragment(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() + { + private XmlDtd xmlDtd = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDtd == null) { xmlDtd = new XmlDtd(runtime, klazz); } + try { + XmlDtd clone = (XmlDtd)xmlDtd.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDtd(runtime, klazz); + } } + }; + + public static final ObjectAllocator XML_ELEMENT_ALLOCATOR = new ObjectAllocator() + { + private XmlElement xmlElement = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlElement == null) { xmlElement = new XmlElement(runtime, klazz); } + try { + XmlElement clone = (XmlElement)xmlElement.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlElement(runtime, klazz); + } + } + }; + + public static ObjectAllocator XML_ELEMENT_DECL_ALLOCATOR = new ObjectAllocator() + { + private XmlElementDecl xmlElementDecl = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlElementDecl == null) { xmlElementDecl = new XmlElementDecl(runtime, klazz); } + try { + XmlElementDecl clone = (XmlElementDecl)xmlElementDecl.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlElementDecl(runtime, klazz); + 
} + } + }; + + public static ObjectAllocator XML_ENTITY_REFERENCE_ALLOCATOR = new ObjectAllocator() + { + private XmlEntityReference xmlEntityRef = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlEntityRef == null) { xmlEntityRef = new XmlEntityReference(runtime, klazz); } + try { + XmlEntityReference clone = (XmlEntityReference)xmlEntityRef.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlEntityReference(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NAMESPACE_ALLOCATOR = new ObjectAllocator() + { + private XmlNamespace xmlNamespace = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNamespace == null) { xmlNamespace = new XmlNamespace(runtime, klazz); } + try { + XmlNamespace clone = (XmlNamespace) xmlNamespace.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlNamespace(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NODE_ALLOCATOR = new ObjectAllocator() + { + private XmlNode xmlNode = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNode == null) { xmlNode = new XmlNode(runtime, klazz); } + try { + XmlNode clone = (XmlNode) xmlNode.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlNode(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NODESET_ALLOCATOR = new ObjectAllocator() + { + private XmlNodeSet xmlNodeSet = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNodeSet == null) { xmlNodeSet = new XmlNodeSet(runtime, klazz); } + try { + XmlNodeSet clone = (XmlNodeSet) xmlNodeSet.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlNodeSet(runtime, klazz); + } + } + }; + + public static ObjectAllocator 
XML_PROCESSING_INSTRUCTION_ALLOCATOR = new ObjectAllocator() + { + private XmlProcessingInstruction xmlProcessingInstruction = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlProcessingInstruction == null) { xmlProcessingInstruction = new XmlProcessingInstruction(runtime, klazz); } + try { + XmlProcessingInstruction clone = (XmlProcessingInstruction)xmlProcessingInstruction.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlProcessingInstruction(runtime, klazz); + } + } + }; + + public static ObjectAllocator XML_READER_ALLOCATOR = new ObjectAllocator() + { + private XmlReader xmlReader = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlReader == null) { xmlReader = new XmlReader(runtime, klazz); } + try { + XmlReader clone = (XmlReader) xmlReader.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + xmlReader = new XmlReader(runtime, klazz); + return xmlReader; + } + } + }; - private void init(Ruby ruby) { - RubyModule nokogiri = ruby.defineModule("Nokogiri"); - RubyModule xmlModule = nokogiri.defineModuleUnder("XML"); - RubyModule xmlSaxModule = xmlModule.defineModuleUnder("SAX"); - RubyModule htmlModule = nokogiri.defineModuleUnder("HTML"); - RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX"); - RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT"); - - createJavaLibraryVersionConstants(ruby, nokogiri); - createNokogiriModule(ruby, nokogiri); - createSyntaxErrors(ruby, nokogiri, xmlModule); - RubyClass xmlNode = createXmlModule(ruby, xmlModule); - createHtmlModule(ruby, htmlModule); - createDocuments(ruby, xmlModule, htmlModule, xmlNode); - createSaxModule(ruby, xmlSaxModule, htmlSaxModule); - createXsltModule(ruby, xsltModule); - nokogiri.setInternalVariable("cache", populateNokogiriClassCahce(ruby)); + private static ObjectAllocator XML_ATTRIBUTE_DECL_ALLOCATOR = new 
ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlAttributeDecl(runtime, klazz); } + }; - private void createJavaLibraryVersionConstants(Ruby ruby, RubyModule nokogiri) { - nokogiri.defineConstant("XERCES_VERSION", ruby.newString(org.apache.xerces.impl.Version.getVersion())); - nokogiri.defineConstant("NEKO_VERSION", ruby.newString(org.cyberneko.html.Version.getVersion())); + private static ObjectAllocator XML_ENTITY_DECL_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlEntityDecl(runtime, klazz); } + }; - private void createNokogiriModule(Ruby ruby, RubyModule nokogiri) { - RubyClass encHandler = nokogiri.defineClassUnder("EncodingHandler", ruby.getObject(), ENCODING_HANDLER_ALLOCATOR); - encHandler.defineAnnotatedMethods(EncodingHandler.class); + private static ObjectAllocator XML_ELEMENT_CONTENT_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + throw runtime.newNotImplementedError("not implemented"); + } + }; + + public static final ObjectAllocator XML_RELAXNG_ALLOCATOR = new ObjectAllocator() + { + private XmlRelaxng xmlRelaxng = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlRelaxng == null) { xmlRelaxng = new XmlRelaxng(runtime, klazz); } + try { + XmlRelaxng clone = (XmlRelaxng) xmlRelaxng.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlRelaxng(runtime, klazz); + } } - - private void createSyntaxErrors(Ruby ruby, RubyModule nokogiri, RubyModule xmlModule) { - RubyClass syntaxError = nokogiri.defineClassUnder("SyntaxError", ruby.getStandardError(), ruby.getStandardError().getAllocator()); - RubyClass xmlSyntaxError = xmlModule.defineClassUnder("SyntaxError", syntaxError, XML_SYNTAXERROR_ALLOCATOR); - xmlSyntaxError.defineAnnotatedMethods(XmlSyntaxError.class); + }; + + public static final 
ObjectAllocator XML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() + { + private XmlSaxParserContext xmlSaxParserContext = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlSaxParserContext == null) { xmlSaxParserContext = new XmlSaxParserContext(runtime, klazz); } + try { + XmlSaxParserContext clone = (XmlSaxParserContext) xmlSaxParserContext.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlSaxParserContext(runtime, klazz); + } } - - private RubyClass createXmlModule(Ruby ruby, RubyModule xmlModule) { - RubyClass node = xmlModule.defineClassUnder("Node", ruby.getObject(), XML_NODE_ALLOCATOR); - node.defineAnnotatedMethods(XmlNode.class); - - RubyClass attr = xmlModule.defineClassUnder("Attr", node, XML_ATTR_ALLOCATOR); - attr.defineAnnotatedMethods(XmlAttr.class); - - RubyClass attrDecl = xmlModule.defineClassUnder("AttributeDecl", node, XML_ATTRIBUTE_DECL_ALLOCATOR); - attrDecl.defineAnnotatedMethods(XmlAttributeDecl.class); - - RubyClass characterData = xmlModule.defineClassUnder("CharacterData", node, null); - - RubyClass comment = xmlModule.defineClassUnder("Comment", characterData, XML_COMMENT_ALLOCATOR); - comment.defineAnnotatedMethods(XmlComment.class); - - RubyClass text = xmlModule.defineClassUnder("Text", characterData, XML_TEXT_ALLOCATOR); - text.defineAnnotatedMethods(XmlText.class); - - RubyModule cdata = xmlModule.defineClassUnder("CDATA", text, XML_CDATA_ALLOCATOR); - cdata.defineAnnotatedMethods(XmlCdata.class); - - RubyClass dtd = xmlModule.defineClassUnder("DTD", node, XML_DTD_ALLOCATOR); - dtd.defineAnnotatedMethods(XmlDtd.class); - - RubyClass documentFragment = xmlModule.defineClassUnder("DocumentFragment", node, XML_DOCUMENT_FRAGMENT_ALLOCATOR); - documentFragment.defineAnnotatedMethods(XmlDocumentFragment.class); - - RubyClass element = xmlModule.defineClassUnder("Element", node, XML_ELEMENT_ALLOCATOR); - 
element.defineAnnotatedMethods(XmlElement.class); - - RubyClass elementContent = xmlModule.defineClassUnder("ElementContent", ruby.getObject(), XML_ELEMENT_CONTENT_ALLOCATOR); - elementContent.defineAnnotatedMethods(XmlElementContent.class); - - RubyClass elementDecl = xmlModule.defineClassUnder("ElementDecl", node, XML_ELEMENT_DECL_ALLOCATOR); - elementDecl.defineAnnotatedMethods(XmlElementDecl.class); - - RubyClass entityDecl = xmlModule.defineClassUnder("EntityDecl", node, XML_ENTITY_DECL_ALLOCATOR); - entityDecl.defineAnnotatedMethods(XmlEntityDecl.class); - - entityDecl.defineConstant("INTERNAL_GENERAL", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL)); - entityDecl.defineConstant("EXTERNAL_GENERAL_PARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_PARSED)); - entityDecl.defineConstant("EXTERNAL_GENERAL_UNPARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_UNPARSED)); - entityDecl.defineConstant("INTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PARAMETER)); - entityDecl.defineConstant("EXTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_PARAMETER)); - entityDecl.defineConstant("INTERNAL_PREDEFINED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PREDEFINED)); - - RubyClass entref = xmlModule.defineClassUnder("EntityReference", node, XML_ENTITY_REFERENCE_ALLOCATOR); - entref.defineAnnotatedMethods(XmlEntityReference.class); - - RubyClass namespace = xmlModule.defineClassUnder("Namespace", ruby.getObject(), XML_NAMESPACE_ALLOCATOR); - namespace.defineAnnotatedMethods(XmlNamespace.class); - - RubyClass nodeSet = xmlModule.defineClassUnder("NodeSet", ruby.getObject(), XML_NODESET_ALLOCATOR); - nodeSet.defineAnnotatedMethods(XmlNodeSet.class); - - RubyClass pi = xmlModule.defineClassUnder("ProcessingInstruction", node, XML_PROCESSING_INSTRUCTION_ALLOCATOR); - pi.defineAnnotatedMethods(XmlProcessingInstruction.class); - - RubyClass reader = xmlModule.defineClassUnder("Reader", 
ruby.getObject(), XML_READER_ALLOCATOR); - reader.defineAnnotatedMethods(XmlReader.class); - - RubyClass schema = xmlModule.defineClassUnder("Schema", ruby.getObject(), XML_SCHEMA_ALLOCATOR); - schema.defineAnnotatedMethods(XmlSchema.class); - - RubyClass relaxng = xmlModule.defineClassUnder("RelaxNG", schema, XML_RELAXNG_ALLOCATOR); - relaxng.defineAnnotatedMethods(XmlRelaxng.class); - - RubyClass xpathContext = xmlModule.defineClassUnder("XPathContext", ruby.getObject(), XML_XPATHCONTEXT_ALLOCATOR); - xpathContext.defineAnnotatedMethods(XmlXpathContext.class); - - return node; + }; + + private static final ObjectAllocator XML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlSaxPushParser(runtime, klazz); } + }; - private void createHtmlModule(Ruby ruby, RubyModule htmlModule) { - RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(), HTML_ELEMENT_DESCRIPTION_ALLOCATOR); - htmlElemDesc.defineAnnotatedMethods(HtmlElementDescription.class); - - RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(), HTML_ENTITY_LOOKUP_ALLOCATOR); - htmlEntityLookup.defineAnnotatedMethods(HtmlEntityLookup.class); + private static final ObjectAllocator HTML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new Html4SaxPushParser(runtime, klazz); } - - private void createDocuments(Ruby ruby, RubyModule xmlModule, RubyModule htmlModule, RubyClass node) { - RubyClass xmlDocument = xmlModule.defineClassUnder("Document", node, XML_DOCUMENT_ALLOCATOR); - xmlDocument.defineAnnotatedMethods(XmlDocument.class); - - //RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document); - RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR); - htmlDocument.defineAnnotatedMethods(HtmlDocument.class); + }; + + public 
static final ObjectAllocator XML_SCHEMA_ALLOCATOR = new ObjectAllocator() + { + private XmlSchema xmlSchema = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlSchema == null) { xmlSchema = new XmlSchema(runtime, klazz); } + try { + XmlSchema clone = (XmlSchema) xmlSchema.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlSchema(runtime, klazz); + } } - - private void createSaxModule(Ruby ruby, RubyModule xmlSaxModule, RubyModule htmlSaxModule) { - RubyClass xmlSaxParserContext = xmlSaxModule.defineClassUnder("ParserContext", ruby.getObject(), XML_SAXPARSER_CONTEXT_ALLOCATOR); - xmlSaxParserContext.defineAnnotatedMethods(XmlSaxParserContext.class); - - RubyClass xmlSaxPushParser = xmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), XML_SAXPUSHPARSER_ALLOCATOR); - xmlSaxPushParser.defineAnnotatedMethods(XmlSaxPushParser.class); - - RubyClass htmlSaxPushParser = htmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), HTML_SAXPUSHPARSER_ALLOCATOR); - htmlSaxPushParser.defineAnnotatedMethods(HtmlSaxPushParser.class); - - RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext, HTML_SAXPARSER_CONTEXT_ALLOCATOR); - htmlSaxParserContext.defineAnnotatedMethods(HtmlSaxParserContext.class); + }; + + public static final ObjectAllocator XML_SYNTAXERROR_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlSyntaxError(runtime, klazz); } - - private void createXsltModule(Ruby ruby, RubyModule xsltModule) { - RubyClass stylesheet = xsltModule.defineClassUnder("Stylesheet", ruby.getObject(), XSLT_STYLESHEET_ALLOCATOR); - stylesheet.defineAnnotatedMethods(XsltStylesheet.class); - xsltModule.defineAnnotatedMethod(XsltStylesheet.class, "register"); + }; + + public static final ObjectAllocator XML_TEXT_ALLOCATOR = new ObjectAllocator() + { + private XmlText xmlText = null; 
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlText == null) { xmlText = new XmlText(runtime, klazz); } + try { + XmlText clone = (XmlText) xmlText.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlText(runtime, klazz); + } } + }; - private static ObjectAllocator ENCODING_HANDLER_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new EncodingHandler(runtime, klazz, ""); - } - }; - - public static final ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { - private HtmlDocument htmlDocument = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (htmlDocument == null) htmlDocument = new HtmlDocument(runtime, klazz); - try { - HtmlDocument clone = (HtmlDocument) htmlDocument.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new HtmlDocument(runtime, klazz); - } - } - }; - - public static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() { - private HtmlSaxParserContext htmlSaxParserContext = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (htmlSaxParserContext == null) htmlSaxParserContext = new HtmlSaxParserContext(runtime, klazz); - try { - HtmlSaxParserContext clone = (HtmlSaxParserContext) htmlSaxParserContext.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new HtmlSaxParserContext(runtime, klazz); - } - } - }; - - private static ObjectAllocator HTML_ELEMENT_DESCRIPTION_ALLOCATOR = - new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new HtmlElementDescription(runtime, klazz); - } - }; - - private static ObjectAllocator HTML_ENTITY_LOOKUP_ALLOCATOR = - new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new HtmlEntityLookup(runtime, 
klazz); - } - }; - - public static final ObjectAllocator XML_ATTR_ALLOCATOR = new ObjectAllocator() { - private XmlAttr xmlAttr = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlAttr == null) xmlAttr = new XmlAttr(runtime, klazz); - try { - XmlAttr clone = (XmlAttr) xmlAttr.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlAttr(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_CDATA_ALLOCATOR = new ObjectAllocator() { - private XmlCdata xmlCdata = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlCdata == null) xmlCdata = new XmlCdata(runtime, klazz); - try { - XmlCdata clone = (XmlCdata) xmlCdata.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlCdata(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_COMMENT_ALLOCATOR = new ObjectAllocator() { - private XmlComment xmlComment = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlComment == null) xmlComment = new XmlComment(runtime, klazz); - try { - XmlComment clone = (XmlComment) xmlComment.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlComment(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { - private XmlDocument xmlDocument = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlDocument == null) xmlDocument = new XmlDocument(runtime, klazz); - try { - XmlDocument clone = (XmlDocument) xmlDocument.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlDocument(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_DOCUMENT_FRAGMENT_ALLOCATOR = new ObjectAllocator() { - private XmlDocumentFragment xmlDocumentFragment = null; - 
public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlDocumentFragment == null) xmlDocumentFragment = new XmlDocumentFragment(runtime, klazz); - try { - XmlDocumentFragment clone = (XmlDocumentFragment)xmlDocumentFragment.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlDocumentFragment(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() { - private XmlDtd xmlDtd = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlDtd == null) xmlDtd = new XmlDtd(runtime, klazz); - try { - XmlDtd clone = (XmlDtd)xmlDtd.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlDtd(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_ELEMENT_ALLOCATOR = new ObjectAllocator() { - private XmlElement xmlElement = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlElement == null) xmlElement = new XmlElement(runtime, klazz); - try { - XmlElement clone = (XmlElement)xmlElement.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlElement(runtime, klazz); - } - } - }; - - public static ObjectAllocator XML_ELEMENT_DECL_ALLOCATOR = new ObjectAllocator() { - private XmlElementDecl xmlElementDecl = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlElementDecl == null) xmlElementDecl = new XmlElementDecl(runtime, klazz); - try { - XmlElementDecl clone = (XmlElementDecl)xmlElementDecl.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlElementDecl(runtime, klazz); - } - } - }; - - public static ObjectAllocator XML_ENTITY_REFERENCE_ALLOCATOR = new ObjectAllocator() { - private XmlEntityReference xmlEntityRef = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if 
(xmlEntityRef == null) xmlEntityRef = new XmlEntityReference(runtime, klazz); - try { - XmlEntityReference clone = (XmlEntityReference)xmlEntityRef.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlEntityReference(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_NAMESPACE_ALLOCATOR = new ObjectAllocator() { - private XmlNamespace xmlNamespace = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlNamespace == null) xmlNamespace = new XmlNamespace(runtime, klazz); - try { - XmlNamespace clone = (XmlNamespace) xmlNamespace.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlNamespace(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_NODE_ALLOCATOR = new ObjectAllocator() { - private XmlNode xmlNode = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlNode == null) xmlNode = new XmlNode(runtime, klazz); - try { - XmlNode clone = (XmlNode) xmlNode.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlNode(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_NODESET_ALLOCATOR = new ObjectAllocator() { - private XmlNodeSet xmlNodeSet = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlNodeSet == null) xmlNodeSet = new XmlNodeSet(runtime, klazz); - try { - XmlNodeSet clone = (XmlNodeSet) xmlNodeSet.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlNodeSet(runtime, klazz); - } - } - }; - - public static ObjectAllocator XML_PROCESSING_INSTRUCTION_ALLOCATOR = new ObjectAllocator() { - private XmlProcessingInstruction xmlProcessingInstruction = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlProcessingInstruction == null) xmlProcessingInstruction = new 
XmlProcessingInstruction(runtime, klazz); - try { - XmlProcessingInstruction clone = (XmlProcessingInstruction)xmlProcessingInstruction.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlProcessingInstruction(runtime, klazz); - } - } - }; - - public static ObjectAllocator XML_READER_ALLOCATOR = new ObjectAllocator() { - private XmlReader xmlReader = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlReader == null) xmlReader = new XmlReader(runtime, klazz); - try { - XmlReader clone = (XmlReader) xmlReader.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - xmlReader = new XmlReader(runtime, klazz); - return xmlReader; - } - } - }; - - private static ObjectAllocator XML_ATTRIBUTE_DECL_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlAttributeDecl(runtime, klazz); - } - }; - - private static ObjectAllocator XML_ENTITY_DECL_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlEntityDecl(runtime, klazz); - } - }; - - private static ObjectAllocator XML_ELEMENT_CONTENT_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - throw runtime.newNotImplementedError("not implemented"); - } - }; - - public static final ObjectAllocator XML_RELAXNG_ALLOCATOR = new ObjectAllocator() { - private XmlRelaxng xmlRelaxng = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlRelaxng == null) xmlRelaxng = new XmlRelaxng(runtime, klazz); - try { - XmlRelaxng clone = (XmlRelaxng) xmlRelaxng.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlRelaxng(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() { - private 
XmlSaxParserContext xmlSaxParserContext = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlSaxParserContext == null) xmlSaxParserContext = new XmlSaxParserContext(runtime, klazz); - try { - XmlSaxParserContext clone = (XmlSaxParserContext) xmlSaxParserContext.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlSaxParserContext(runtime, klazz); - } - } - }; - - private static final ObjectAllocator XML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlSaxPushParser(runtime, klazz); - } - }; - - private static final ObjectAllocator HTML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new HtmlSaxPushParser(runtime, klazz); - } - }; - - public static final ObjectAllocator XML_SCHEMA_ALLOCATOR = new ObjectAllocator() { - private XmlSchema xmlSchema = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlSchema == null) xmlSchema = new XmlSchema(runtime, klazz); - try { - XmlSchema clone = (XmlSchema) xmlSchema.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlSchema(runtime, klazz); - } - } - }; - - public static final ObjectAllocator XML_SYNTAXERROR_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlSyntaxError(runtime, klazz); - } - }; - - public static final ObjectAllocator XML_TEXT_ALLOCATOR = new ObjectAllocator() { - private XmlText xmlText = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xmlText == null) xmlText = new XmlText(runtime, klazz); - try { - XmlText clone = (XmlText) xmlText.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlText(runtime, klazz); - } - } - }; - - public static 
final ObjectAllocator XML_XPATHCONTEXT_ALLOCATOR = new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlXpathContext(runtime, klazz); - } - }; - - public static ObjectAllocator XSLT_STYLESHEET_ALLOCATOR = new ObjectAllocator() { - private XsltStylesheet xsltStylesheet = null; - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (xsltStylesheet == null) xsltStylesheet = new XsltStylesheet(runtime, klazz); - try { - XsltStylesheet clone = (XsltStylesheet) xsltStylesheet.clone(); - clone.setMetaClass(klazz); - return clone; - } catch (CloneNotSupportedException e) { - return new XmlText(runtime, klazz); - } - } - }; + public static final ObjectAllocator XML_XPATHCONTEXT_ALLOCATOR = new ObjectAllocator() + { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlXpathContext(runtime, klazz); + } + }; + + public static ObjectAllocator XSLT_STYLESHEET_ALLOCATOR = new ObjectAllocator() + { + private XsltStylesheet xsltStylesheet = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xsltStylesheet == null) { xsltStylesheet = new XsltStylesheet(runtime, klazz); } + try { + XsltStylesheet clone = (XsltStylesheet) xsltStylesheet.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlText(runtime, klazz); + } + } + }; } diff --git a/ext/java/nokogiri/XmlAttr.java b/ext/java/nokogiri/XmlAttr.java index 06235d930d..029f6f407f 100644 --- a/ext/java/nokogiri/XmlAttr.java +++ b/ext/java/nokogiri/XmlAttr.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free 
of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -54,116 +22,133 @@ * @author sergio * @author Yoko Harada */ - -@JRubyClass(name="Nokogiri::XML::Attr", parent="Nokogiri::XML::Node") -public class XmlAttr extends XmlNode { - - public static final String[] HTML_BOOLEAN_ATTRS = { - "checked", "compact", "declare", "defer", "disabled", "ismap", - "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", - "selected" - }; - - public XmlAttr(Ruby ruby, Node attr){ - super(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Attr"), attr); +@JRubyClass(name = "Nokogiri::XML::Attr", parent = "Nokogiri::XML::Node") +public class XmlAttr extends XmlNode +{ + private static final long serialVersionUID = 1L; + + public static final String[] HTML_BOOLEAN_ATTRS = { + "checked", "compact", "declare", "defer", "disabled", "ismap", + "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", + "selected" + }; + + public + XmlAttr(Ruby ruby, Node attr) + { + super(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Attr"), attr); + } + + public + XmlAttr(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + public + XmlAttr(Ruby ruby, RubyClass rubyClass, Node attr) + { + super(ruby, rubyClass, attr); + } + + @Override + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw context.runtime.newArgumentError(args.length, 2); } - public XmlAttr(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } + IRubyObject doc = args[0]; + IRubyObject content = args[1]; - public XmlAttr(Ruby ruby, RubyClass rubyClass, Node attr){ - super(ruby, rubyClass, attr); + if (!(doc instanceof XmlDocument)) { + throw context.runtime.newArgumentError("document must be an instance of Nokogiri::XML::Document"); } - @Override - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) { - throw getRuntime().newArgumentError(args.length, 2); - } - - IRubyObject 
doc = args[0]; - IRubyObject content = args[1]; - - if(!(doc instanceof XmlDocument)) { - final String msg = - "document must be an instance of Nokogiri::XML::Document"; - throw getRuntime().newArgumentError(msg); - } - - XmlDocument xmlDoc = (XmlDocument)doc; - String str = rubyStringToString(content); - Node attr = xmlDoc.getDocument().createAttribute(str); - setNode(context, attr); + XmlDocument xmlDoc = (XmlDocument)doc; + String str = rubyStringToString(content); + Node attr = xmlDoc.getDocument().createAttribute(str); + setNode(context.runtime, attr); + } + + + // this method is called from XmlNode.setNode() + // if the node is attribute, and its name has prefix "xml" + // the default namespace should be registered for this attribute + void + setNamespaceIfNecessary(Ruby runtime) + { + if ("xml".equals(node.getPrefix())) { + XmlNamespace.createDefaultNamespace(runtime, node); } - - - // this method is called from XmlNode.setNode() - // if the node is attribute, and its name has prefix "xml" - // the default namespace should be registered for this attribute - void setNamespaceIfNecessary(Ruby runtime) { - if ("xml".equals(node.getPrefix())) { - XmlNamespace.createDefaultNamespace(runtime, node); - } + } + + @Override + @JRubyMethod(name = {"content", "value", "to_s"}) + public IRubyObject + content(ThreadContext context) + { + if (content != null && !content.isNil()) { return content; } + if (node == null) { return context.getRuntime().getNil(); } + String attrValue = ((Attr)node).getValue(); + if (attrValue == null) { return context.getRuntime().getNil(); } + return RubyString.newString(context.getRuntime(), attrValue); + } + + @JRubyMethod(name = {"value=", "content="}) + public IRubyObject + value_set(ThreadContext context, IRubyObject content) + { + Attr attr = (Attr) node; + if (content != null && !content.isNil()) { + attr.setValue(rubyStringToString(XmlNode.encode_special_chars(context, content))); } - - private boolean isHtmlBooleanAttr() { - String 
name = node.getNodeName().toLowerCase(); - - for(String s : HTML_BOOLEAN_ATTRS) { - if(s.equals(name)) return true; - } - - return false; - } - - @Override - @JRubyMethod(name = {"content", "value", "to_s"}) - public IRubyObject content(ThreadContext context) { - if (content != null && !content.isNil()) return content; - if (node == null) return context.getRuntime().getNil(); - String attrValue = ((Attr)node).getValue(); - if (attrValue == null) return context.getRuntime().getNil(); - return RubyString.newString(context.getRuntime(), attrValue); - } - - @JRubyMethod(name = {"value=", "content="}) - public IRubyObject value_set(ThreadContext context, IRubyObject content){ - Attr attr = (Attr) node; - if (content != null && !content.isNil()) { - attr.setValue(rubyStringToString(XmlNode.encode_special_chars(context, content))); - } - setContent(content); - return content; - } - - @Override - protected IRubyObject getNodeName(ThreadContext context) { - if (name != null) return name; - String attrName = ((Attr)node).getName(); - if (!(doc instanceof HtmlDocument) && node.getNamespaceURI() != null) { - attrName = NokogiriHelpers.getLocalPart(attrName); - } - return attrName == null ? 
context.getRuntime().getNil() : RubyString.newString(context.getRuntime(), attrName); + setContent(content); + return content; + } + + @Override + protected IRubyObject + getNodeName(ThreadContext context) + { + if (name != null) { return name; } + + String attrName = ((Attr) node).getName(); + if (attrName == null) { return context.nil; } + + if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof Html4Document)) { + attrName = NokogiriHelpers.getLocalPart(attrName); + if (attrName == null) { return context.nil; } } - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((Attr)node); - visitor.leave((Attr)node); - } - - private boolean isHtml(ThreadContext context) { - return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document")); - } - - @Override - public IRubyObject unlink(ThreadContext context) { - Attr attr = (Attr) node; - Element parent = attr.getOwnerElement(); - parent.removeAttributeNode(attr); - - return this; - } + return name = RubyString.newString(context.runtime, attrName); + } + + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((Attr)node); + visitor.leave((Attr)node); + } + + private boolean + isHtml(ThreadContext context) + { + return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.getRuntime(), + "Nokogiri::HTML4::Document")); + } + + @Override + public IRubyObject + unlink(ThreadContext context) + { + Attr attr = (Attr) node; + Element parent = attr.getOwnerElement(); + parent.removeAttributeNode(attr); + + return this; + } } diff --git a/ext/java/nokogiri/XmlAttributeDecl.java b/ext/java/nokogiri/XmlAttributeDecl.java index cca3cee25e..242fc804fd 100644 --- a/ext/java/nokogiri/XmlAttributeDecl.java +++ b/ext/java/nokogiri/XmlAttributeDecl.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron 
Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -49,82 +17,103 @@ * * @author Patrick Mahoney */ -@JRubyClass(name="Nokogiri::XML::AttributeDecl", parent="Nokogiri::XML::Node") -public class XmlAttributeDecl extends XmlNode { - - public XmlAttributeDecl(Ruby ruby, RubyClass klass) { - super(ruby, klass); - throw ruby.newRuntimeError("node required"); - } - - /** - * Initialize based on an attributeDecl node from a NekoDTD parsed - * DTD. - * - * Internally, XmlAttributeDecl combines these into a single node. 
- */ - public XmlAttributeDecl(Ruby ruby, RubyClass klass, Node attrDeclNode) { - super(ruby, klass, attrDeclNode); - } - - public static IRubyObject create(ThreadContext context, Node attrDeclNode) { - XmlAttributeDecl self = - new XmlAttributeDecl(context.getRuntime(), - getNokogiriClass(context.getRuntime(), "Nokogiri::XML::AttributeDecl"), - attrDeclNode); - return self; - } - - @Override - @JRubyMethod - public IRubyObject node_name(ThreadContext context) { - return attribute_name(context); - } - - @Override - @JRubyMethod(name = "node_name=") - public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { - throw context.getRuntime() - .newRuntimeError("cannot change name of DTD decl"); - } - - public IRubyObject element_name(ThreadContext context) { - return getAttribute(context, "ename"); - } - - public IRubyObject attribute_name(ThreadContext context) { - return getAttribute(context, "aname"); - } - - @JRubyMethod - public IRubyObject attribute_type(ThreadContext context) { - return getAttribute(context, "atype"); +@JRubyClass(name = "Nokogiri::XML::AttributeDecl", parent = "Nokogiri::XML::Node") +public class XmlAttributeDecl extends XmlNode +{ + private static final long serialVersionUID = 1L; + + public + XmlAttributeDecl(Ruby ruby, RubyClass klass) + { + super(ruby, klass); + throw ruby.newRuntimeError("node required"); + } + + /** + * Initialize based on an attributeDecl node from a NekoDTD parsed + * DTD. + * + * Internally, XmlAttributeDecl combines these into a single node. 
+ */ + public + XmlAttributeDecl(Ruby ruby, RubyClass klass, Node attrDeclNode) + { + super(ruby, klass, attrDeclNode); + } + + static XmlAttributeDecl + create(ThreadContext context, Node attrDeclNode) + { + return new XmlAttributeDecl(context.runtime, + getNokogiriClass(context.runtime, "Nokogiri::XML::AttributeDecl"), + attrDeclNode + ); + } + + @Override + @JRubyMethod + public IRubyObject + node_name(ThreadContext context) + { + return attribute_name(context); + } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject + node_name_set(ThreadContext context, IRubyObject name) + { + throw context.runtime.newRuntimeError("cannot change name of DTD decl"); + } + + public IRubyObject + element_name(ThreadContext context) + { + return getAttribute(context, "ename"); + } + + public IRubyObject + attribute_name(ThreadContext context) + { + return getAttribute(context, "aname"); + } + + @JRubyMethod + public IRubyObject + attribute_type(ThreadContext context) + { + return getAttribute(context, "atype"); + } + + @JRubyMethod(name = "default") + public IRubyObject + default_value(ThreadContext context) + { + return getAttribute(context, "default"); + } + + /** + * FIXME: will enumerations all be of the simple (val1|val2|val3) + * type string? 
+ */ + @JRubyMethod + public IRubyObject + enumeration(ThreadContext context) + { + final String atype = ((Element) node).getAttribute("atype"); + + if (atype != null && atype.length() != 0 && atype.charAt(0) == '(') { + // removed enclosing parens + String valueStr = atype.substring(1, atype.length() - 1); + String[] values = valueStr.split("\\|"); + RubyArray enumVals = RubyArray.newArray(context.runtime, values.length); + for (int i = 0; i < values.length; i++) { + enumVals.append(context.runtime.newString(values[i])); + } + return enumVals; } - @JRubyMethod(name="default") - public IRubyObject default_value(ThreadContext context) { - return getAttribute(context, "default"); - } - - /** - * FIXME: will enumerations all be of the simple (val1|val2|val3) - * type string? - */ - @JRubyMethod - public IRubyObject enumeration(ThreadContext context) { - RubyArray enumVals = RubyArray.newArray(context.getRuntime()); - String atype = ((Element)node).getAttribute("atype"); - - if (atype != null && atype.length() != 0 && atype.charAt(0) == '(') { - // removed enclosing parens - String valueStr = atype.substring(1, atype.length() - 1); - String[] values = valueStr.split("\\|"); - for (int i = 0; i < values.length; i++) { - enumVals.append(context.getRuntime().newString(values[i])); - } - } - - return enumVals; - } + return context.runtime.newEmptyArray(); + } } diff --git a/ext/java/nokogiri/XmlCdata.java b/ext/java/nokogiri/XmlCdata.java index 0980378db3..31b8f420c6 100644 --- a/ext/java/nokogiri/XmlCdata.java +++ b/ext/java/nokogiri/XmlCdata.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any 
person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.rubyStringToString; @@ -51,33 +19,42 @@ * @author sergio * @author Yoko Harada */ - -@JRubyClass(name="Nokogiri::XML::CDATA", parent="Nokogiri::XML::Text") -public class XmlCdata extends XmlText { - public XmlCdata(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - public XmlCdata(Ruby ruby, RubyClass rubyClass, Node node) { - super(ruby, rubyClass, node); - } - - @Override - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) { - throw getRuntime().newArgumentError(args.length, 2); - } - IRubyObject doc = args[0]; - content = args[1]; - XmlDocument xmlDoc =(XmlDocument) ((XmlNode) doc).document(context); - Document document = xmlDoc.getDocument(); - Node node = document.createCDATASection((content.isNil()) ? 
null : rubyStringToString(content)); - setNode(context, node); - } - - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((CDATASection)node); - visitor.leave((CDATASection)node); +@JRubyClass(name = "Nokogiri::XML::CDATA", parent = "Nokogiri::XML::Text") +public class XmlCdata extends XmlText +{ + private static final long serialVersionUID = 1L; + + public + XmlCdata(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + public + XmlCdata(Ruby ruby, RubyClass rubyClass, Node node) + { + super(ruby, rubyClass, node); + } + + @Override + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw getRuntime().newArgumentError(args.length, 2); } + IRubyObject doc = args[0]; + content = args[1]; + Document document = ((XmlNode) doc).getOwnerDocument(); + Node node = document.createCDATASection(rubyStringToString(content)); + setNode(context.runtime, node); + } + + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((CDATASection)node); + visitor.leave((CDATASection)node); + } } diff --git a/ext/java/nokogiri/XmlComment.java b/ext/java/nokogiri/XmlComment.java index 7f6445fe29..f77a91e366 100644 --- a/ext/java/nokogiri/XmlComment.java +++ b/ext/java/nokogiri/XmlComment.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, 
publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.rubyStringToString; @@ -50,48 +18,60 @@ * @author sergio * @author Yoko Harada */ +@JRubyClass(name = "Nokogiri::XML::Comment", parent = "Nokogiri::XML::CharacterData") +public class XmlComment extends XmlNode +{ + private static final long serialVersionUID = 1L; -@JRubyClass(name="Nokogiri::XML::Comment", parent="Nokogiri::XML::CharacterData") -public class XmlComment extends XmlNode { - public XmlComment(Ruby ruby, RubyClass rubyClass, Node node) { - super(ruby, rubyClass, node); - } + public + XmlComment(Ruby ruby, RubyClass rubyClass, Node node) + { + super(ruby, rubyClass, node); + } + + public + XmlComment(Ruby runtime, RubyClass klass) + { + super(runtime, klass); + } - public XmlComment(Ruby runtime, RubyClass klass) { - super(runtime, klass); + @Override + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw getRuntime().newArgumentError(args.length, 2); } - @Override - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) - throw getRuntime().newArgumentError(args.length, 2); + IRubyObject doc = 
args[0]; + IRubyObject text = args[1]; - IRubyObject doc = args[0]; - IRubyObject text = args[1]; + XmlDocument xmlDoc; + if (doc instanceof XmlDocument) { + xmlDoc = (XmlDocument) doc; - XmlDocument xmlDoc; - if (doc instanceof XmlDocument) { - xmlDoc = (XmlDocument) doc; - - } else if (doc instanceof XmlNode) { - XmlNode xmlNode = (XmlNode) doc; - xmlDoc = (XmlDocument)xmlNode.document(context); - } else { - throw getRuntime().newArgumentError("first argument must be a XML::Document or XML::Node"); - } - if (xmlDoc != null) { - Document document = xmlDoc.getDocument(); - Node node = document.createComment(rubyStringToString(text)); - setNode(context, node); - } + } else if (doc instanceof XmlNode) { + XmlNode xmlNode = (XmlNode) doc; + xmlDoc = (XmlDocument)xmlNode.document(context); + } else { + throw getRuntime().newArgumentError("first argument must be a XML::Document or XML::Node"); } + if (xmlDoc != null) { + Document document = xmlDoc.getDocument(); + Node node = document.createComment(rubyStringToString(text)); + setNode(context.runtime, node); + } + } - @Override - public boolean isComment() { return true; } + @Override + public boolean + isComment() { return true; } - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((Comment)node); - visitor.leave((Comment)node); - } + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((Comment)node); + visitor.leave((Comment)node); + } } diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java index 152d2e9efd..3141ae28c4 100644 --- a/ext/java/nokogiri/XmlDocument.java +++ b/ext/java/nokogiri/XmlDocument.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2014: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * 
{Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.clearXpathContext; @@ -39,25 +7,28 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString; import static nokogiri.internals.NokogiriHelpers.stringOrNil; -import java.io.UnsupportedEncodingException; import java.util.List; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyFixnum; -import org.jruby.RubyNil; +import org.jruby.RubyString; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; +import org.jruby.exceptions.RaiseException; import org.jruby.javasupport.JavaUtil; -import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; @@ -80,581 +51,655 @@ * @author Yoko Harada * @author John Shahid */ - -@JRubyClass(name="Nokogiri::XML::Document", parent="Nokogiri::XML::Node") -public class XmlDocument extends XmlNode { - private NokogiriNamespaceCache nsCache; - - /* UserData keys for storing extra info in the document node. */ - public final static String DTD_RAW_DOCUMENT = "DTD_RAW_DOCUMENT"; - public final static String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET"; - public final static String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET"; - - /* DocumentBuilderFactory implementation class name. This needs to set a classloader into it. - * Setting an appropriate classloader resolves issue 380. 
- */ - private static final String DOCUMENTBUILDERFACTORY_IMPLE_NAME = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl"; - - private static boolean substituteEntities = false; - private static boolean loadExternalSubset = false; // TODO: Verify this. - - /** cache variables */ - protected IRubyObject encoding = null; - protected IRubyObject url = null; - - public XmlDocument(Ruby ruby, RubyClass klazz) { - super(ruby, klazz, createNewDocument()); - } - - public XmlDocument(Ruby ruby, Document document) { - this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Document"), document); - } - - public XmlDocument(Ruby ruby, RubyClass klass, Document document) { - super(ruby, klass, document); - initializeNamespaceCacheIfNecessary(); - createAndCacheNamespaces(ruby, document.getDocumentElement()); - stabilizeTextContent(document); - setInstanceVariable("@decorators", ruby.getNil()); - } - - public void setDocumentNode(ThreadContext context, Node node) { - super.setNode(context, node); - initializeNamespaceCacheIfNecessary(); - Ruby runtime = context.getRuntime(); - if (node != null) { - Document document = (Document)node; - stabilizeTextContent(document); - createAndCacheNamespaces(runtime, document.getDocumentElement()); +@JRubyClass(name = "Nokogiri::XML::Document", parent = "Nokogiri::XML::Node") +public class XmlDocument extends XmlNode +{ + private static final long serialVersionUID = 1L; + + private NokogiriNamespaceCache nsCache; + + /* UserData keys for storing extra info in the document node. */ + public final static String DTD_RAW_DOCUMENT = "DTD_RAW_DOCUMENT"; + public final static String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET"; + public final static String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET"; + + /* DocumentBuilderFactory implementation class name. This needs to set a classloader into it. + * Setting an appropriate classloader resolves issue 380. 
+ */ + private static final String DOCUMENTBUILDERFACTORY_IMPLE_NAME = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl"; + + private static final ByteList DOCUMENT = ByteList.create("document"); + static { DOCUMENT.setEncoding(USASCIIEncoding.INSTANCE); } + + private static boolean substituteEntities = false; + private static boolean loadExternalSubset = false; // TODO: Verify this. + + /** cache variables */ + protected IRubyObject encoding; + protected IRubyObject url; + + public + XmlDocument(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz, createNewDocument(runtime)); + } + + public + XmlDocument(Ruby runtime, Document document) + { + this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document); + } + + public + XmlDocument(Ruby runtime, RubyClass klass, Document document) + { + super(runtime, klass, document); + init(runtime, document); + } + + void + init(Ruby runtime, Document document) + { + stabilizeTextContent(document); + if (document.getDocumentElement() != null) { + createAndCacheNamespaces(runtime, document.getDocumentElement()); + } + setInstanceVariable("@decorators", runtime.getNil()); + } + + public final void + setDocumentNode(Ruby runtime, Document node) + { + super.setNode(runtime, node); + if (node != null) { init(runtime, node); } + else { setInstanceVariable("@decorators", runtime.getNil()); } + } + + public void + setEncoding(IRubyObject encoding) + { + this.encoding = encoding; + } + + public IRubyObject + getEncoding() + { + return encoding; + } + + // not sure, but like attribute values, text value will be lost + // unless it is referred once before this document is used. + // this seems to happen only when the fragment is parsed from Node#in_context. 
+ protected static void + stabilizeTextContent(Document document) + { + if (document.getDocumentElement() != null) { document.getDocumentElement().getTextContent(); } + } + + private static void + createAndCacheNamespaces(Ruby runtime, Node node) + { + if (node.hasAttributes()) { + NamedNodeMap nodeMap = node.getAttributes(); + for (int i = 0; i < nodeMap.getLength(); i++) { + Node n = nodeMap.item(i); + if (n instanceof Attr) { + Attr attr = (Attr) n; + stabilizeAttr(attr); + if (isNamespace(attr.getName())) { + // create and cache + XmlNamespace.createFromAttr(runtime, attr); + } } - setInstanceVariable("@decorators", runtime.getNil()); - } - - public void setEncoding(IRubyObject encoding) { - this.encoding = encoding; - } - - public IRubyObject getEncoding() { - return encoding; - } - - // not sure, but like attribute values, text value will be lost - // unless it is referred once before this document is used. - // this seems to happen only when the fragment is parsed from Node#in_context. 
- protected void stabilizeTextContent(Document document) { - if (document.getDocumentElement() != null) document.getDocumentElement().getTextContent(); - } - - private void createAndCacheNamespaces(Ruby ruby, Node node) { - if (node == null) return; - if (node.hasAttributes()) { - NamedNodeMap nodeMap = node.getAttributes(); - for (int i=0; i 0 && !(args[0].isNil())) { - mode = RubyFixnum.fix2int(args[0]); - } - if (args.length > 1 ) { - if (!args[1].isNil() && !(args[1] instanceof List)) { - throw context.getRuntime().newTypeError("Expected array"); - } - if (!args[1].isNil()) { - inclusive_namespace = ((RubyArray)args[1]) - .join(context, context.getRuntime().newString(" ")) - .asString() - .asJavaString(); // OMG I wish I knew JRuby better, this is ugly - } - } - if (args.length > 2) { - with_comments = args[2].isTrue(); - } - String algorithmURI = null; - switch(mode) { - case 0: // XML_C14N_1_0 - if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; - else algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; - break; - case 1: // XML_C14N_EXCLUSIVE_1_0 - if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; - else algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; - break; - case 2: // XML_C14N_1_1 = 2 - if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; - else algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; - } - try { - Canonicalizer canonicalizer = Canonicalizer.getInstance(algorithmURI); - XmlNode startingNode = getStartingNode(block); - byte[] result; - CanonicalFilter filter = new CanonicalFilter(context, block); - if (inclusive_namespace == null) { - result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), filter); - } else { - result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), inclusive_namespace, filter); - } - String resultString = new String(result, "UTF-8"); - return stringOrNil(context.getRuntime(), resultString); - } 
catch (CanonicalizationException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (UnsupportedEncodingException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return context.getRuntime().getNil(); - } - - private XmlNode getStartingNode(Block block) { - if (block.isGiven()) { - if (block.getBinding().getSelf() instanceof XmlNode) { - return (XmlNode)block.getBinding().getSelf(); - } - } - return this; - } - - public void resetNamespaceCache(ThreadContext context) { - nsCache = new NokogiriNamespaceCache(); - createAndCacheNamespaces(context.getRuntime(), node); - } + dtd = XmlDtd.newEmpty(context.runtime, document, name, publicId, systemId); + } else { + dtd = context.nil; + } + + setInternalSubset(dtd); + } + + return dtd; + } + + /** + * Assumes XmlNode#internal_subset() has returned nil. (i.e. there + * is not already an internal subset). + */ + public IRubyObject + createInternalSubset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) + { + XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id); + setInternalSubset(dtd); + return dtd; + } + + protected void + setInternalSubset(IRubyObject data) + { + node.setUserData(DTD_INTERNAL_SUBSET, data, null); + } + + public IRubyObject + getExternalSubset(ThreadContext context) + { + IRubyObject dtd = (IRubyObject) node.getUserData(DTD_EXTERNAL_SUBSET); + + if (dtd == null) { return context.nil; } + return dtd; + } + + /** + * Assumes XmlNode#external_subset() has returned nil. (i.e. there + * is not already an external subset). 
+ */ + public IRubyObject + createExternalSubset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) + { + XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id); + setExternalSubset(dtd); + return dtd; + } + + protected void + setExternalSubset(IRubyObject data) + { + node.setUserData(DTD_EXTERNAL_SUBSET, data, null); + } + + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + Document document = getDocument(); + visitor.enter(document); + NodeList children = document.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + short type = child.getNodeType(); + if (type == Node.COMMENT_NODE) { + XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.runtime, child); + xmlComment.accept(context, visitor); + } else if (type == Node.DOCUMENT_TYPE_NODE) { + XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.runtime, child); + xmlDtd.accept(context, visitor); + } else if (type == Node.PROCESSING_INSTRUCTION_NODE) { + XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.runtime, + child); + xmlProcessingInstruction.accept(context, visitor); + } else if (type == Node.TEXT_NODE) { + XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.runtime, child); + xmlText.accept(context, visitor); + } else if (type == Node.ELEMENT_NODE) { + XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.runtime, child); + xmlElement.accept(context, visitor); + } + } + visitor.leave(document); + } + + @JRubyMethod(meta = true) + public static IRubyObject + wrap(ThreadContext context, IRubyObject klass, IRubyObject arg) + { + XmlDocument xmlDocument = new XmlDocument(context.runtime, (RubyClass) klass, arg.toJava(Document.class)); + Helpers.invoke(context, xmlDocument, "initialize"); + return xmlDocument; + } + + @Deprecated + @JRubyMethod(meta = 
true, visibility = Visibility.PRIVATE) + public static IRubyObject + wrapJavaDocument(ThreadContext context, IRubyObject klass, IRubyObject arg) + { + return wrap(context, klass, arg); + } + + @Deprecated // default to_java works (due inherited from XmlNode#toJava) + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + toJavaDocument(ThreadContext context) + { + return JavaUtil.convertJavaToUsableRubyObject(context.getRuntime(), node); + } + + /* call-seq: + * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false) + * doc.canonicalize { |obj, parent| ... } + * + * Canonicalize a document and return the results. Takes an optional block + * that takes two parameters: the +obj+ and that node's +parent+. + * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace + * The block must return a non-nil, non-false value if the +obj+ passed in + * should be included in the canonicalized document. + */ + @JRubyMethod(optional = 3) + public IRubyObject + canonicalize(ThreadContext context, IRubyObject[] args, Block block) + { + int mode = 0; + String inclusive_namespace = null; + Boolean with_comments = false; + if (args.length > 0 && !(args[0].isNil())) { + mode = RubyFixnum.fix2int(args[0]); + } + if (args.length > 1) { + if (!args[1].isNil() && !(args[1] instanceof List)) { + throw context.runtime.newTypeError("Expected array"); + } + if (!args[1].isNil()) { + inclusive_namespace = ((RubyArray)args[1]) + .join(context, context.runtime.newString(" ")) + .asString() + .asJavaString(); // OMG I wish I knew JRuby better, this is ugly + } + } + if (args.length > 2) { + with_comments = args[2].isTrue(); + } + String algorithmURI = null; + switch (mode) { + case 0: // XML_C14N_1_0 + if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; } + else { algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; } + break; + case 1: // XML_C14N_EXCLUSIVE_1_0 + if (with_comments) { algorithmURI = 
Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; } + else { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; } + break; + case 2: // XML_C14N_1_1 = 2 + if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; } + else { algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; } + } + try { + Canonicalizer canonicalizer = Canonicalizer.getInstance(algorithmURI); + XmlNode startingNode = getStartingNode(block); + byte[] result; + CanonicalFilter filter = new CanonicalFilter(context, block); + if (inclusive_namespace == null) { + result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), filter); + } else { + result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), inclusive_namespace, filter); + } + return RubyString.newString(context.runtime, new ByteList(result, UTF8Encoding.INSTANCE)); + } catch (Exception e) { + throw context.getRuntime().newRuntimeError(e.getMessage()); + } + } + + private XmlNode + getStartingNode(Block block) + { + if (block.isGiven()) { + IRubyObject boundSelf = block.getBinding().getSelf(); + if (boundSelf instanceof XmlNode) { return (XmlNode) boundSelf; } + } + return this; + } + + public void + resetNamespaceCache(ThreadContext context) + { + nsCache = new NokogiriNamespaceCache(); + createAndCacheNamespaces(context.runtime, node); + } } diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java index 07df539aa4..36578acad8 100644 --- a/ext/java/nokogiri/XmlDocumentFragment.java +++ b/ext/java/nokogiri/XmlDocumentFragment.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, 
to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace; @@ -49,6 +17,7 @@ import org.jruby.RubyString; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Block; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; @@ -58,146 +27,137 @@ /** * Class for Nokogiri::XML::DocumentFragment - * + * * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::DocumentFragment", parent="Nokogiri::XML::Node") -public class XmlDocumentFragment extends XmlNode { - private XmlElement fragmentContext = null; - - public XmlDocumentFragment(Ruby ruby) { - this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment")); +@JRubyClass(name = "Nokogiri::XML::DocumentFragment", parent = "Nokogiri::XML::Node") +public class XmlDocumentFragment extends XmlNode +{ + private static final long serialVersionUID = 1L; + + public + XmlDocumentFragment(Ruby ruby) + { + this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment")); + } + + public + XmlDocumentFragment(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } + + @JRubyMethod(name = "new", meta = true, required = 1, optional = 3) + public static IRubyObject + rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args, Block block) + { + if (args.length < 1) { + throw context.runtime.newArgumentError(args.length, 1); } - public XmlDocumentFragment(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - @JRubyMethod(name="new", meta = true, required=1, optional=2) - public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) { - - if(args.length < 1) { - throw context.getRuntime().newArgumentError(args.length, 1); - } - - if(!(args[0] instanceof XmlDocument)){ - throw context.getRuntime().newArgumentError("first parameter must be a Nokogiri::XML::Document instance"); - } - - XmlDocument doc = (XmlDocument) 
args[0]; - - // make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse - if (args.length > 1 && args[1] instanceof RubyString) { - if (XmlDocumentFragment.isTag((RubyString)args[1])) { - args[1] = RubyString.newString(context.getRuntime(), addNamespaceDeclIfNeeded(doc, rubyStringToString(args[1]))); - } - } - - XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)cls); - fragment.setDocument(context, doc); - fragment.setNode(context, doc.getDocument().createDocumentFragment()); - - //TODO: Get namespace definitions from doc. - if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) { - fragment.fragmentContext = (XmlElement)args[2]; - } - Helpers.invoke(context, fragment, "initialize", args); - return fragment; + if (!(args[0] instanceof XmlDocument)) { + throw context.runtime.newArgumentError("first parameter must be a Nokogiri::XML::Document instance"); } - private static final ByteList TAG_BEG = ByteList.create("<"); - private static final ByteList TAG_END = ByteList.create(">"); - - private static boolean isTag(final RubyString str) { - return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END); - } + XmlDocument doc = (XmlDocument) args[0]; - private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) { - if (isNamespace(qName.intern())) return true; - for (int i=0; i < nodeMap.getLength(); i++) { - Attr attr = (Attr)nodeMap.item(i); - if (isNamespace(attr.getNodeName())) { - String localPart = getLocalNameForNamespace(attr.getNodeName()); - if (getPrefix(qName).equals(localPart)) { - return true; - } - } - } - return false; + // make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse + if (args.length > 1 && args[1] instanceof RubyString) { + final RubyString arg1 = (RubyString) args[1]; + if (XmlDocumentFragment.isTag(arg1)) { + args[1] = 
RubyString.newString(context.runtime, addNamespaceDeclIfNeeded(doc, rubyStringToString(arg1))); + } } - private static final Pattern QNAME_RE = Pattern.compile("[^\\s]+:[^=\\s]+"); - private static final Pattern START_TAG_RE = Pattern.compile("<[^]+>"); - - private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) { - if (doc.getDocument() == null) return tags; - if (doc.getDocument().getDocumentElement() == null) return tags; - Matcher matcher = START_TAG_RE.matcher(tags); - Map rewriteTable = null; - while (matcher.find()) { - String start_tag = matcher.group(); - Matcher matcher2 = QNAME_RE.matcher(start_tag); - while (matcher2.find()) { - String qName = matcher2.group(); - NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes(); - if (isNamespaceDefined(qName, nodeMap)) { - CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap); - if (namespaceDecl != null) { - if (rewriteTable == null) rewriteTable = new HashMap(8, 1); - StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3); - String key = str.append('<').append(qName).append('>').toString(); - str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' ' - rewriteTable.put(key, str.append(namespaceDecl).append('>')); - } - } - } + XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate( + context.runtime, (RubyClass)cls); + fragment.setDocument(context, doc); + fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment()); + + Helpers.invoke(context, fragment, "initialize", args, block); + return fragment; + } + + private static final ByteList TAG_BEG = ByteList.create("<"); + private static final ByteList TAG_END = ByteList.create(">"); + + private static boolean + isTag(final RubyString str) + { + return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END); + } + + private static boolean + isNamespaceDefined(String qName, 
NamedNodeMap nodeMap) + { + if (isNamespace(qName.intern())) { return true; } + for (int i = 0; i < nodeMap.getLength(); i++) { + Attr attr = (Attr)nodeMap.item(i); + if (isNamespace(attr.getNodeName())) { + String localPart = getLocalNameForNamespace(attr.getNodeName(), null); + if (getPrefix(qName).equals(localPart)) { + return true; } - if (rewriteTable != null) { - for (Map.Entry e : rewriteTable.entrySet()) { - tags = tags.replace(e.getKey(), e.getValue()); - } - } - - return tags; + } } - - private static CharSequence getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) { - for (int i=0; i < nodeMap.getLength(); i++) { - Attr attr = (Attr) nodeMap.item(i); - if (prefix.equals(attr.getLocalName())) { - return new StringBuilder(). - append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"'); - } + return false; + } + + private static final Pattern QNAME_RE = Pattern.compile("[^\\s]+:[^=\\s]+"); + private static final Pattern START_TAG_RE = Pattern.compile("<[^]+>"); + + private static String + addNamespaceDeclIfNeeded(XmlDocument doc, String tags) + { + if (doc.getDocument() == null) { return tags; } + if (doc.getDocument().getDocumentElement() == null) { return tags; } + Matcher matcher = START_TAG_RE.matcher(tags); + Map rewriteTable = null; + while (matcher.find()) { + String start_tag = matcher.group(); + Matcher matcher2 = QNAME_RE.matcher(start_tag); + while (matcher2.find()) { + String qName = matcher2.group(); + NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes(); + if (isNamespaceDefined(qName, nodeMap)) { + CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap); + if (namespaceDecl != null) { + if (rewriteTable == null) { rewriteTable = new HashMap(8, 1); } + StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3); + String key = str.append('<').append(qName).append('>').toString(); + str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' ' + 
rewriteTable.put(key, str.append(namespaceDecl).append('>')); + } } - return null; + } } - - public XmlElement getFragmentContext() { - return fragmentContext; - } - - //@Override - public void add_child(ThreadContext context, XmlNode child) { - // Some magic for DocumentFragment - - Ruby ruby = context.getRuntime(); - XmlNodeSet children = (XmlNodeSet) child.children(context); - - long length = children.length(); - - RubyArray childrenArray = children.convertToArray(); - - if(length != 0) { - for(int i = 0; i < length; i++) { - XmlNode item = (XmlNode) ((XmlNode) childrenArray.aref(ruby.newFixnum(i))).dup_implementation(context, true); - add_child(context, item); - } - } + if (rewriteTable != null) { + for (Map.Entry e : rewriteTable.entrySet()) { + tags = tags.replace(e.getKey(), e.getValue()); + } } - @Override - public void relink_namespace(ThreadContext context) { - ((XmlNodeSet) children(context)).relink_namespace(context); + return tags; + } + + private static CharSequence + getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) + { + for (int i = 0; i < nodeMap.getLength(); i++) { + Attr attr = (Attr) nodeMap.item(i); + if (prefix.equals(attr.getLocalName())) { + return new StringBuilder(). 
+ append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"'); + } } + return null; + } + + @Override + public void + relink_namespace(ThreadContext context) + { + relink_namespace(context, getChildren()); + } } diff --git a/ext/java/nokogiri/XmlDtd.java b/ext/java/nokogiri/XmlDtd.java index 6083454bd5..492fed62c4 100644 --- a/ext/java/nokogiri/XmlDtd.java +++ b/ext/java/nokogiri/XmlDtd.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -61,424 +29,488 @@ /** * Class for Nokogiri::XML::DTD - * + * * @author sergio * @author Patrick Mahoney * @author Yoko Harada */ +@JRubyClass(name = "Nokogiri::XML::DTD", parent = "Nokogiri::XML::Node") +public class XmlDtd extends XmlNode +{ + private static final long serialVersionUID = 1L; -@JRubyClass(name="Nokogiri::XML::DTD", parent="Nokogiri::XML::Node") -public class XmlDtd extends XmlNode { - /** cache of children, Nokogiri::XML::NodeSet */ - protected IRubyObject children = null; - - /** cache of name => XmlAttributeDecl */ - protected RubyHash attributes = null; - - /** cache of name => XmlElementDecl */ - protected RubyHash elements = null; - - /** cache of name => XmlEntityDecl */ - protected RubyHash entities = null; - - /** cache of name => Nokogiri::XML::Notation */ - protected RubyHash notations = null; - protected RubyClass notationClass; - - /** temporary store of content models before they are added to - * their XmlElementDecl. */ - protected RubyHash contentModels; - - /** node name */ - protected IRubyObject name; - - /** public ID (or external ID) */ - protected IRubyObject pubId; - - /** system ID */ - protected IRubyObject sysId; - - public XmlDtd(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - public void setNode(Ruby runtime, Node dtd) { - this.node = dtd; - notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation"); - - name = pubId = sysId = runtime.getNil(); - if (dtd == null) return; - - // This is the dtd declaration stored in the document; it - // contains the DTD name (root element) and public and system - // ids. The actual declarations are in the NekoDTD 'dtd' - // variable. I don't know of a way to consolidate the two. 
- - DocumentType otherDtd = dtd.getOwnerDocument().getDoctype(); - if (otherDtd != null) { - name = stringOrNil(runtime, otherDtd.getNodeName()); - pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId()); - sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId()); - } - } - - public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) { - super(ruby, rubyClass, dtd); - setNode(ruby, dtd); - } - - public static XmlDtd newEmpty(Ruby runtime, - Document doc, - IRubyObject name, - IRubyObject external_id, - IRubyObject system_id) { - - DocumentType placeholder; - if (doc.getDoctype() == null) { - String javaName = NokogiriHelpers.rubyStringToString(name); - String javaExternalId = NokogiriHelpers.rubyStringToString(external_id); - String javaSystemId = NokogiriHelpers.rubyStringToString(system_id); - placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId); - doc.appendChild(placeholder); - } else { - placeholder = doc.getDoctype(); - } - // FIXME: what if the document had a doc type, why are we here ? - XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - dtd.setNode(runtime, placeholder); - dtd.name = name; - dtd.pubId = external_id; - dtd.sysId = system_id; - return dtd; - } - + /** cache of children, Nokogiri::XML::NodeSet */ + protected IRubyObject children = null; - /** - * Create an unparented element that contains DTD declarations - * parsed from the internal subset attached as user data to - * doc. The attached dtd must be the tree from - * NekoDTD. The owner document of the returned tree will be - * doc. - * - * NekoDTD parser returns a new document node containing elements - * representing the dtd declarations. The plan is to get the root - * element and adopt it into the correct document, stipping the - * Document provided by NekoDTD. 
- * - */ - public static XmlDtd newFromInternalSubset(Ruby runtime, Document doc) { - Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); - if (dtdTree_ == null) { - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, null); - return xmlDtd; - } - - Node dtdTree = (Node) dtdTree_; - Node dtd = getInternalSubset(dtdTree); - if (dtd == null) { - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, null); - return xmlDtd; - } else { - // Import the node into doc so it has the correct owner document. - dtd = doc.importNode(dtd, true); - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, dtd); - return xmlDtd; - } - } - - public static IRubyObject newFromExternalSubset(Ruby runtime, Document doc) { - Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); - if (dtdTree_ == null) { - return runtime.getNil(); - } - - Node dtdTree = (Node) dtdTree_; - Node dtd = getExternalSubset(dtdTree); - if (dtd == null) { - return runtime.getNil(); - } else if (!dtd.hasChildNodes()) { - return runtime.getNil(); - } else { - // Import the node into doc so it has the correct owner document. - dtd = doc.importNode(dtd, true); - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, dtd); - return xmlDtd; - } - } + /** cache of name => XmlAttributeDecl */ + protected RubyHash attributes = null; - /* - * dtd is the document node of a NekoDTD tree. - * NekoDTD tree looks like this: - * - *
-     * [#document: null]
-     *   [#comment: ...]
-     *   [#comment: ...]
-     *   [dtd: null]   // a DocumentType; isDTD(node) => false
-     *   [dtd: null]   // root of dtd, an Element node; isDTD(node) => true
-     *     ... decls, content models, etc. ...
-     *     [externalSubset: null] pubid="the pubid" sysid="the sysid"
-     *       ... external subset decls, etc. ...
-     * 
- */ - protected static Node getInternalSubset(Node dtdTree) { - Node root; - for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) { - if (root == null) - return null; - else if (isDTD(root)) - return root; // we have second dtd which is root - } - } + /** cache of name => XmlElementDecl */ + protected RubyHash elements = null; - protected static Node getExternalSubset(Node dtdTree) { - Node dtd = getInternalSubset(dtdTree); - if (dtd == null) return null; - for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) { - if (ext == null) - return null; - else if (isExternalSubset(ext)) - return ext; - } - } + /** cache of name => XmlEntityDecl */ + protected RubyHash entities = null; - /** - * This overrides the #attributes method defined in - * lib/nokogiri/xml/node.rb. - */ - @JRubyMethod - public IRubyObject attributes(ThreadContext context) { - if (attributes == null) extractDecls(context); + /** cache of name => Nokogiri::XML::Notation */ + protected RubyHash notations = null; + protected RubyClass notationClass; - return attributes; - } + /** temporary store of content models before they are added to + * their XmlElementDecl. 
*/ + protected RubyHash contentModels; - @JRubyMethod - public IRubyObject elements(ThreadContext context) { - if (elements == null) extractDecls(context); + /** node name */ + protected IRubyObject name; - return elements; - } + /** public ID (or external ID) */ + protected IRubyObject pubId; - @JRubyMethod - public IRubyObject entities(ThreadContext context) { - if (entities == null) extractDecls(context); + /** system ID */ + protected IRubyObject sysId; - return entities; - } + public + XmlDtd(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } - @JRubyMethod - public IRubyObject notations(ThreadContext context) { - if (notations == null) extractDecls(context); + public void + setNode(Ruby runtime, Node dtd) + { + this.node = dtd; + notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation"); - return notations; - } + name = pubId = sysId = runtime.getNil(); + if (dtd == null) { return; } - /** - * Our "node" object is as-returned by NekoDTD. The actual - * "children" that we're interested in (Attribute declarations, - * etc.) are a few layers deep. - */ - @Override - @JRubyMethod - public IRubyObject children(ThreadContext context) { - if (children == null) extractDecls(context); - - return children; - } + // This is the dtd declaration stored in the document; it + // contains the DTD name (root element) and public and system + // ids. The actual declarations are in the NekoDTD 'dtd' + // variable. I don't know of a way to consolidate the two. - /** - * Returns the name of the dtd. 
- */ - @Override - @JRubyMethod - public IRubyObject node_name(ThreadContext context) { - return name; + DocumentType otherDtd = dtd.getOwnerDocument().getDoctype(); + if (otherDtd != null) { + name = stringOrNil(runtime, otherDtd.getNodeName()); + pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId()); + sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId()); } - - @Override - @JRubyMethod(name = "node_name=") - public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { - throw context.getRuntime() - .newRuntimeError("cannot change name of DTD"); + } + + public + XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) + { + super(ruby, rubyClass, dtd); + setNode(ruby, dtd); + } + + public static XmlDtd + newEmpty(Ruby runtime, + Document doc, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) + { + + DocumentType placeholder; + if (doc.getDoctype() == null) { + String javaName = NokogiriHelpers.rubyStringToString(name); + String javaExternalId = NokogiriHelpers.rubyStringToString(external_id); + String javaSystemId = NokogiriHelpers.rubyStringToString(system_id); + placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId); + doc.appendChild(placeholder); + } else { + placeholder = doc.getDoctype(); } - - @JRubyMethod - public IRubyObject system_id(ThreadContext context) { - return sysId; + // FIXME: what if the document had a doc type, why are we here ? + XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + dtd.setNode(runtime, placeholder); + dtd.name = name; + dtd.pubId = external_id; + dtd.sysId = system_id; + return dtd; + } + + + /** + * Create an unparented element that contains DTD declarations + * parsed from the internal subset attached as user data to + * doc. The attached dtd must be the tree from + * NekoDTD. The owner document of the returned tree will be + * doc. 
+ * + * NekoDTD parser returns a new document node containing elements + * representing the dtd declarations. The plan is to get the root + * element and adopt it into the correct document, stipping the + * Document provided by NekoDTD. + * + */ + public static XmlDtd + newFromInternalSubset(Ruby runtime, Document doc) + { + Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); + if (dtdTree_ == null) { + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, null); + return xmlDtd; } - @JRubyMethod - public IRubyObject external_id(ThreadContext context) { - return pubId; + Node dtdTree = (Node) dtdTree_; + Node dtd = getInternalSubset(dtdTree); + if (dtd == null) { + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, null); + return xmlDtd; + } else { + // Import the node into doc so it has the correct owner document. 
+ dtd = doc.importNode(dtd, true); + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, dtd); + return xmlDtd; } - - @JRubyMethod - public IRubyObject validate(ThreadContext context, IRubyObject doc) { - RubyArray errors = RubyArray.newArray(context.getRuntime()); - if (doc instanceof XmlDocument) { - errors = (RubyArray) ((XmlDocument)doc).getInstanceVariable("@errors"); - } - return errors; + } + + public static IRubyObject + newFromExternalSubset(Ruby runtime, Document doc) + { + Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); + if (dtdTree_ == null) { + return runtime.getNil(); } - public static boolean nameEquals(Node node, QName name) { - return name.localpart.equals(node.getNodeName()); + Node dtdTree = (Node) dtdTree_; + Node dtd = getExternalSubset(dtdTree); + if (dtd == null) { + return runtime.getNil(); + } else if (!dtd.hasChildNodes()) { + return runtime.getNil(); + } else { + // Import the node into doc so it has the correct owner document. + dtd = doc.importNode(dtd, true); + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, dtd); + return xmlDtd; } - - public static boolean isExternalSubset(Node node) { - return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET); + } + + /* + * dtd is the document node of a NekoDTD tree. + * NekoDTD tree looks like this: + * + *
+   * [#document: null]
+   *   [#comment: ...]
+   *   [#comment: ...]
+   *   [dtd: null]   // a DocumentType; isDTD(node) => false
+   *   [dtd: null]   // root of dtd, an Element node; isDTD(node) => true
+   *     ... decls, content models, etc. ...
+   *     [externalSubset: null] pubid="the pubid" sysid="the sysid"
+   *       ... external subset decls, etc. ...
+   * 
+ */ + protected static Node + getInternalSubset(Node dtdTree) + { + Node root; + for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) { + if (root == null) { + return null; + } else if (isDTD(root)) { + return root; // we have second dtd which is root + } } - - /** - * Checks instanceof Element so we return false for a DocumentType - * node (NekoDTD uses Element for all its nodes). - */ - public static boolean isDTD(Node node) { - return (node instanceof Element && - nameEquals(node, DTDConfiguration.E_DTD)); + } + + protected static Node + getExternalSubset(Node dtdTree) + { + Node dtd = getInternalSubset(dtdTree); + if (dtd == null) { return null; } + for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) { + if (ext == null) { + return null; + } else if (isExternalSubset(ext)) { + return ext; + } } - - public static boolean isAttributeDecl(Node node) { - return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL); + } + + /** + * This overrides the #attributes method defined in + * lib/nokogiri/xml/node.rb. + */ + @JRubyMethod + public IRubyObject + attributes(ThreadContext context) + { + if (attributes == null) { extractDecls(context); } + + return attributes; + } + + @JRubyMethod + public IRubyObject + elements(ThreadContext context) + { + if (elements == null) { extractDecls(context); } + + return elements; + } + + @JRubyMethod + public IRubyObject + entities(ThreadContext context) + { + if (entities == null) { extractDecls(context); } + + return entities; + } + + @JRubyMethod + public IRubyObject + notations(ThreadContext context) + { + if (notations == null) { extractDecls(context); } + + return notations; + } + + /** + * Our "node" object is as-returned by NekoDTD. The actual + * "children" that we're interested in (Attribute declarations, + * etc.) are a few layers deep. 
+ */ + @Override + @JRubyMethod + public IRubyObject + children(ThreadContext context) + { + if (children == null) { extractDecls(context); } + + return children; + } + + /** + * Returns the name of the dtd. + */ + @Override + @JRubyMethod + public IRubyObject + node_name(ThreadContext context) + { + return name; + } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject + node_name_set(ThreadContext context, IRubyObject name) + { + throw context.getRuntime() + .newRuntimeError("cannot change name of DTD"); + } + + @JRubyMethod + public IRubyObject + system_id(ThreadContext context) + { + return sysId; + } + + @JRubyMethod + public IRubyObject + external_id(ThreadContext context) + { + return pubId; + } + + @JRubyMethod + public IRubyObject + validate(ThreadContext context, IRubyObject doc) + { + RubyArray errors = RubyArray.newArray(context.getRuntime()); + if (doc instanceof XmlDocument) { + errors = (RubyArray)((XmlDocument)doc).getInstanceVariable("@errors"); } - - public static boolean isElementDecl(Node node) { - return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL); + return errors; + } + + public static boolean + nameEquals(Node node, QName name) + { + return name.localpart.equals(node.getNodeName()); + } + + public static boolean + isExternalSubset(Node node) + { + return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET); + } + + /** + * Checks instanceof Element so we return false for a DocumentType + * node (NekoDTD uses Element for all its nodes). 
+ */ + public static boolean + isDTD(Node node) + { + return (node instanceof Element && + nameEquals(node, DTDConfiguration.E_DTD)); + } + + public static boolean + isAttributeDecl(Node node) + { + return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL); + } + + public static boolean + isElementDecl(Node node) + { + return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL); + } + + public static boolean + isEntityDecl(Node node) + { + return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) || + nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL)); + } + + public static boolean + isNotationDecl(Node node) + { + return nameEquals(node, DTDConfiguration.E_NOTATION_DECL); + } + + public static boolean + isContentModel(Node node) + { + return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL); + } + + /** + * Recursively extract various DTD declarations and store them in + * the various collections. + */ + protected void + extractDecls(ThreadContext context) + { + Ruby runtime = context.runtime; + + // initialize data structures + attributes = RubyHash.newHash(runtime); + elements = RubyHash.newHash(runtime); + entities = RubyHash.newHash(runtime); + notations = RubyHash.newHash(runtime); + contentModels = RubyHash.newHash(runtime); + children = runtime.getNil(); + + // recursively extract decls + if (node == null) { return; } // leave all the decl hash's empty + + // convert allDecls to a NodeSet + children = XmlNodeSet.newNodeSet(runtime, extractDecls(context, node.getFirstChild())); + + // add attribute decls as attributes to the matching element decl + RubyArray keys = attributes.keys(); + for (int i = 0; i < keys.getLength(); ++i) { + IRubyObject akey = keys.entry(i); + IRubyObject val; + + val = attributes.op_aref(context, akey); + if (val.isNil()) { continue; } + XmlAttributeDecl attrDecl = (XmlAttributeDecl) val; + IRubyObject ekey = attrDecl.element_name(context); + val = elements.op_aref(context, ekey); + if (val.isNil()) { continue; } + 
XmlElementDecl elemDecl = (XmlElementDecl) val; + + elemDecl.appendAttrDecl(attrDecl); } - public static boolean isEntityDecl(Node node) { - return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) || - nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL)); - } + // add content models to the matching element decl + keys = contentModels.keys(); + for (int i = 0; i < keys.getLength(); ++i) { + IRubyObject key = keys.entry(i); + IRubyObject cm = contentModels.op_aref(context, key); - public static boolean isNotationDecl(Node node) { - return nameEquals(node, DTDConfiguration.E_NOTATION_DECL); + IRubyObject elem = elements.op_aref(context, key); + if (elem.isNil()) { continue; } + if (((XmlElementDecl)elem).isEmpty()) { continue; } + ((XmlElementDecl) elem).setContentModel(cm); } - - public static boolean isContentModel(Node node) { - return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL); + } + + /** + * The node is either the first child of the root dtd + * node (as returned by getInternalSubset()) or the first child of + * the external subset node (as returned by getExternalSubset()). + * + * This recursive function will not descend into an + * 'externalSubset' node, thus for an internal subset it only + * extracts nodes in the internal subset, and for an external + * subset it extracts everything and assumess node + * and all children are part of the external subset. 
+ */ + protected IRubyObject[] + extractDecls(ThreadContext context, Node node) + { + List decls = new ArrayList(); + while (node != null) { + if (isExternalSubset(node)) { + break; + } else if (isAttributeDecl(node)) { + XmlAttributeDecl decl = XmlAttributeDecl.create(context, node); + attributes.op_aset(context, decl.attribute_name(context), decl); + decls.add(decl); + } else if (isElementDecl(node)) { + XmlElementDecl decl = XmlElementDecl.create(context, node); + elements.op_aset(context, decl.element_name(context), decl); + decls.add(decl); + } else if (isEntityDecl(node)) { + XmlEntityDecl decl = XmlEntityDecl.create(context, node); + entities.op_aset(context, decl.node_name(context), decl); + decls.add(decl); + } else if (isNotationDecl(node)) { + XmlNode tmp = (XmlNode) + NokogiriHelpers.constructNode(context.getRuntime(), node); + IRubyObject decl = invoke(context, notationClass, "new", + tmp.getAttribute(context, "name"), + tmp.getAttribute(context, "pubid"), + tmp.getAttribute(context, "sysid")); + notations.op_aset(context, + tmp.getAttribute(context, "name"), decl); + decls.add(decl); + } else if (isContentModel(node)) { + XmlElementContent cm = + new XmlElementContent(context.getRuntime(), + (XmlDocument) document(context), + node); + contentModels.op_aset(context, cm.element_name(context), cm); + } else { + // recurse + decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild()))); + } + + node = node.getNextSibling(); } - /** - * Recursively extract various DTD declarations and store them in - * the various collections. 
- */ - protected void extractDecls(ThreadContext context) { - Ruby runtime = context.getRuntime(); - - // initialize data structures - attributes = RubyHash.newHash(runtime); - elements = RubyHash.newHash(runtime); - entities = RubyHash.newHash(runtime); - notations = RubyHash.newHash(runtime); - contentModels = RubyHash.newHash(runtime); - children = runtime.getNil(); - - // recursively extract decls - if (node == null) return; // leave all the decl hash's empty - - // convert allDecls to a NodeSet - children = XmlNodeSet.newXmlNodeSet(context, extractDecls(context, node.getFirstChild())); - - // add attribute decls as attributes to the matching element decl - RubyArray keys = attributes.keys(); - for (int i = 0; i < keys.getLength(); ++i) { - IRubyObject akey = keys.entry(i); - IRubyObject val; - - val = attributes.op_aref(context, akey); - if (val.isNil()) continue; - XmlAttributeDecl attrDecl = (XmlAttributeDecl) val; - IRubyObject ekey = attrDecl.element_name(context); - val = elements.op_aref(context, ekey); - if (val.isNil()) continue; - XmlElementDecl elemDecl = (XmlElementDecl) val; - - elemDecl.appendAttrDecl(attrDecl); - } - - // add content models to the matching element decl - keys = contentModels.keys(); - for (int i = 0; i < keys.getLength(); ++i) { - IRubyObject key = keys.entry(i); - IRubyObject cm = contentModels.op_aref(context, key); - - IRubyObject elem = elements.op_aref(context, key); - if (elem.isNil()) continue; - if (((XmlElementDecl)elem).isEmpty()) continue; - ((XmlElementDecl) elem).setContentModel(cm); - } - } - - /** - * The node is either the first child of the root dtd - * node (as returned by getInternalSubset()) or the first child of - * the external subset node (as returned by getExternalSubset()). 
- * - * This recursive function will not descend into an - * 'externalSubset' node, thus for an internal subset it only - * extracts nodes in the internal subset, and for an external - * subset it extracts everything and assumess node - * and all children are part of the external subset. - */ - protected IRubyObject[] extractDecls(ThreadContext context, Node node) { - List decls = new ArrayList(); - while (node != null) { - if (isExternalSubset(node)) { - break; - } else if (isAttributeDecl(node)) { - XmlAttributeDecl decl = (XmlAttributeDecl) - XmlAttributeDecl.create(context, node); - attributes.op_aset(context, decl.attribute_name(context), decl); - decls.add(decl); - } else if (isElementDecl(node)) { - XmlElementDecl decl = (XmlElementDecl) - XmlElementDecl.create(context, node); - elements.op_aset(context, decl.element_name(context), decl); - decls.add(decl); - } else if (isEntityDecl(node)) { - XmlEntityDecl decl = (XmlEntityDecl) - XmlEntityDecl.create(context, node); - entities.op_aset(context, decl.node_name(context), decl); - decls.add(decl); - } else if (isNotationDecl(node)) { - XmlNode tmp = (XmlNode) - NokogiriHelpers.constructNode(context.getRuntime(), node); - IRubyObject decl = invoke(context, notationClass, "new", - tmp.getAttribute(context, "name"), - tmp.getAttribute(context, "pubid"), - tmp.getAttribute(context, "sysid")); - notations.op_aset(context, - tmp.getAttribute(context, "name"), decl); - decls.add(decl); - } else if (isContentModel(node)) { - XmlElementContent cm = - new XmlElementContent(context.getRuntime(), - (XmlDocument) document(context), - node); - contentModels.op_aset(context, cm.element_name(context), cm); - } else { - // recurse - decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild()))); - } - - node = node.getNextSibling(); - } - - return decls.toArray(new IRubyObject[decls.size()]); - } - - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - // since we use nekoDTD to parse 
dtd, node might be ElementImpl type - // An external subset doesn't need to show up, so this method just see docType. - DocumentType docType = node.getOwnerDocument().getDoctype(); - visitor.enter(docType); - visitor.leave(docType); - } + return decls.toArray(new IRubyObject[decls.size()]); + } + + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + // since we use nekoDTD to parse dtd, node might be ElementImpl type + // An external subset doesn't need to show up, so this method just see docType. + DocumentType docType = node.getOwnerDocument().getDoctype(); + visitor.enter(docType); + visitor.leave(docType); + } } diff --git a/ext/java/nokogiri/XmlElement.java b/ext/java/nokogiri/XmlElement.java index d300af60cb..b8e3225edc 100644 --- a/ext/java/nokogiri/XmlElement.java +++ b/ext/java/nokogiri/XmlElement.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import org.jruby.Ruby; @@ -44,44 +12,33 @@ /** * Class for Nokogiri::XML::Element - * + * * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::Element", parent="Nokogiri::XML::Node") -public class XmlElement extends XmlNode { +@JRubyClass(name = "Nokogiri::XML::Element", parent = "Nokogiri::XML::Node") +public class XmlElement extends XmlNode +{ + private static final long serialVersionUID = 1L; + + public + XmlElement(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz); + } - public XmlElement(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); - } + public + XmlElement(Ruby runtime, RubyClass klazz, Node element) + { + super(runtime, klazz, element); + } - public XmlElement(Ruby runtime, RubyClass klazz, Node element) { - super(runtime, klazz, element); - } - - @Override - public void setNode(ThreadContext context, Node node) { - super.setNode(context, node); - if (doc != null) - setInstanceVariable("@document", doc); - } - - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((Element) node); - XmlNodeSet xmlNodeSet = (XmlNodeSet) children(context); - if (xmlNodeSet.length() > 0) { - IRubyObject[] nodes = XmlNodeSet.getNodes(context, xmlNodeSet); - for( int i = 0; i < nodes.length; i++ ) { - Object item = nodes[i]; - if (item instanceof XmlNode) { - ((XmlNode) item).accept(context, visitor); - } - else if (item instanceof XmlNamespace) { - ((XmlNamespace) 
item).accept(context, visitor); - } - } - } - visitor.leave((Element) node); - } + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((Element) node); + acceptChildren(context, getChildren(), visitor); + visitor.leave((Element) node); + } } diff --git a/ext/java/nokogiri/XmlElementContent.java b/ext/java/nokogiri/XmlElementContent.java index 3ebd42c7f5..501a4557be 100644 --- a/ext/java/nokogiri/XmlElementContent.java +++ b/ext/java/nokogiri/XmlElementContent.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getLocalPart; @@ -55,328 +23,390 @@ * * @author Patrick Mahoney */ -@JRubyClass(name="Nokogiri::XML::ElementContent") -public class XmlElementContent extends RubyObject { - protected String element_name = null; - - protected String name; - protected Type type; - protected Occur occur; - protected IRubyObject left; - protected IRubyObject right; - - /** values hardcoded from nokogiri/xml/element_content.rb; this - * makes me uneasy, but it works */ - public enum Type { - PCDATA (1), - ELEMENT (2), - SEQ (3), - OR (4); - - private final int value; - Type(int value) { - this.value = value; - } - public IRubyObject value(Ruby runtime) { - return runtime.newFixnum(value); - } +@JRubyClass(name = "Nokogiri::XML::ElementContent") +public class XmlElementContent extends RubyObject +{ + private static final long serialVersionUID = 1L; + + protected String element_name = null; + + protected String name; + protected Type type; + protected Occur occur; + protected IRubyObject left; + protected IRubyObject right; + + /** values hardcoded from nokogiri/xml/element_content.rb; this + * makes me uneasy, but it works */ + public enum Type { + PCDATA(1), + ELEMENT(2), + SEQ(3), + OR(4); + + private final int value; + Type(int value) + { + this.value = value; } - - public enum Occur { - ONCE (1), - OPT (2), - MULT (3), - PLUS (4); - - private final int value; - Occur(int value) { - this.value = value; - } - public IRubyObject value(Ruby runtime) { - return runtime.newFixnum(value); - } + public IRubyObject value(Ruby runtime) + { + return runtime.newFixnum(value); } - - public XmlElementContent(Ruby runtime, RubyClass klass, - XmlDocument document, Node node) { - this(runtime, klass, document, new NodeIter(node)); - element_name = ((Element)node).getAttribute("ename"); - - /* - * This is a bit of a hack to match libxml behavior. 
- * - * If the tree contains but a single group with a single - * element, we can simply return the bare element without the - * surrounding group. - * - * TODO: is SEQ/ONCE with a single child the only case for - * reduction? - * - * - pmahoney - */ - if (!this.left.isNil()) { - XmlElementContent left = (XmlElementContent) this.left; - if (type == Type.SEQ && - occur == Occur.ONCE && - left.type == Type.ELEMENT && - right.isNil()) { - this.name = left.name; - this.type = left.type; - this.occur = left.occur; - this.left = this.right; // both nil - } - } + } + + public enum Occur { + ONCE(1), + OPT(2), + MULT(3), + PLUS(4); + + private final int value; + Occur(int value) + { + this.value = value; } - - public XmlElementContent(Ruby runtime, XmlDocument document, Node node) { - this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ElementContent"), document, node); + public IRubyObject value(Ruby runtime) + { + return runtime.newFixnum(value); } - - public XmlElementContent(Ruby runtime, RubyClass klass, - XmlDocument doc, NodeIter iter) { - super(runtime, klass); - - setInstanceVariable("@document", doc); - - name = null; - type = Type.SEQ; - occur = Occur.ONCE; - left = runtime.getNil(); - right = runtime.getNil(); - - apply(runtime, klass, doc, iter); + } + + public + XmlElementContent(Ruby runtime, RubyClass klass, + XmlDocument document, Node node) + { + this(runtime, klass, document, new NodeIter(node)); + element_name = ((Element)node).getAttribute("ename"); + + /* + * This is a bit of a hack to match libxml behavior. + * + * If the tree contains but a single group with a single + * element, we can simply return the bare element without the + * surrounding group. + * + * TODO: is SEQ/ONCE with a single child the only case for + * reduction? 
+ * + * - pmahoney + */ + if (!this.left.isNil()) { + XmlElementContent left = (XmlElementContent) this.left; + if (type == Type.SEQ && + occur == Occur.ONCE && + left.type == Type.ELEMENT && + right.isNil()) { + this.name = left.name; + this.type = left.type; + this.occur = left.occur; + this.left = this.right; // both nil + } } - - protected XmlElementContent(Ruby runtime, RubyClass klass, - Type type, XmlDocument doc, NodeIter iter, - XmlElementContent left) { - super(runtime, klass); - - setInstanceVariable("@document", doc); - - name = null; - this.type = type; - occur = Occur.ONCE; - this.left = left; - right = runtime.getNil(); - - switch (type) { - case SEQ: - case OR: - applyGroup(runtime, klass, doc, iter); - default: - // noop - } + } + + public + XmlElementContent(Ruby runtime, XmlDocument document, Node node) + { + this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ElementContent"), document, node); + } + + public + XmlElementContent(Ruby runtime, RubyClass klass, + XmlDocument doc, NodeIter iter) + { + super(runtime, klass); + + setInstanceVariable("@document", doc); + + name = null; + type = Type.SEQ; + occur = Occur.ONCE; + left = runtime.getNil(); + right = runtime.getNil(); + + apply(runtime, klass, doc, iter); + } + + protected + XmlElementContent(Ruby runtime, RubyClass klass, + Type type, XmlDocument doc, NodeIter iter, + XmlElementContent left) + { + super(runtime, klass); + + setInstanceVariable("@document", doc); + + name = null; + this.type = type; + occur = Occur.ONCE; + this.left = left; + right = runtime.getNil(); + + switch (type) { + case SEQ: + case OR: + applyGroup(runtime, klass, doc, iter); + break; + default: + break; } - - /** - * Applies the current node in iter to this content - * model. When finished, iter will point to the last - * processed node. 
- */ - protected void apply(Ruby runtime, RubyClass klass, - XmlDocument doc, - NodeIter iter) { - if (iter.isNull()) return; - - Element elem = (Element) iter.current(); - - if (isGroup(elem) && iter.hasChildren()) { - iter.firstChild(); - applyGroup(runtime, klass, doc, iter); - iter.parent(); - } else if (isElement(elem)) { - name = elem.getAttribute("name"); - type = Type.ELEMENT; - } - - iter.nextSibling(); - if (iter.isNull()) return; - if (isOccurrence(iter.current())) { - setOccur(((Element)iter.current()).getAttribute("type")); - iter.nextSibling(); - } + } + + /** + * Applies the current node in iter to this content + * model. When finished, iter will point to the last + * processed node. + */ + protected void + apply(Ruby runtime, RubyClass klass, + XmlDocument doc, + NodeIter iter) + { + if (iter.isNull()) { return; } + + Element elem = (Element) iter.current(); + + if (isGroup(elem) && iter.hasChildren()) { + iter.firstChild(); + applyGroup(runtime, klass, doc, iter); + iter.parent(); + } else if (isElement(elem)) { + name = elem.getAttribute("name"); + type = Type.ELEMENT; } - protected void applyGroup(Ruby runtime, RubyClass klass, - XmlDocument doc, NodeIter iter) { - // LEFT branch + iter.nextSibling(); + if (iter.isNull()) { return; } + if (isOccurrence(iter.current())) { + setOccur(((Element)iter.current()).getAttribute("type")); + iter.nextSibling(); + } + } - if (iter.isNull()) return; + protected void + applyGroup(Ruby runtime, RubyClass klass, + XmlDocument doc, NodeIter iter) + { + // LEFT branch - if (left.isNil()) { - left = new XmlElementContent(runtime, klass, doc, iter); + if (iter.isNull()) { return; } - if (iter.isNull()) return; + if (left.isNil()) { + left = new XmlElementContent(runtime, klass, doc, iter); - if (isSeparator(iter.current())) { - setType(((Element)iter.current()).getAttribute("type")); - iter.nextSibling(); // skip separator - } - } + if (iter.isNull()) { return; } - // RIGHT branch + if (isSeparator(iter.current())) 
{ + setType(((Element)iter.current()).getAttribute("type")); + iter.nextSibling(); // skip separator + } + } - if (iter.isNull()) return; + // RIGHT branch - right = new XmlElementContent(runtime, klass, doc, iter); + if (iter.isNull()) { return; } - if (iter.isNull()) return; - if (isSeparator(iter.current())) - iter.nextSibling(); // skip separator - if (iter.isNull()) return; + right = new XmlElementContent(runtime, klass, doc, iter); - // binary tree can only hold two children. If we have more, - // the right child is another tree with the same sequence - // "type". The "left" of the new tree is what we've - // currently consumed as our "right" branch of this tree. - right = new XmlElementContent(runtime, klass, type, doc, iter, - (XmlElementContent) right); + if (iter.isNull()) { return; } + if (isSeparator(iter.current())) { + iter.nextSibling(); // skip separator } + if (iter.isNull()) { return; } + + // binary tree can only hold two children. If we have more, + // the right child is another tree with the same sequence + // "type". The "left" of the new tree is what we've + // currently consumed as our "right" branch of this tree. + right = new XmlElementContent(runtime, klass, type, doc, iter, + (XmlElementContent) right); + } + + /** + * Set the type based on the separator node type string. 
+ */ + protected void + setType(String type) + { + if ("|".equals(type)) { this.type = Type.OR; } + else if (",".equals(type)) { this.type = Type.SEQ; } + } + + protected void + setOccur(String type) + { + if ("*".equals(type)) { this.occur = Occur.MULT; } + else if ("+".equals(type)) { this.occur = Occur.PLUS; } + } + + public static boolean + isGroup(Node node) + { + return XmlDtd.nameEquals(node, DTDConfiguration.E_GROUP); + } + + // content model element, not Element node type + public static boolean + isElement(Node node) + { + return XmlDtd.nameEquals(node, DTDConfiguration.E_ELEMENT); + } + + public static boolean + isSeparator(Node node) + { + return XmlDtd.nameEquals(node, DTDConfiguration.E_SEPARATOR); + } + + public static boolean + isOccurrence(Node node) + { + return XmlDtd.nameEquals(node, DTDConfiguration.E_OCCURRENCE); + } + + /** + * Return the name of the element to which this content model + * applies. Only works for the root of the tree. + */ + public IRubyObject + element_name(ThreadContext context) + { + return nonEmptyStringOrNil(context.getRuntime(), element_name); + } + + @JRubyMethod + public IRubyObject + prefix(ThreadContext context) + { + return nonEmptyStringOrNil(context.getRuntime(), getPrefix(name)); + } + + @JRubyMethod + public IRubyObject + name(ThreadContext context) + { + return nonEmptyStringOrNil(context.getRuntime(), getLocalPart(name)); + } + + @JRubyMethod + public IRubyObject + type(ThreadContext context) + { + return type.value(context.getRuntime()); + } + + @JRubyMethod + public IRubyObject + occur(ThreadContext context) + { + return occur.value(context.getRuntime()); + } + + @JRubyMethod + public IRubyObject + c1(ThreadContext context) + { + return left; + } + + @JRubyMethod + public IRubyObject + c2(ThreadContext context) + { + return right; + } + + /** + * Iterator for a tree of Nodes. Has a current position that + * points to a given node. 
Calling nextSibling() on the last + * sibling results in a current position of null. This position + * is not fatal and can be escaped by calling parent() (which + * moves to the parent of previous sibling). The null position is + * used to indicate the end of a list. + */ + protected static class NodeIter + { + protected Node pre; + protected Node cur; /** - * Set the type based on the separator node type string. + * The first time, we fake a previous sibling element. Thus, + * initially, current() is null, and the first call should be + * nextSibling(). */ - protected void setType(String type) { - if ("|".equals(type)) this.type = Type.OR; - else if (",".equals(type)) this.type = Type.SEQ; + public + NodeIter(Node node) + { + pre = null; + cur = node.getFirstChild(); // skip root contentModel node } - protected void setOccur(String type) { - if ("*".equals(type)) this.occur = Occur.MULT; - else if ("+".equals(type)) this.occur = Occur.PLUS; + public Node + current() + { + return cur; } - public static boolean isGroup(Node node) { - return XmlDtd.nameEquals(node, DTDConfiguration.E_GROUP); + public boolean + isNull() + { + return (cur == null); } - // content model element, not Element node type - public static boolean isElement(Node node) { - return XmlDtd.nameEquals(node, DTDConfiguration.E_ELEMENT); - } - - public static boolean isSeparator(Node node) { - return XmlDtd.nameEquals(node, DTDConfiguration.E_SEPARATOR); - } - - public static boolean isOccurrence(Node node) { - return XmlDtd.nameEquals(node, DTDConfiguration.E_OCCURRENCE); + public boolean + hasChildren() + { + return (cur != null && cur.hasChildNodes()); } /** - * Return the name of the element to which this content model - * applies. Only works for the root of the tree. + * Descend to the first child. 
*/ - public IRubyObject element_name(ThreadContext context) { - return nonEmptyStringOrNil(context.getRuntime(), element_name); - } - - @JRubyMethod - public IRubyObject prefix(ThreadContext context) { - return nonEmptyStringOrNil(context.getRuntime(), getPrefix(name)); - } - - @JRubyMethod - public IRubyObject name(ThreadContext context) { - return nonEmptyStringOrNil(context.getRuntime(), getLocalPart(name)); - } - - @JRubyMethod - public IRubyObject type(ThreadContext context) { - return type.value(context.getRuntime()); - } - - @JRubyMethod - public IRubyObject occur(ThreadContext context) { - return occur.value(context.getRuntime()); - } - - @JRubyMethod - public IRubyObject c1(ThreadContext context) { - return left; - } - - @JRubyMethod - public IRubyObject c2(ThreadContext context) { - return right; + public Node + firstChild() + { + if (cur == null) { throw new RuntimeException("no children"); } + Node ch = cur.getFirstChild(); + if (ch == null) { throw new RuntimeException("no children"); } + + cur = ch; + return cur; } /** - * Iterator for a tree of Nodes. Has a current position that - * points to a given node. Calling nextSibling() on the last - * sibling results in a current position of null. This position - * is not fatal and can be escaped by calling parent() (which - * moves to the parent of previous sibling). The null position is - * used to indicate the end of a list. + * Move to the next sibling */ - protected static class NodeIter { - protected Node pre; - protected Node cur; - - /** - * The first time, we fake a previous sibling element. Thus, - * initially, current() is null, and the first call should be - * nextSibling(). 
- */ - public NodeIter(Node node) { - pre = null; - cur = node.getFirstChild(); // skip root contentModel node - } - - public Node current() { - return cur; - } - - public boolean isNull() { - return (cur == null); - } - - public boolean hasChildren() { - return (cur != null && cur.hasChildNodes()); - } - - /** - * Descend to the first child. - */ - public Node firstChild() { - if (cur == null) throw new RuntimeException("no children"); - Node ch = cur.getFirstChild(); - if (ch == null) throw new RuntimeException("no children"); - - cur = ch; - return cur; - } - - /** - * Move to the next sibling - */ - public Node nextSibling() { - if (cur == null) { - throw new RuntimeException("no next sibling"); - } else { - Node ns = cur.getNextSibling(); - if (ns == null) { - pre = cur; - cur = null; - } else { - cur = ns; - } - return cur; - } + public Node + nextSibling() + { + if (cur == null) { + throw new RuntimeException("no next sibling"); + } else { + Node ns = cur.getNextSibling(); + if (ns == null) { + pre = cur; + cur = null; + } else { + cur = ns; } + return cur; + } + } - /** - * Move to the parent. - */ - public Node parent() { - if (cur == null) cur = pre; + /** + * Move to the parent. 
+ */ + public Node + parent() + { + if (cur == null) { cur = pre; } - Node p = cur.getParentNode(); - if (p == null) throw new RuntimeException("no parent"); + Node p = cur.getParentNode(); + if (p == null) { throw new RuntimeException("no parent"); } - cur = p; - return cur; - } + cur = p; + return cur; } + } } diff --git a/ext/java/nokogiri/XmlElementDecl.java b/ext/java/nokogiri/XmlElementDecl.java index 591d86b663..bd4bc8cf25 100644 --- a/ext/java/nokogiri/XmlElementDecl.java +++ b/ext/java/nokogiri/XmlElementDecl.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getLocalPart; @@ -51,102 +19,130 @@ * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::ElementDecl", parent="Nokogiri::XML::Node") -public class XmlElementDecl extends XmlNode { - RubyArray attrDecls; - IRubyObject contentModel; - - public XmlElementDecl(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - public void setNode(ThreadContext context, Node node) { - super.setNode(context, node); - attrDecls = RubyArray.newArray(context.getRuntime()); - contentModel = context.getRuntime().getNil(); - } - - /** - * Initialize based on an elementDecl node from a NekoDTD parsed - * DTD. - */ - public XmlElementDecl(Ruby ruby, RubyClass klass, Node elemDeclNode) { - super(ruby, klass, elemDeclNode); - attrDecls = RubyArray.newArray(ruby); - contentModel = ruby.getNil(); - } - - public static IRubyObject create(ThreadContext context, Node elemDeclNode) { - XmlElementDecl self = - new XmlElementDecl(context.getRuntime(), - getNokogiriClass(context.getRuntime(), "Nokogiri::XML::ElementDecl"), - elemDeclNode); - return self; - } - - public IRubyObject element_name(ThreadContext context) { - return getAttribute(context, "ename"); - } - - public void setContentModel(IRubyObject cm) { - contentModel = cm; - } - - @Override - @JRubyMethod - public IRubyObject content(ThreadContext context) { - return contentModel; - } - - public boolean isEmpty() { - return "EMPTY".equals(getAttribute("model")); - } - - @JRubyMethod - public IRubyObject prefix(ThreadContext context) { - String enamePrefix = getPrefix(getAttribute("ename")); - if (enamePrefix == null) - return context.getRuntime().getNil(); - else - return context.getRuntime().newString(enamePrefix); - } - - /** - * Returns the local part of the element name. 
- */ - @Override - @JRubyMethod - public IRubyObject node_name(ThreadContext context) { - String ename = getLocalPart(getAttribute("ename")); - return context.getRuntime().newString(ename); - } - - @Override - @JRubyMethod(name = "node_name=") - public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { - throw context.getRuntime() - .newRuntimeError("cannot change name of DTD decl"); - } - - @Override - @JRubyMethod - public IRubyObject attribute_nodes(ThreadContext context) { - return attrDecls; - } - - @Override - @JRubyMethod - public IRubyObject attribute(ThreadContext context, IRubyObject name) { - throw context.getRuntime() - .newRuntimeError("attribute by name not implemented"); - } - - public void appendAttrDecl(XmlAttributeDecl decl) { - attrDecls.append(decl); - } - - @JRubyMethod - public IRubyObject element_type(ThreadContext context) { - return context.getRuntime().newFixnum(node.getNodeType()); - } +@JRubyClass(name = "Nokogiri::XML::ElementDecl", parent = "Nokogiri::XML::Node") +public class XmlElementDecl extends XmlNode +{ + private static final long serialVersionUID = 1L; + + RubyArray attrDecls; + IRubyObject contentModel; + + public + XmlElementDecl(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz); + attrDecls = RubyArray.newArray(runtime); + contentModel = runtime.getNil(); + } + + /** + * Initialize based on an elementDecl node from a NekoDTD parsed DTD. 
+ */ + public + XmlElementDecl(Ruby ruby, RubyClass klass, Node elemDeclNode) + { + super(ruby, klass, elemDeclNode); + } + + @Override // gets called from constructor ^^^ + public void + setNode(Ruby runtime, Node node) + { + super.setNode(runtime, node); + attrDecls = RubyArray.newArray(runtime); + contentModel = runtime.getNil(); + } + + static XmlElementDecl + create(ThreadContext context, Node elemDeclNode) + { + return new XmlElementDecl(context.runtime, + getNokogiriClass(context.runtime, "Nokogiri::XML::ElementDecl"), + elemDeclNode + ); + } + + public IRubyObject + element_name(ThreadContext context) + { + return getAttribute(context, "ename"); + } + + public void + setContentModel(IRubyObject cm) + { + contentModel = cm; + } + + @Override + @JRubyMethod + public IRubyObject + content(ThreadContext context) + { + return contentModel; + } + + public boolean + isEmpty() + { + return "EMPTY".equals(getAttribute("model")); + } + + @JRubyMethod + public IRubyObject + prefix(ThreadContext context) + { + String enamePrefix = getPrefix(getAttribute("ename")); + if (enamePrefix == null) { return context.nil; } + return context.runtime.newString(enamePrefix); + } + + /** + * Returns the local part of the element name. 
+ */ + @Override + @JRubyMethod + public IRubyObject + node_name(ThreadContext context) + { + String ename = getLocalPart(getAttribute("ename")); + return context.runtime.newString(ename); + } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject + node_name_set(ThreadContext context, IRubyObject name) + { + throw context.runtime.newRuntimeError("cannot change name of DTD decl"); + } + + @Override + @JRubyMethod + public IRubyObject + attribute_nodes(ThreadContext context) + { + return attrDecls; + } + + @Override + @JRubyMethod + public IRubyObject + attribute(ThreadContext context, IRubyObject name) + { + throw context.runtime.newRuntimeError("attribute by name not implemented"); + } + + public void + appendAttrDecl(XmlAttributeDecl decl) + { + attrDecls.append(decl); + } + + @JRubyMethod + public IRubyObject + element_type(ThreadContext context) + { + return context.runtime.newFixnum(node.getNodeType()); + } } diff --git a/ext/java/nokogiri/XmlEntityDecl.java b/ext/java/nokogiri/XmlEntityDecl.java index 422cf17694..d2dff7f022 100644 --- a/ext/java/nokogiri/XmlEntityDecl.java +++ b/ext/java/nokogiri/XmlEntityDecl.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * 
The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -49,113 +17,135 @@ * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::EntityDecl", parent="Nokogiri::XML::Node") -public class XmlEntityDecl extends XmlNode { - public static final int INTERNAL_GENERAL = 1; - public static final int EXTERNAL_GENERAL_PARSED = 2; - public static final int EXTERNAL_GENERAL_UNPARSED = 3; - public static final int INTERNAL_PARAMETER = 4; - public static final int EXTERNAL_PARAMETER = 5; - public static final int INTERNAL_PREDEFINED = 6; - - private IRubyObject entityType; - private IRubyObject name; - private IRubyObject external_id; - private IRubyObject system_id; - private IRubyObject content; - - public XmlEntityDecl(Ruby ruby, RubyClass klass) { - super(ruby, klass); - throw ruby.newRuntimeError("node required"); - } +@JRubyClass(name = "Nokogiri::XML::EntityDecl", parent = "Nokogiri::XML::Node") +public class XmlEntityDecl extends XmlNode +{ + private static final long serialVersionUID = 1L; - /** - * Initialize based on an entityDecl node from a NekoDTD parsed - * DTD. 
- */ - public XmlEntityDecl(Ruby ruby, RubyClass klass, Node entDeclNode) { - super(ruby, klass, entDeclNode); - entityType = RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL); - name = external_id = system_id = content = ruby.getNil(); - } - - public XmlEntityDecl(Ruby ruby, RubyClass klass, Node entDeclNode, IRubyObject[] argv) { - super(ruby, klass, entDeclNode); - name = argv[0]; - entityType = RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL); - external_id = system_id = content = ruby.getNil(); - if (argv.length > 1) entityType = argv[1]; - if (argv.length > 4) { - external_id = argv[2]; - system_id = argv[3]; - content = argv[4]; - } - } + public static final int INTERNAL_GENERAL = 1; + public static final int EXTERNAL_GENERAL_PARSED = 2; + public static final int EXTERNAL_GENERAL_UNPARSED = 3; + public static final int INTERNAL_PARAMETER = 4; + public static final int EXTERNAL_PARAMETER = 5; + public static final int INTERNAL_PREDEFINED = 6; - public static IRubyObject create(ThreadContext context, Node entDeclNode) { - XmlEntityDecl self = - new XmlEntityDecl(context.getRuntime(), - getNokogiriClass(context.getRuntime(), "Nokogiri::XML::EntityDecl"), - entDeclNode); - return self; - } - - // when entity is created by create_entity method - public static IRubyObject create(ThreadContext context, Node entDeclNode, IRubyObject[] argv) { - XmlEntityDecl self = - new XmlEntityDecl(context.getRuntime(), - getNokogiriClass(context.getRuntime(), "Nokogiri::XML::EntityDecl"), - entDeclNode, argv); - return self; - } + private IRubyObject entityType; + private IRubyObject name; + private IRubyObject external_id; + private IRubyObject system_id; + private IRubyObject content; - /** - * Returns the local part of the element name. 
- */ - @Override - @JRubyMethod - public IRubyObject node_name(ThreadContext context) { - IRubyObject value = getAttribute(context, "name"); - if (value.isNil()) value = name; - return value; - } + XmlEntityDecl(Ruby runtime, RubyClass klass) + { + super(runtime, klass); + } - @Override - @JRubyMethod(name = "node_name=") - public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { - throw context.getRuntime() - .newRuntimeError("cannot change name of DTD decl"); - } + /** + * Initialize based on an entityDecl node from a NekoDTD parsed DTD. + */ + public + XmlEntityDecl(Ruby runtime, RubyClass klass, Node entDeclNode) + { + super(runtime, klass, entDeclNode); + entityType = RubyFixnum.newFixnum(runtime, XmlEntityDecl.INTERNAL_GENERAL); + name = external_id = system_id = content = runtime.getNil(); + } - @JRubyMethod - public IRubyObject content(ThreadContext context) { - IRubyObject value = getAttribute(context, "value"); - if (value.isNil()) value = content; - return value; - } + public + XmlEntityDecl(Ruby runtime, RubyClass klass, Node entDeclNode, IRubyObject[] argv) + { + super(runtime, klass, entDeclNode); + name = argv[0]; + entityType = RubyFixnum.newFixnum(runtime, XmlEntityDecl.INTERNAL_GENERAL); + external_id = system_id = content = runtime.getNil(); - // TODO: what is content vs. original_content? 
- @JRubyMethod - public IRubyObject original_content(ThreadContext context) { - return getAttribute(context, "value"); + if (argv.length > 1) { entityType = argv[1]; } + if (argv.length > 4) { + external_id = argv[2]; + system_id = argv[3]; + content = argv[4]; } + } - @JRubyMethod - public IRubyObject system_id(ThreadContext context) { - IRubyObject value = getAttribute(context, "sysid"); - if (value.isNil()) value = system_id; - return value; - } + static XmlEntityDecl + create(ThreadContext context, Node entDeclNode) + { + return new XmlEntityDecl(context.runtime, + getNokogiriClass(context.runtime, "Nokogiri::XML::EntityDecl"), + entDeclNode + ); + } - @JRubyMethod - public IRubyObject external_id(ThreadContext context) { - IRubyObject value = getAttribute(context, "pubid"); - if (value.isNil()) value = external_id; - return value; - } + // when entity is created by create_entity method + static XmlEntityDecl + create(ThreadContext context, Node entDeclNode, IRubyObject... argv) + { + return new XmlEntityDecl(context.runtime, + getNokogiriClass(context.runtime, "Nokogiri::XML::EntityDecl"), + entDeclNode, argv + ); + } - @JRubyMethod - public IRubyObject entity_type(ThreadContext context) { - return entityType; - } + /** + * Returns the local part of the element name. + */ + @Override + @JRubyMethod + public IRubyObject + node_name(ThreadContext context) + { + IRubyObject value = getAttribute(context, "name"); + if (value.isNil()) { value = name; } + return value; + } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject + node_name_set(ThreadContext context, IRubyObject name) + { + throw context.runtime.newRuntimeError("cannot change name of DTD decl"); + } + + @JRubyMethod + public IRubyObject + content(ThreadContext context) + { + IRubyObject value = getAttribute(context, "value"); + if (value.isNil()) { value = content; } + return value; + } + + // TODO: what is content vs. original_content? 
+ @JRubyMethod + public IRubyObject + original_content(ThreadContext context) + { + return getAttribute(context, "value"); + } + + @JRubyMethod + public IRubyObject + system_id(ThreadContext context) + { + IRubyObject value = getAttribute(context, "sysid"); + if (value.isNil()) { value = system_id; } + return value; + } + + @JRubyMethod + public IRubyObject + external_id(ThreadContext context) + { + IRubyObject value = getAttribute(context, "pubid"); + if (value.isNil()) { value = external_id; } + return value; + } + + @JRubyMethod + public IRubyObject + entity_type(ThreadContext context) + { + return entityType; + } } diff --git a/ext/java/nokogiri/XmlEntityReference.java b/ext/java/nokogiri/XmlEntityReference.java index 789352568a..512bd681c4 100644 --- a/ext/java/nokogiri/XmlEntityReference.java +++ b/ext/java/nokogiri/XmlEntityReference.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate; @@ -47,55 +15,65 @@ /** * Class for Nokogiri::XML::EntityReference - * + * * @author sergio * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::EntityReference", parent="Nokogiri::XML::Node") -public class XmlEntityReference extends XmlNode { +@JRubyClass(name = "Nokogiri::XML::EntityReference", parent = "Nokogiri::XML::Node") +public class XmlEntityReference extends XmlNode +{ + private static final long serialVersionUID = 1L; - public XmlEntityReference(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } + public + XmlEntityReference(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } - public XmlEntityReference(Ruby ruby, RubyClass klass, Node node) { - super(ruby, klass, node); + public + XmlEntityReference(Ruby ruby, RubyClass klass, Node node) + { + super(ruby, klass, node); + } + + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw context.runtime.newArgumentError(args.length, 2); } - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) { - throw getRuntime().newArgumentError(args.length, 2); - } + IRubyObject doc = args[0]; + IRubyObject name = args[1]; - IRubyObject doc = args[0]; - IRubyObject name = args[1]; + Document document = ((XmlNode) doc).getOwnerDocument(); + // FIXME: disable error checking as a workaround for #719. 
this depends on the internals of Xerces. + CoreDocumentImpl internalDocument = (CoreDocumentImpl) document; + boolean oldErrorChecking = internalDocument.getErrorChecking(); + internalDocument.setErrorChecking(false); + Node node = document.createEntityReference(rubyStringToString(name)); + internalDocument.setErrorChecking(oldErrorChecking); + setNode(context.runtime, node); + } - Document document = ((XmlNode) doc).getOwnerDocument(); - // FIXME: disable error checking as a workaround for #719. this depends on the internals of Xerces. - CoreDocumentImpl internalDocument = (CoreDocumentImpl) document; - boolean oldErrorChecking = internalDocument.getErrorChecking(); - internalDocument.setErrorChecking(false); - Node node = document.createEntityReference(rubyStringToString(name)); - internalDocument.setErrorChecking(oldErrorChecking); - setNode(context, node); - } - - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter(node); - Node child = node.getFirstChild(); - while (child != null) { - IRubyObject nokoNode = getCachedNodeOrCreate(context.getRuntime(), child); - if (nokoNode instanceof XmlNode) { - XmlNode cur = (XmlNode) nokoNode; - cur.accept(context, visitor); - } else if (nokoNode instanceof XmlNamespace) { - XmlNamespace cur = (XmlNamespace) nokoNode; - cur.accept(context, visitor); - } - child = child.getNextSibling(); - } - visitor.leave(node); + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter(node); + Node child = node.getFirstChild(); + while (child != null) { + IRubyObject nokoNode = getCachedNodeOrCreate(context.getRuntime(), child); + if (nokoNode instanceof XmlNode) { + XmlNode cur = (XmlNode) nokoNode; + cur.accept(context, visitor); + } else if (nokoNode instanceof XmlNamespace) { + XmlNamespace cur = (XmlNamespace) nokoNode; + cur.accept(context, visitor); + } + child = child.getNextSibling(); } + visitor.leave(node); + } } diff --git 
a/ext/java/nokogiri/XmlNamespace.java b/ext/java/nokogiri/XmlNamespace.java index ed97519e54..b2804bb7bc 100644 --- a/ext/java/nokogiri/XmlNamespace.java +++ b/ext/java/nokogiri/XmlNamespace.java @@ -1,49 +1,13 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; -import static nokogiri.internals.NokogiriHelpers.CACHED_NODE; import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate; import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static nokogiri.internals.NokogiriHelpers.stringOrNil; -import nokogiri.internals.NokogiriHelpers; import nokogiri.internals.SaveContextVisitor; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyObject; -import org.jruby.RubyString; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; @@ -54,163 +18,176 @@ /** * Class for Nokogiri::XML::Namespace - * + * * @author serabe * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::Namespace") -public class XmlNamespace extends RubyObject { - private Attr attr; - private IRubyObject prefix; - private IRubyObject href; - private String prefixString; - private String hrefString; - - public XmlNamespace(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - public Node getNode() { - return attr; - } - - public String getPrefix() { - return prefixString; - } - - public String getHref() { - return hrefString; - } - - void deleteHref() { - hrefString = "http://www.w3.org/XML/1998/namespace"; - href = NokogiriHelpers.stringOrNil(getRuntime(), hrefString); - attr.getOwnerElement().removeAttributeNode(attr); - } +@JRubyClass(name = "Nokogiri::XML::Namespace") +public class XmlNamespace extends RubyObject +{ + private static final long serialVersionUID = 1L; - public void init(Attr attr, IRubyObject prefix, IRubyObject href, IRubyObject xmlDocument) { - init(attr, prefix, href, (String) prefix.toJava(String.class), (String) href.toJava(String.class), xmlDocument); - } - - public void init(Attr attr, IRubyObject prefix, IRubyObject href, String prefixString, String hrefString, IRubyObject xmlDocument) { - this.attr = attr; - this.prefix = 
prefix; - this.href = href; - this.prefixString = prefixString; - this.hrefString = hrefString; - setInstanceVariable("@document", xmlDocument); - } - - public static XmlNamespace createFromAttr(Ruby runtime, Attr attr) { - String prefixValue = getLocalNameForNamespace(attr.getName()); - IRubyObject prefix_value; - if (prefixValue == null) { - prefix_value = runtime.getNil(); - prefixValue = ""; - } else { - prefix_value = RubyString.newString(runtime, prefixValue); - } - String hrefValue = attr.getValue(); - IRubyObject href_value = RubyString.newString(runtime, hrefValue); - // check namespace cache - XmlDocument xmlDocument = (XmlDocument)getCachedNodeOrCreate(runtime, attr.getOwnerDocument()); - xmlDocument.initializeNamespaceCacheIfNecessary(); - XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefixValue, hrefValue); - if (xmlNamespace != null) return xmlNamespace; - - // creating XmlNamespace instance - XmlNamespace namespace = - (XmlNamespace) NokogiriService.XML_NAMESPACE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); - namespace.init(attr, prefix_value, href_value, prefixValue, hrefValue, xmlDocument); - - // updateing namespace cache - xmlDocument.getNamespaceCache().put(namespace, attr.getOwnerElement()); - return namespace; - } - - public static XmlNamespace createFromPrefixAndHref(Node owner, IRubyObject prefix, IRubyObject href) { - String prefixValue = prefix.isNil() ? 
"" : (String) prefix.toJava(String.class); - String hrefValue = (String) href.toJava(String.class); - Ruby runtime = prefix.getRuntime(); - Document document = owner.getOwnerDocument(); - // check namespace cache - XmlDocument xmlDocument = (XmlDocument)getCachedNodeOrCreate(runtime, document); - xmlDocument.initializeNamespaceCacheIfNecessary(); - XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefixValue, hrefValue); - if (xmlNamespace != null) return xmlNamespace; - - // creating XmlNamespace instance - XmlNamespace namespace = - (XmlNamespace) NokogiriService.XML_NAMESPACE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); - String attrName = "xmlns"; - if (!"".equals(prefixValue)) { - attrName = attrName + ":" + prefixValue; - } - Attr attrNode = document.createAttribute(attrName); - attrNode.setNodeValue(hrefValue); - - // initialize XmlNamespace object - namespace.init(attrNode, prefix, href, prefixValue, hrefValue, xmlDocument); - - // updating namespace cache - xmlDocument.getNamespaceCache().put(namespace, owner); - return namespace; - } - - // owner should be an Attr node - public static XmlNamespace createDefaultNamespace(Ruby runtime, Node owner) { - String prefixValue = owner.getPrefix(); - String hrefValue = owner.getNamespaceURI(); - Document document = owner.getOwnerDocument(); - // check namespace cache - XmlDocument xmlDocument = (XmlDocument)getCachedNodeOrCreate(runtime, document); - XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefixValue, hrefValue); - if (xmlNamespace != null) return xmlNamespace; - - // creating XmlNamespace instance - XmlNamespace namespace = - (XmlNamespace) NokogiriService.XML_NAMESPACE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); - - IRubyObject prefix = stringOrNil(runtime, prefixValue); - IRubyObject href = stringOrNil(runtime, hrefValue); - // initialize XmlNamespace object - namespace.init((Attr)owner, prefix, 
href, prefixValue, hrefValue, xmlDocument); - - // updating namespace cache - xmlDocument.getNamespaceCache().put(namespace, owner); - return namespace; - } - - /** - * Create and return a copy of this object. - * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } + private Attr attr; + private transient IRubyObject prefixRuby; + private transient IRubyObject hrefRuby; + private String prefix; + private String href; - public boolean isEmpty() { - return prefix.isNil() && href.isNil(); - } + public + XmlNamespace(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz); + } - @JRubyMethod - public IRubyObject href(ThreadContext context) { - return href; - } + XmlNamespace(Ruby runtime, Attr attr, String prefix, String href, IRubyObject document) + { + this(runtime, attr, prefix, null, href, null, document); + } - @JRubyMethod - public IRubyObject prefix(ThreadContext context) { - return prefix; + private + XmlNamespace(Ruby runtime, Attr attr, String prefix, IRubyObject prefixRuby, + String href, IRubyObject hrefRuby, IRubyObject document) + { + super(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); + + this.attr = attr; + this.prefix = prefix; + this.href = href; + this.prefixRuby = prefixRuby; + this.hrefRuby = hrefRuby; + setInstanceVariable("@document", document); + } + + public Node + getNode() + { + return attr; + } + + public String + getPrefix() + { + return prefix; + } + + boolean + hasPrefix(String prefix) + { + return prefix == null ? this.prefix == null : prefix.equals(this.prefix); + } + + public String + getHref() + { + return href; + } + + public static XmlNamespace + createFromAttr(Ruby runtime, Attr attr) + { + String prefixStr = getLocalNameForNamespace(attr.getName(), null); + IRubyObject prefix = prefixStr == null ? 
runtime.getNil() : null; + String hrefStr = attr.getValue(); + // check namespace cache + XmlDocument xmlDocument = (XmlDocument) getCachedNodeOrCreate(runtime, attr.getOwnerDocument()); + XmlNamespace namespace = xmlDocument.getNamespaceCache().get(prefixStr, hrefStr); + if (namespace != null) { return namespace; } + + namespace = new XmlNamespace(runtime, attr, prefixStr, prefix, hrefStr, null, xmlDocument); + xmlDocument.getNamespaceCache().put(namespace, attr.getOwnerElement()); + return namespace; + } + + static XmlNamespace + createImpl(Node owner, IRubyObject prefix, String prefixStr, IRubyObject href, String hrefStr) + { + final Ruby runtime = prefix.getRuntime(); + + Document document = owner.getOwnerDocument(); + XmlDocument xmlDocument = (XmlDocument) getCachedNodeOrCreate(runtime, document); + + assert xmlDocument.getNamespaceCache().get(prefixStr, hrefStr) == null; + + // creating XmlNamespace instance + String attrName = "xmlns"; + if (prefixStr != null && !prefixStr.isEmpty()) { attrName = attrName + ':' + prefixStr; } + + Attr attrNode = document.createAttribute(attrName); + attrNode.setNodeValue(hrefStr); + + XmlNamespace namespace = new XmlNamespace(runtime, attrNode, prefixStr, prefix, hrefStr, href, xmlDocument); + xmlDocument.getNamespaceCache().put(namespace, owner); + return namespace; + } + + // owner should be an Attr node + public static XmlNamespace + createDefaultNamespace(Ruby runtime, Node owner) + { + String prefixStr = owner.getPrefix(); + String hrefStr = owner.getNamespaceURI(); + // check namespace cache + XmlDocument xmlDocument = (XmlDocument) getCachedNodeOrCreate(runtime, owner.getOwnerDocument()); + XmlNamespace namespace = xmlDocument.getNamespaceCache().get(prefixStr, hrefStr); + if (namespace != null) { return namespace; } + + namespace = new XmlNamespace(runtime, (Attr) owner, prefixStr, hrefStr, xmlDocument); + xmlDocument.getNamespaceCache().put(namespace, owner); + return namespace; + } + + /** + * Create and return a 
copy of this object. + * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + public boolean + isEmpty() + { + return prefix == null && href == null; + } + + @JRubyMethod + public IRubyObject + href(ThreadContext context) + { + if (hrefRuby == null) { + if (href == null) { return hrefRuby = context.nil; } + return hrefRuby = context.runtime.newString(href); } - - public void accept(ThreadContext context, SaveContextVisitor visitor) { - String string = " " + prefix + "=\"" + href + "\""; - visitor.enter(string); - visitor.leave(string); - // is below better? - //visitor.enter(attr); - //visitor.leave(attr); + return hrefRuby; + } + + @JRubyMethod + public IRubyObject + prefix(ThreadContext context) + { + if (prefixRuby == null) { + if (prefix == null) { return prefixRuby = context.nil; } + return prefixRuby = context.runtime.newString(prefix); } + return prefixRuby; + } + + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + String prefix = this.prefix; + if (prefix == null) { prefix = ""; } + String href = this.href; + if (href == null) { href = ""; } + String string = ' ' + prefix + '=' + '"' + href + '"'; + visitor.enter(string); + visitor.leave(string); + // is below better? 
+ //visitor.enter(attr); + //visitor.leave(attr); + } } diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index d94921fc4e..69e6772959 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1,60 +1,18 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2014: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static java.lang.Math.max; -import static nokogiri.internals.NokogiriHelpers.clearXpathContext; -import static nokogiri.internals.NokogiriHelpers.convertEncoding; -import static nokogiri.internals.NokogiriHelpers.convertString; -import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static nokogiri.internals.NokogiriHelpers.isBlank; -import static nokogiri.internals.NokogiriHelpers.nodeArrayToRubyArray; -import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil; -import static nokogiri.internals.NokogiriHelpers.rubyStringToString; -import static nokogiri.internals.NokogiriHelpers.stringOrNil; +import static nokogiri.internals.NokogiriHelpers.*; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import java.util.*; import org.apache.xerces.dom.CoreDocumentImpl; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyBoolean; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyInteger; @@ -64,8 +22,8 @@ import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.Helpers; import org.jruby.runtime.Block; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; @@ -78,6 +36,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; +import org.w3c.dom.Comment; import nokogiri.internals.HtmlDomParserContext; import nokogiri.internals.NokogiriHelpers; @@ -93,1695 +52,1887 @@ * @author Yoko Harada * @author John Shahid */ -@JRubyClass(name="Nokogiri::XML::Node") -public class XmlNode extends RubyObject { - protected static final String 
TEXT_WRAPPER_NAME = "nokogiri_text_wrapper"; - - /** The underlying Node object. */ - protected Node node; - - /* Cached objects */ - protected IRubyObject content = null; - protected IRubyObject doc = null; - protected IRubyObject name = null; - - /* - * Taken from http://ejohn.org/blog/comparing-document-position/ - * Used for compareDocumentPosition. - * Thanks to both java api and w3 doc for its helpful documentation - */ - - protected static final int IDENTICAL_ELEMENTS = 0; - protected static final int IN_DIFFERENT_DOCUMENTS = 1; - protected static final int SECOND_PRECEDES_FIRST = 2; - protected static final int FIRST_PRECEDES_SECOND = 4; - protected static final int SECOND_CONTAINS_FIRST = 8; - protected static final int FIRST_CONTAINS_SECOND = 16; - - /** - * Cast node to an XmlNode or raise a type error - * in context. - */ - protected static XmlNode asXmlNode(ThreadContext context, IRubyObject node) { - if ( !(node instanceof XmlNode) ) { - final Ruby runtime = context.getRuntime(); - throw runtime.newTypeError(node == null ? runtime.getNil() : node, getNokogiriClass(runtime, "Nokogiri::XML::Node")); - } - return (XmlNode) node; - } - - /** - * Cast node to an XmlNode, or null if RubyNil, or - * raise a type error in context. - */ - protected static XmlNode asXmlNodeOrNull(ThreadContext context, IRubyObject node) { - if (node == null || node.isNil()) return null; - return asXmlNode(context, node); - } - - /** - * Coalesce to adjacent TextNodes. - * @param context - * @param prev Previous node to cur. - * @param cur Next node to prev. 
- */ - public static void coalesceTextNodes(ThreadContext context, IRubyObject prev, IRubyObject cur) { - XmlNode p = asXmlNode(context, prev); - XmlNode c = asXmlNode(context, cur); - - Node pNode = p.node; - Node cNode = c.node; - - pNode.setNodeValue(pNode.getNodeValue()+cNode.getNodeValue()); - p.content = null; // clear cached content - - c.assimilateXmlNode(context, p); - } - - /** - * Coalesce text nodes around anchorNode. If - * anchorNode has siblings (previous or next) that - * are text nodes, the content will be merged into - * anchorNode and the redundant nodes will be removed - * from the DOM. - * - * To match libxml behavior (?) the final content of - * anchorNode and any removed nodes will be - * identical. - * - * @param context - * @param anchorNode - */ - protected static void coalesceTextNodes(ThreadContext context, - IRubyObject anchorNode, - AdoptScheme scheme) { - XmlNode xa = asXmlNode(context, anchorNode); - - XmlNode xp = asXmlNodeOrNull(context, xa.previous_sibling(context)); - XmlNode xn = asXmlNodeOrNull(context, xa.next_sibling(context)); - - Node p = xp == null ? null : xp.node; - Node a = xa.node; - Node n = xn == null ? null : xn.node; - - Node parent = a.getParentNode(); - - boolean shouldMergeP = scheme == AdoptScheme.NEXT_SIBLING || scheme == AdoptScheme.CHILD || scheme == AdoptScheme.REPLACEMENT; - boolean shouldMergeN = scheme == AdoptScheme.PREV_SIBLING || scheme == AdoptScheme.REPLACEMENT; - - // apply the merge right to left - if (shouldMergeN && n != null && n.getNodeType() == Node.TEXT_NODE) { - xa.setContent(a.getNodeValue() + n.getNodeValue()); - parent.removeChild(n); - xn.assimilateXmlNode(context, xa); - } - if (shouldMergeP && p != null && p.getNodeType() == Node.TEXT_NODE) { - xp.setContent(p.getNodeValue() + a.getNodeValue()); - parent.removeChild(a); - xa.assimilateXmlNode(context, xp); - } - } - - /** - * This is the allocator for XmlNode class. It should only be - * called from Ruby code. 
- */ - public XmlNode(Ruby ruby, RubyClass cls) { - super(ruby, cls); - } - - /** - * This is a constructor to create an XmlNode from an already - * existing node. It may be called by Java code. - */ - public XmlNode(Ruby ruby, RubyClass cls, Node node) { - super(ruby, cls); - setNode(ruby.getCurrentContext(), node); - } - - protected void decorate(final ThreadContext context) { - if (node != null) { - resetCache(); - - if (node.getNodeType() != Node.DOCUMENT_NODE) { - doc = document(context.runtime); - - if (doc != null && ! doc.isNil()) { - Helpers.invoke(context, doc, "decorate", this); - } - } - } - } - - /** - * Create and return a copy of this object. - * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - protected void resetCache() { - node.setUserData(NokogiriHelpers.CACHED_NODE, this, null); - } - - /** - * Allocate a new object, perform initialization, call that - * object's initialize method, and call any block passing the - * object as the only argument. If cls is - * Nokogiri::XML::Node, creates a new Nokogiri::XML::Element - * instead. - * - * This static method seems to be inherited, strangely enough. - * E.g. creating a new XmlAttr from Ruby code calls this method if - * XmlAttr does not define its own 'new' method. - * - * Since there is some Java bookkeeping that always needs to - * happen, we don't define the 'initialize' method in Java because - * we'd have to count on subclasses calling 'super'. - * - * The main consequence of this is that every subclass needs to - * define its own 'new' method. - * - * As a convenience, this method does the following: - * - *
    - * - *
  • allocates a new object using the allocator assigned to - * cls
  • - * - *
  • calls the Java method init(); subclasses can override this, - * otherwise they should implement a specific 'new' method
  • - * - *
  • invokes the Ruby initializer
  • - * - *
  • if a block is given, calls the block with the new node as - * the argument
  • - * - *
- * - * -pmahoney - */ - @JRubyMethod(name = "new", meta = true, rest = true) - public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, - IRubyObject[] args, Block block) { - Ruby ruby = context.getRuntime(); - RubyClass klazz = (RubyClass) cls; - - if ("Nokogiri::XML::Node".equals(klazz.getName())) { - klazz = getNokogiriClass(ruby, "Nokogiri::XML::Element"); - } - - XmlNode xmlNode = (XmlNode) klazz.allocate(); - xmlNode.init(context, args); - xmlNode.callInit(args, block); - assert xmlNode.node != null; - if (block.isGiven()) block.call(context, xmlNode); - return xmlNode; - } - - /** - * Initialize the object from Ruby arguments. Should be - * overridden by subclasses. Should check for a minimum number of - * args but not for an exact number. Any extra args will then be - * passed to 'initialize'. The way 'new' and this 'init' function - * interact means that subclasses cannot arbitrarily change the - * require aruments by defining an 'initialize' method. This is - * how the C libxml wrapper works also. - * - * As written it performs initialization for a new Element with - * the given name within the document - * doc. So XmlElement need not override this. This - * implementation cannot be moved to XmlElement however, because - * subclassing XmlNode must result in something that behaves much - * like XmlElement. 
- */ - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) - throw context.getRuntime().newArgumentError(args.length, 2); - - IRubyObject name = args[0]; - IRubyObject doc = args[1]; - - Document document = asXmlNode(context, doc).getOwnerDocument(); - if (document == null) { - throw getRuntime().newArgumentError("node must have owner document"); - } - - Element element; - String node_name = rubyStringToString(name); - String prefix = NokogiriHelpers.getPrefix(node_name); - String namespace_uri = null; - if (document.getDocumentElement() != null) { - namespace_uri = document.getDocumentElement().lookupNamespaceURI(prefix); - } - element = document.createElementNS(namespace_uri, node_name); - setNode(context, element); - } - - /** - * Set the underlying node of this node to the underlying node of - * otherNode. - * - * FIXME: also update the cached node? - */ - protected void assimilateXmlNode(ThreadContext context, IRubyObject otherNode) { - XmlNode toAssimilate = asXmlNode(context, otherNode); - - this.node = toAssimilate.node; - content = null; // clear cache - } - - /** - * See org.w3.dom.Node#normalize. 
- */ - public void normalize() { - node.normalize(); - } - - public Node getNode() { - return node; - } - - public static Node getNodeFromXmlNode(ThreadContext context, IRubyObject xmlNode) { - return asXmlNode(context, xmlNode).node; - } - - protected String indentString(IRubyObject indentStringObject, String xml) { - String[] lines = xml.split("\n"); - - if(lines.length <= 1) return xml; - - String[] resultLines = new String[lines.length]; - - String curLine; - boolean closingTag = false; - String indentString = rubyStringToString(indentStringObject); - int lengthInd = indentString.length(); - StringBuilder curInd = new StringBuilder(); - - resultLines[0] = lines[0]; - - for(int i = 1; i < lines.length; i++) { - - curLine = lines[i].trim(); - - if(curLine.length() == 0) continue; - - if(curLine.startsWith("") && !closingTag) { - curInd.append(indentString); - } - - closingTag = false; - } - - StringBuilder result = new StringBuilder(); - for(int i = 0; i < resultLines.length; i++) { - result.append(resultLines[i]).append('\n'); - } - - return result.toString(); - } - - public boolean isComment() { return false; } - - public boolean isElement() { - if (node instanceof Element) return true; // in case of subclassing - else return false; - } - - public boolean isProcessingInstruction() { return false; } - - /** - * Return the string value of the attribute key or - * nil. - * - * Only applies where the underlying Node is an Element node, but - * implemented here in XmlNode because not all nodes with - * underlying Element nodes subclass XmlElement, such as the DTD - * declarations like XmlElementDecl. 
- */ - protected IRubyObject getAttribute(ThreadContext context, String key) { - return getAttribute(context.getRuntime(), key); - } - - protected IRubyObject getAttribute(Ruby runtime, String key) { - String value = getAttribute(key); - return nonEmptyStringOrNil(runtime, value); - } - - protected String getAttribute(String key) { - if (node.getNodeType() != Node.ELEMENT_NODE) return null; - - String value = ((Element)node).getAttribute(key); - return value.length() == 0 ? null : value; - } - - /** - * This method should be called after a node has been adopted in a new - * document. This method will ensure that the node is renamed with the - * appriopriate NS uri. First the prefix of the node is extracted, then is - * used to lookup the namespace uri in the new document starting at the - * current node and traversing the ancestors. If the namespace uri wasn't - * empty (or null) all children and the node has attributes and/or children - * then the algorithm is recursively applied to the children. - */ - public void relink_namespace(ThreadContext context) { - if (!(node instanceof Element)) { - return; - } - - Element e = (Element) node; - - // disable error checking to prevent lines like the following - // from throwing a `NAMESPACE_ERR' exception: - // Nokogiri::XML::DocumentFragment.parse("a") - // since the `o' prefix isn't defined anywhere. 
- e.getOwnerDocument().setStrictErrorChecking(false); - - String prefix = e.getPrefix(); - String nsURI = e.lookupNamespaceURI(prefix); - this.node = NokogiriHelpers.renameNode(e, nsURI, e.getNodeName()); - - if (nsURI == null || nsURI == "") { - return; - } - - String currentPrefix = e.getParentNode().lookupPrefix(nsURI); - String currentURI = e.getParentNode().lookupNamespaceURI(prefix); - boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI); - - // add xmlns attribute if this is a new root node or if the node's - // namespace isn't a default namespace in the new document - if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) { - // this is the root node, so we must set the namespaces attributes - // anyway - e.setAttribute(prefix == null ? "xmlns":"xmlns:"+prefix, nsURI); - } else if (prefix == null) { - if (!isDefault) - // this is a default namespace but isn't the default where this - // node is being added - e.setAttribute("xmlns", nsURI); - } else if (currentPrefix != prefix || currentURI != nsURI) { - // this is a prefixed namespace but doens't have the same prefix or - // the prefix is set to a diffent URI - e.setAttribute("xmlns:"+prefix, nsURI); - } - - if (e.hasAttributes()) { - NamedNodeMap attrs = e.getAttributes(); - - for (int i = 0; i < attrs.getLength(); i++) { - Attr attr = (Attr) attrs.item(i); - String attrPrefix = attr.getPrefix(); - if (attrPrefix == null) { - attrPrefix = NokogiriHelpers.getPrefix(attr.getNodeName()); - } - String nodeName = attr.getNodeName(); - String nsUri; - if ("xml".equals(attrPrefix)) { - nsUri = "http://www.w3.org/XML/1998/namespace"; - } else if ("xmlns".equals(attrPrefix) || nodeName.equals("xmlns")) { - nsUri = "http://www.w3.org/2000/xmlns/"; - } else { - nsUri = attr.lookupNamespaceURI(attrPrefix); - } - - if (nsUri == e.getNamespaceURI()) { - nsUri = null; - } - - if (!(nsUri == null || "".equals(nsUri) || "http://www.w3.org/XML/1998/namespace".equals(nsUri))) { - // Create a new namespace 
object and add it to the document - // namespace cache. - // TODO: why do we need the namespace cache ? - XmlNamespace.createFromAttr(context.getRuntime(), attr); - } - NokogiriHelpers.renameNode(attr, nsUri, nodeName); - } - } - - if (this.node.hasChildNodes()) { - XmlNodeSet nodeSet = (XmlNodeSet)(children(context)); - nodeSet.relink_namespace(context); - } - } - - // Users might extend XmlNode. This method works for such a case. - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter(node); - XmlNodeSet xmlNodeSet = (XmlNodeSet) children(context); - if (xmlNodeSet.length() > 0) { - RubyArray array = (RubyArray) xmlNodeSet.to_a(context); - for(int i = 0; i < array.getLength(); i++) { - Object item = array.get(i); - if (item instanceof XmlNode) { - XmlNode cur = (XmlNode) item; - cur.accept(context, visitor); - } else if (item instanceof XmlNamespace) { - XmlNamespace cur = (XmlNamespace)item; - cur.accept(context, visitor); - } - } - } - visitor.leave(node); - } - - public void setName(IRubyObject name) { - this.name = name; - } - - public void setDocument(ThreadContext context, IRubyObject doc) { - this.doc = doc; - - setDocumentAndDecorate(context, this, doc); - } - - // shared logic with XmlNodeSet - static void setDocumentAndDecorate(ThreadContext context, RubyObject self, IRubyObject doc) { - self.setInstanceVariable("@document", doc); - if (doc != null) { - Helpers.invoke(context, doc, "decorate", self); - } - } - - public void setNode(ThreadContext context, Node node) { - this.node = node; - - decorate(context); - - if (this instanceof XmlAttr) { - ((XmlAttr)this).setNamespaceIfNecessary(context.getRuntime()); - } - } - - public void updateNodeNamespaceIfNecessary(ThreadContext context, XmlNamespace ns) { - String oldPrefix = this.node.getPrefix(); - String uri = rubyStringToString(ns.href(context)); - - /* - * Update if both prefixes are null or equal - */ - boolean update = (oldPrefix == null && 
ns.prefix(context).isNil()) || - (oldPrefix != null && !ns.prefix(context).isNil() - && oldPrefix.equals(rubyStringToString(ns.prefix(context)))); - - if(update) { - this.node = NokogiriHelpers.renameNode(this.node, uri, this.node.getNodeName()); - } - } - - protected IRubyObject getNodeName(ThreadContext context) { - if (name != null) return name; - - String str = null; - if (node != null) { - str = node.getNodeName(); - str = NokogiriHelpers.getLocalPart(str); - } - if (str == null) str = ""; - if (str.startsWith("#")) str = str.substring(1); // eliminates '#' - return name = NokogiriHelpers.stringOrBlank(context.getRuntime(), str); - } - - /** - * Add a namespace definition to this node. To the underlying - * node, add an attribute of the form - * xmlns:prefix="uri". - */ - @JRubyMethod(name = {"add_namespace_definition", "add_namespace"}) - public IRubyObject add_namespace_definition(ThreadContext context, - IRubyObject prefix, - IRubyObject href) { - String prefixString = rubyStringToString(prefix); - String hrefString ; - - // try to search the namespace first - if (href.isNil()) { - hrefString = this.findNamespaceHref(context, rubyStringToString(prefix)); - if (hrefString == null) { - return context.nil; - } - href = context.getRuntime().newString(hrefString); - } else { - hrefString = rubyStringToString(href); - } - - NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCacheFormNode(node); - XmlNamespace cachedNamespace = nsCache.get(prefixString, hrefString); - - if (cachedNamespace != null) return cachedNamespace; - - Node namespaceOwner; - if (node.getNodeType() == Node.ELEMENT_NODE) { - namespaceOwner = node; - Element element = (Element) node; - // adds namespace as node's attribute - final String uri = "http://www.w3.org/2000/xmlns/"; - String qName = - prefix.isNil() ? 
"xmlns" : "xmlns:" + prefixString; - - element.setAttributeNS(uri, qName, hrefString); - } - else if (node.getNodeType() == Node.ATTRIBUTE_NODE) namespaceOwner = ((Attr)node).getOwnerElement(); - else namespaceOwner = node.getParentNode(); - XmlNamespace ns = XmlNamespace.createFromPrefixAndHref(namespaceOwner, prefix, href); - if (node != namespaceOwner) { - - this.node = NokogiriHelpers.renameNode(node, ns.getHref(), ns.getPrefix() + ":" + node.getLocalName()); - } - updateNodeNamespaceIfNecessary(context, ns); - - return ns; - } - - @JRubyMethod(name = {"attribute", "attr"}) - public IRubyObject attribute(ThreadContext context, IRubyObject name){ - NamedNodeMap attrs = this.node.getAttributes(); - Node attr = attrs.getNamedItem(rubyStringToString(name)); - if(attr == null) { - return context.getRuntime().getNil(); - } - return getCachedNodeOrCreate(context.getRuntime(), attr); - } - - @JRubyMethod - public IRubyObject attribute_nodes(ThreadContext context) { - NamedNodeMap nodeMap = this.node.getAttributes(); - - Ruby ruby = context.getRuntime(); - if(nodeMap == null){ - return ruby.newEmptyArray(); - } - - RubyArray attr = ruby.newArray(); - - for(int i = 0; i < nodeMap.getLength(); i++) { - if ((doc instanceof HtmlDocument) || !NokogiriHelpers.isNamespace(nodeMap.item(i))) { - attr.append(getCachedNodeOrCreate(context.getRuntime(), nodeMap.item(i))); - } - } - - return attr; - } - - @JRubyMethod - public IRubyObject attribute_with_ns(ThreadContext context, IRubyObject name, IRubyObject namespace) { - String namej = rubyStringToString(name); - String nsj = (namespace.isNil()) ? 
null : rubyStringToString(namespace); - - Node el = this.node.getAttributes().getNamedItemNS(nsj, namej); - - if(el == null) { - return context.getRuntime().getNil(); - } - return NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), el); - } - - @JRubyMethod(name = "blank?") - public IRubyObject blank_p(ThreadContext context) { - // according to libxml doc, - // a node is blank if if it is a Text or CDATA node consisting of whitespace only - if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) { - String data = node.getTextContent(); - return context.runtime.newBoolean(data == null || isBlank(data)); - } - return context.runtime.getFalse(); - } - - @JRubyMethod - public IRubyObject child(ThreadContext context) { - return getCachedNodeOrCreate(context.getRuntime(), node.getFirstChild()); - } - - @JRubyMethod - public IRubyObject children(ThreadContext context) { - XmlNodeSet xmlNodeSet = XmlNodeSet.newEmptyNodeSet(context); - - NodeList nodeList = node.getChildNodes(); - if (nodeList.getLength() > 0) { - xmlNodeSet.setNodeList(nodeList); // initializes @document from first node - } - else { // TODO this is very ripe for refactoring - setDocumentAndDecorate(context, xmlNodeSet, doc); - } - - return xmlNodeSet; - } - - @JRubyMethod - public IRubyObject first_element_child(ThreadContext context) { - List elementNodes = new ArrayList(); - addElements(node, elementNodes, true); - if (elementNodes.size() == 0) return context.getRuntime().getNil(); - return getCachedNodeOrCreate(context.getRuntime(), elementNodes.get(0)); - } - - @JRubyMethod - public IRubyObject last_element_child(ThreadContext context) { - List elementNodes = new ArrayList(); - addElements(node, elementNodes, false); - if (elementNodes.size() == 0) return context.getRuntime().getNil(); - return getCachedNodeOrCreate(context.getRuntime(), elementNodes.get(elementNodes.size()-1)); - } - - @JRubyMethod(name = {"element_children", "elements"}) - public IRubyObject 
element_children(ThreadContext context) { - List elementNodes = new ArrayList(); - addElements(node, elementNodes, false); - IRubyObject[] array = NokogiriHelpers.nodeArrayToArray(context.runtime, - elementNodes.toArray(new Node[0])); - XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, array); - return xmlNodeSet; - } - - private void addElements(Node n, List nodes, boolean isFirstOnly) { - NodeList children = n.getChildNodes(); - if (children.getLength() == 0) return; - for (int i=0; i< children.getLength(); i++) { - Node child = children.item(i); - if (child.getNodeType() == Node.ELEMENT_NODE) { - nodes.add(child); - if (isFirstOnly) return; - } - } - } - - /** - * call-seq: - * compare(other) - * - * Compare this Node to +other+ with respect to their Document - */ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject compare(ThreadContext context, IRubyObject other) { - if (!(other instanceof XmlNode)) { - return context.getRuntime().newFixnum(-2); - } - - Node otherNode = asXmlNode(context, other).node; - - // Do not touch this if, if it's not for a good reason. - if (node.getNodeType() == Node.DOCUMENT_NODE || - otherNode.getNodeType() == Node.DOCUMENT_NODE) { - return context.getRuntime().newFixnum(-1); - } - - try{ - int res = node.compareDocumentPosition(otherNode); - if ((res & FIRST_PRECEDES_SECOND) == FIRST_PRECEDES_SECOND) { - return context.getRuntime().newFixnum(-1); - } else if ((res & SECOND_PRECEDES_FIRST) == SECOND_PRECEDES_FIRST) { - return context.getRuntime().newFixnum(1); - } else if (res == IDENTICAL_ELEMENTS) { - return context.getRuntime().newFixnum(0); - } - - return context.getRuntime().newFixnum(-2); - } catch (Exception ex) { - return context.getRuntime().newFixnum(-2); - } - } - - /** - * TODO: this is a stub implementation. It's not clear what - * 'in_context' is supposed to do. Also should take - * options into account. 
- */ - @JRubyMethod(required = 2, visibility = Visibility.PRIVATE) - public IRubyObject in_context(ThreadContext context, - IRubyObject str, - IRubyObject options) { - RubyModule klass; - XmlDomParserContext ctx; - InputStream istream; - XmlDocument document; - - IRubyObject d = document(context); - Ruby runtime = context.getRuntime(); - if (d != null && d instanceof XmlDocument) { - document = (XmlDocument)d; - } else { - return runtime.getNil(); - } - - if (document instanceof HtmlDocument) { - klass = getNokogiriClass(runtime, "Nokogiri::HTML::Document"); - ctx = new HtmlDomParserContext(runtime, options); - ((HtmlDomParserContext)ctx).enableDocumentFragment(); - istream = new ByteArrayInputStream((rubyStringToString(str)).getBytes()); - } else { - klass = getNokogiriClass(runtime, "Nokogiri::XML::Document"); - ctx = new XmlDomParserContext(runtime, options); - String input = rubyStringToString(str); - istream = new ByteArrayInputStream(input.getBytes()); - } - - ctx.setInputSource(istream); - // TODO: for some reason, document.getEncoding() can be null or nil (don't know why) - // run `test_parse_with_unparented_html_text_context_node' few times to see this happen - if (document instanceof HtmlDocument && !(document.getEncoding() == null || document.getEncoding().isNil())) { - HtmlDomParserContext htmlCtx= (HtmlDomParserContext) ctx; - htmlCtx.setEncoding(document.getEncoding().asJavaString()); - } - - XmlDocument doc = ctx.parse(context, klass, runtime.getNil()); - - RubyArray documentErrors = getErrorArray(document); - RubyArray docErrors = getErrorArray(doc); - if (isErrorIncreased(documentErrors, docErrors)) { - for (int i = 0; i < docErrors.getLength(); i++) { - documentErrors.add(docErrors.entry(i)); - } - document.setInstanceVariable("@errors", documentErrors); - XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, new IRubyObject[0]); - return xmlNodeSet; - } - - // The first child might be document type node (dtd declaration). 
- // XmlNodeSet to be return should not have dtd decl in its list. - Node first; - if (doc.node.getFirstChild().getNodeType() == Node.DOCUMENT_TYPE_NODE) { - first = doc.node.getFirstChild().getNextSibling(); - } else { - first = doc.node.getFirstChild(); - } - - IRubyObject[] nodes = new IRubyObject[]{NokogiriHelpers.getCachedNodeOrCreate(runtime, first)}; - XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, nodes); - return xmlNodeSet; - } - - private RubyArray getErrorArray(XmlDocument document) { - IRubyObject obj = document.getInstanceVariable("@errors"); - if (obj != null && obj instanceof RubyArray) { - return (RubyArray)obj; - } - return RubyArray.newArray(document.getRuntime()); - } - - private boolean isErrorIncreased(RubyArray baseErrors, RubyArray createdErrors) { - int length = ((RubyArray) createdErrors.op_diff(baseErrors)).size(); - return length > 0; - } - - @JRubyMethod(name = {"content", "text", "inner_text"}) - public IRubyObject content(ThreadContext context) { - return stringOrNil(context.getRuntime(), getContentImpl()); - } - - public CharSequence getContentImpl() { - if (!node.hasChildNodes() && node.getNodeValue() == null && - (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE)) { - return null; - } - CharSequence textContent; - if (this instanceof XmlDocument) { - Node node = ((Document)this.node).getDocumentElement(); - if (node == null) { - textContent = ""; - } else { - Node documentElement = ((Document) this.node).getDocumentElement(); - textContent = getTextContentRecursively(new StringBuilder(), documentElement); - } - } else { - textContent = getTextContentRecursively(new StringBuilder(), node); - } - // textContent = NokogiriHelpers.convertEncodingByNKFIfNecessary(context, (XmlDocument) document(context), textContent); - return textContent; - } - - private StringBuilder getTextContentRecursively(StringBuilder buffer, Node currentNode) { - CharSequence textContent = 
currentNode.getNodeValue(); - if (textContent != null && NokogiriHelpers.shouldDecode(currentNode)) { - textContent = NokogiriHelpers.decodeJavaString(textContent); - } - if (textContent != null) buffer.append(textContent); - NodeList children = currentNode.getChildNodes(); - for (int i = 0; i < children.getLength(); i++) { - Node child = children.item(i); - if (hasTextContent(child)) getTextContentRecursively(buffer, child); - } - return buffer; - } - - private boolean hasTextContent(Node child) { - return child.getNodeType() != Node.COMMENT_NODE && child.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE; - } - - @JRubyMethod - public final IRubyObject document(ThreadContext context) { - return document(context.runtime); - } - - IRubyObject document(final Ruby runtime) { - if (doc == null) { - doc = (XmlDocument) node.getOwnerDocument().getUserData(NokogiriHelpers.CACHED_NODE); - } - if (doc == null) { - doc = getCachedNodeOrCreate(runtime, node.getOwnerDocument()); - node.getOwnerDocument().setUserData(NokogiriHelpers.CACHED_NODE, doc, null); - } - return doc; - } - - public IRubyObject dup() { - return dup_implementation(getMetaClass().getClassRuntime(), true); - } - - @JRubyMethod - public IRubyObject dup(ThreadContext context) { - return dup_implementation(context, true); - } - - @JRubyMethod - public IRubyObject dup(ThreadContext context, IRubyObject depth) { - boolean deep = depth instanceof RubyInteger && RubyFixnum.fix2int(depth) != 0; - return dup_implementation(context, deep); - } - - protected final IRubyObject dup_implementation(ThreadContext context, boolean deep) { - return dup_implementation(context.getRuntime(), deep); - } - - protected IRubyObject dup_implementation(Ruby runtime, boolean deep) { - XmlNode clone; - try { - clone = (XmlNode) clone(); - } catch (CloneNotSupportedException e) { - throw runtime.newRuntimeError(e.toString()); - } - Node newNode = node.cloneNode(deep); - clone.node = newNode; - return clone; - } - - public static 
RubyString encode_special_chars(ThreadContext context, IRubyObject string) { - CharSequence str = NokogiriHelpers.encodeJavaString( rubyStringToString(string) ); - return RubyString.newString(context.getRuntime(), str); - } - - /** - * Instance method version of the above static method. - */ - @JRubyMethod(name="encode_special_chars") - public IRubyObject i_encode_special_chars(ThreadContext context, - IRubyObject string) { - return encode_special_chars(context, string); - } - - /** - * Get the attribute at the given key, key. - * Assumes that this node has attributes (i.e. that key? returned - * true). - */ - @JRubyMethod(visibility = Visibility.PRIVATE) - public IRubyObject get(ThreadContext context, IRubyObject rbkey) { - if (node instanceof Element) { - if (rbkey == null || rbkey.isNil()) context.getRuntime().getNil(); - String key = rubyStringToString(rbkey); - Element element = (Element) node; - if (!element.hasAttribute(key)) return context.getRuntime().getNil(); - String value = element.getAttribute(key); - return stringOrNil(context.getRuntime(), value); - } - return context.getRuntime().getNil(); +@JRubyClass(name = "Nokogiri::XML::Node") +public class XmlNode extends RubyObject +{ + private static final long serialVersionUID = 1L; + + protected static final String TEXT_WRAPPER_NAME = "nokogiri_text_wrapper"; + + /** The underlying Node object. */ + protected Node node; + + /* Cached objects */ + protected IRubyObject content = null; + private transient XmlDocument doc; + protected transient RubyString name; + + /* + * Taken from http://ejohn.org/blog/comparing-document-position/ + * Used for compareDocumentPosition. 
+ * Thanks to both java api and w3 doc for its helpful documentation + */ + + protected static final int IDENTICAL_ELEMENTS = 0; + protected static final int IN_DIFFERENT_DOCUMENTS = 1; + protected static final int SECOND_PRECEDES_FIRST = 2; + protected static final int FIRST_PRECEDES_SECOND = 4; + protected static final int SECOND_CONTAINS_FIRST = 8; + protected static final int FIRST_CONTAINS_SECOND = 16; + + /** + * Cast node to an XmlNode or raise a type error + * in context. + */ + protected static XmlNode + asXmlNode(ThreadContext context, IRubyObject node) + { + if (!(node instanceof XmlNode)) { + final Ruby runtime = context.runtime; + throw runtime.newTypeError(node == null ? runtime.getNil() : node, getNokogiriClass(runtime, "Nokogiri::XML::Node")); + } + return (XmlNode) node; + } + + /** + * Cast node to an XmlNode, or null if RubyNil, or + * raise a type error in context. + */ + protected static XmlNode + asXmlNodeOrNull(ThreadContext context, IRubyObject node) + { + if (node == null || node.isNil()) { return null; } + return asXmlNode(context, node); + } + + /** + * Coalesce to adjacent TextNodes. + * @param context + * @param prev Previous node to cur. + * @param cur Next node to prev. + */ + public static void + coalesceTextNodes(ThreadContext context, IRubyObject prev, IRubyObject cur) + { + XmlNode p = asXmlNode(context, prev); + XmlNode c = asXmlNode(context, cur); + + Node pNode = p.node; + Node cNode = c.node; + + pNode.setNodeValue(pNode.getNodeValue() + cNode.getNodeValue()); + p.content = null; // clear cached content + + c.assimilateXmlNode(context, p); + } + + /** + * Coalesce text nodes around anchorNode. If + * anchorNode has siblings (previous or next) that + * are text nodes, the content will be merged into + * anchorNode and the redundant nodes will be removed + * from the DOM. + * + * To match libxml behavior (?) the final content of + * anchorNode and any removed nodes will be + * identical. 
+ * + * @param context + * @param anchorNode + */ + protected static void + coalesceTextNodes(ThreadContext context, + IRubyObject anchorNode, + AdoptScheme scheme) + { + XmlNode xa = asXmlNode(context, anchorNode); + + XmlNode xp = asXmlNodeOrNull(context, xa.previous_sibling(context)); + XmlNode xn = asXmlNodeOrNull(context, xa.next_sibling(context)); + + Node p = xp == null ? null : xp.node; + Node a = xa.node; + Node n = xn == null ? null : xn.node; + + Node parent = a.getParentNode(); + + boolean shouldMergeP = scheme == AdoptScheme.NEXT_SIBLING || scheme == AdoptScheme.CHILD + || scheme == AdoptScheme.REPLACEMENT; + boolean shouldMergeN = scheme == AdoptScheme.PREV_SIBLING || scheme == AdoptScheme.REPLACEMENT; + + // apply the merge right to left + if (shouldMergeN && n != null && n.getNodeType() == Node.TEXT_NODE) { + xa.setContent(a.getNodeValue() + n.getNodeValue()); + parent.removeChild(n); + xn.assimilateXmlNode(context, xa); + } + if (shouldMergeP && p != null && p.getNodeType() == Node.TEXT_NODE) { + xp.setContent(p.getNodeValue() + a.getNodeValue()); + parent.removeChild(a); + xa.assimilateXmlNode(context, xp); + } + } + + /** + * This is the allocator for XmlNode class. It should only be + * called from Ruby code. + */ + public + XmlNode(Ruby runtime, RubyClass klass) + { + super(runtime, klass); + } + + /** + * This is a constructor to create an XmlNode from an already + * existing node. It may be called by Java code. + */ + public + XmlNode(Ruby runtime, RubyClass klass, Node node) + { + super(runtime, klass); + setNode(runtime, node); + } + + protected void + decorate(final Ruby runtime) + { + if (node != null) { + resetCache(); + + if (node.getNodeType() != Node.DOCUMENT_NODE) { + setDocumentAndDecorate(runtime.getCurrentContext(), this, document(runtime)); + } } - - /** - * Returns the owner document, checking if this node is the - * document, or returns null if there is no owner. 
- */ - protected Document getOwnerDocument() { - if (node.getNodeType() == Node.DOCUMENT_NODE) { - return (Document) node; + } + + /** + * Create and return a copy of this object. + * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + protected void + resetCache() + { + node.setUserData(NokogiriHelpers.CACHED_NODE, this, null); + } + + /** + * Allocate a new object, perform initialization, call that + * object's initialize method, and call any block passing the + * object as the only argument. If cls is + * Nokogiri::XML::Node, creates a new Nokogiri::XML::Element + * instead. + * + * This static method seems to be inherited, strangely enough. + * E.g. creating a new XmlAttr from Ruby code calls this method if + * XmlAttr does not define its own 'new' method. + * + * Since there is some Java bookkeeping that always needs to + * happen, we don't define the 'initialize' method in Java because + * we'd have to count on subclasses calling 'super'. + * + * The main consequence of this is that every subclass needs to + * define its own 'new' method. + * + * As a convenience, this method does the following: + * + *
    + * + *
  • allocates a new object using the allocator assigned to + * cls
  • + * + *
  • calls the Java method init(); subclasses can override this, + * otherwise they should implement a specific 'new' method
  • + * + *
  • invokes the Ruby initializer
  • + * + *
  • if a block is given, calls the block with the new node as + * the argument
  • + * + *
+ * + * -pmahoney + */ + @JRubyMethod(name = "new", meta = true, rest = true) + public static IRubyObject + rbNew(ThreadContext context, IRubyObject cls, + IRubyObject[] args, Block block) + { + Ruby ruby = context.runtime; + RubyClass klazz = (RubyClass) cls; + + if ("Nokogiri::XML::Node".equals(klazz.getName())) { + klazz = getNokogiriClass(ruby, "Nokogiri::XML::Element"); + } + + XmlNode xmlNode = (XmlNode) klazz.allocate(); + xmlNode.init(context, args); + xmlNode.callInit(args, block); + assert xmlNode.node != null; + if (block.isGiven()) { block.call(context, xmlNode); } + return xmlNode; + } + + /** + * Initialize the object from Ruby arguments. Should be + * overridden by subclasses. Should check for a minimum number of + * args but not for an exact number. Any extra args will then be + * passed to 'initialize'. The way 'new' and this 'init' function + * interact means that subclasses cannot arbitrarily change the + * require aruments by defining an 'initialize' method. This is + * how the C libxml wrapper works also. + * + * As written it performs initialization for a new Element with + * the given name within the document + * doc. So XmlElement need not override this. This + * implementation cannot be moved to XmlElement however, because + * subclassing XmlNode must result in something that behaves much + * like XmlElement. + */ + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw context.runtime.newArgumentError(args.length, 2); + } + + IRubyObject name = args[0]; + IRubyObject doc = args[1]; + + if (!(doc instanceof XmlNode)) { + throw context.runtime.newArgumentError("document must be a Nokogiri::XML::Node"); + } + if (!(doc instanceof XmlDocument)) { + // TODO: deprecate allowing Node + context.runtime.getWarnings().warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. 
This will become an error in a future release of Nokogiri."); + } + + Document document = asXmlNode(context, doc).getOwnerDocument(); + if (document == null) { + throw context.runtime.newArgumentError("node must have owner document"); + } + + Element element; + String node_name = rubyStringToString(name); + String prefix = NokogiriHelpers.getPrefix(node_name); + String namespace_uri = null; + if (document.getDocumentElement() != null) { + namespace_uri = document.getDocumentElement().lookupNamespaceURI(prefix); + } + element = document.createElementNS(namespace_uri, node_name); + setNode(context.runtime, element); + } + + /** + * Set the underlying node of this node to the underlying node of + * otherNode. + * + * FIXME: also update the cached node? + */ + protected void + assimilateXmlNode(ThreadContext context, IRubyObject otherNode) + { + XmlNode toAssimilate = asXmlNode(context, otherNode); + + this.node = toAssimilate.node; + content = null; // clear cache + } + + /** + * See org.w3.dom.Node#normalize. + */ + public void + normalize() + { + node.normalize(); + } + + public Node + getNode() + { + return node; + } + + public boolean + isComment() { return false; } + + public boolean + isElement() + { + if (node instanceof Element) { return true; } // in case of subclassing + else { return false; } + } + + public boolean + isProcessingInstruction() { return false; } + + /** + * Return the string value of the attribute key or + * nil. + * + * Only applies where the underlying Node is an Element node, but + * implemented here in XmlNode because not all nodes with + * underlying Element nodes subclass XmlElement, such as the DTD + * declarations like XmlElementDecl. 
+ */ + protected IRubyObject + getAttribute(ThreadContext context, String key) + { + return getAttribute(context.runtime, key); + } + + protected IRubyObject + getAttribute(Ruby runtime, String key) + { + String value = getAttribute(key); + return nonEmptyStringOrNil(runtime, value); + } + + protected String + getAttribute(String key) + { + if (node.getNodeType() != Node.ELEMENT_NODE) { return null; } + + String value = ((Element)node).getAttribute(key); + return value.length() == 0 ? null : value; + } + + /** + * This method should be called after a node has been adopted in a new + * document. This method will ensure that the node is renamed with the + * appriopriate NS uri. First the prefix of the node is extracted, then is + * used to lookup the namespace uri in the new document starting at the + * current node and traversing the ancestors. If the namespace uri wasn't + * empty (or null) all children and the node has attributes and/or children + * then the algorithm is recursively applied to the children. + */ + public void + relink_namespace(ThreadContext context) + { + if (!(node instanceof Element)) { + return; + } + + Element e = (Element) node; + + // disable error checking to prevent lines like the following + // from throwing a `NAMESPACE_ERR' exception: + // Nokogiri::XML::DocumentFragment.parse("a") + // since the `o' prefix isn't defined anywhere. 
+ e.getOwnerDocument().setStrictErrorChecking(false); + + String prefix = e.getPrefix(); + String nsURI = e.lookupNamespaceURI(prefix); + this.node = NokogiriHelpers.renameNode(e, nsURI, e.getNodeName()); + + if (nsURI == null || nsURI.isEmpty()) { + RubyBoolean ns_inherit = + (RubyBoolean)document(context.runtime).getInstanceVariable("@namespace_inheritance"); + if (ns_inherit.isTrue()) { + set_namespace(context, ((XmlNode)parent(context)).namespace(context)); + } + return; + } + + String currentPrefix = e.getParentNode().lookupPrefix(nsURI); + String currentURI = e.getParentNode().lookupNamespaceURI(prefix); + boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI); + + // add xmlns attribute if this is a new root node or if the node's + // namespace isn't a default namespace in the new document + if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) { + // this is the root node, so we must set the namespaces attributes anyway + e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI); + } else if (prefix == null) { + // this is a default namespace but isn't the default where this node is being added + if (!isDefault) { e.setAttribute("xmlns", nsURI); } + } else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) { + // this is a prefixed namespace + // but doesn't have the same prefix or the prefix is set to a different URI + e.setAttribute("xmlns:" + prefix, nsURI); + } + + if (e.hasAttributes()) { + NamedNodeMap attrs = e.getAttributes(); + + for (int i = 0; i < attrs.getLength(); i++) { + Attr attr = (Attr) attrs.item(i); + String attrPrefix = attr.getPrefix(); + if (attrPrefix == null) { + attrPrefix = NokogiriHelpers.getPrefix(attr.getNodeName()); + } + String nodeName = attr.getNodeName(); + String nsUri; + if ("xml".equals(attrPrefix)) { + nsUri = "http://www.w3.org/XML/1998/namespace"; + } else if ("xmlns".equals(attrPrefix) || nodeName.equals("xmlns")) { + nsUri = "http://www.w3.org/2000/xmlns/"; } else { - return 
node.getOwnerDocument(); - } - } - - @JRubyMethod - public IRubyObject internal_subset(ThreadContext context) { - Document document = getOwnerDocument(); - - if(document == null) { - return context.getRuntime().getNil(); + nsUri = attr.lookupNamespaceURI(attrPrefix); } - XmlDocument xdoc = - (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); - IRubyObject xdtd = xdoc.getInternalSubset(context); - return xdtd; - } - - @JRubyMethod - public IRubyObject create_internal_subset(ThreadContext context, - IRubyObject name, - IRubyObject external_id, - IRubyObject system_id) { - IRubyObject subset = internal_subset(context); - if (!subset.isNil()) { - throw context.getRuntime() - .newRuntimeError("Document already has internal subset"); + if (nsUri != null && nsUri.equals(e.getNamespaceURI())) { + nsUri = null; } - Document document = getOwnerDocument(); - if(document == null) { - return context.getRuntime().getNil(); + if (!(nsUri == null || "".equals(nsUri) || "http://www.w3.org/XML/1998/namespace".equals(nsUri))) { + // Create a new namespace object and add it to the document namespace cache. + // TODO: why do we need the namespace cache ? 
+ XmlNamespace.createFromAttr(context.runtime, attr); } - - XmlDocument xdoc = - (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); - IRubyObject xdtd = xdoc.createInternalSubset(context, name, - external_id, system_id); - return xdtd; - } - - @JRubyMethod - public IRubyObject external_subset(ThreadContext context) { - Document document = getOwnerDocument(); - - if (document == null) { - return context.getRuntime().getNil(); - } - - XmlDocument xdoc = - (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); - IRubyObject xdtd = xdoc.getExternalSubset(context); - return xdtd; - } - - @JRubyMethod - public IRubyObject create_external_subset(ThreadContext context, - IRubyObject name, - IRubyObject external_id, - IRubyObject system_id) { - IRubyObject subset = external_subset(context); - if (!subset.isNil()) { - throw context.getRuntime() - .newRuntimeError("Document already has external subset"); - } - - Document document = getOwnerDocument(); - if(document == null) { - return context.getRuntime().getNil(); - } - XmlDocument xdoc = (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); - IRubyObject xdtd = xdoc.createExternalSubset(context, name, external_id, system_id); - return xdtd; + NokogiriHelpers.renameNode(attr, nsUri, nodeName); + } } - /** - * Test if this node has an attribute named rbkey. - * Overridden in XmlElement. - */ - @JRubyMethod(name = {"key?", "has_attribute?"}) - public IRubyObject key_p(ThreadContext context, IRubyObject rbkey) { - if (node instanceof Element) { - String key = rubyStringToString(rbkey); - Element element = (Element) node; - if (element.hasAttribute(key)) { - return context.getRuntime().getTrue(); - } else { - NamedNodeMap namedNodeMap = element.getAttributes(); - for (int i=0; i namespaces = ((XmlDocument)doc).getNamespaceCache().get(node); - for (XmlNamespace namespace : namespaces) { - namespace_definitions.append(namespace); + } + + // Users might extend XmlNode. 
This method works for such a case. + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter(node); + acceptChildren(context, getChildren(), visitor); + visitor.leave(node); + } + + void + acceptChildren(ThreadContext context, IRubyObject[] nodes, SaveContextVisitor visitor) + { + if (nodes.length > 0) { + for (int i = 0; i < nodes.length; i++) { + Object item = nodes[i]; + if (item instanceof XmlNode) { + ((XmlNode) item).accept(context, visitor); + } else if (item instanceof XmlNamespace) { + ((XmlNamespace) item).accept(context, visitor); } - - return namespace_definitions; + } } + } + + RubyString + doSetName(IRubyObject name) + { + if (name.isNil()) { return this.name = null; } + return this.name = name.convertToString(); + } + + public void + setDocument(ThreadContext context, XmlDocument doc) + { + this.doc = doc; + + setDocumentAndDecorate(context, this, doc); + } + + // shared logic with XmlNodeSet + static void + setDocumentAndDecorate(ThreadContext context, RubyObject self, XmlDocument doc) + { + self.setInstanceVariable("@document", doc == null ? context.nil : doc); + if (doc != null) { Helpers.invoke(context, doc, "decorate", self); } + } + + public void + setNode(Ruby runtime, Node node) + { + this.node = node; + + decorate(runtime); + + if (this instanceof XmlAttr) { + ((XmlAttr) this).setNamespaceIfNecessary(runtime); + } + } + + protected IRubyObject + getNodeName(ThreadContext context) + { + if (name != null) { return name; } + + String str = null; + if (node != null) { + str = NokogiriHelpers.getLocalPart(node.getNodeName()); + } + if (str == null) { str = ""; } + if (str.startsWith("#")) { str = str.substring(1); } // eliminates '#' + return name = context.runtime.newString(str); + } + + /** + * Add a namespace definition to this node. To the underlying + * node, add an attribute of the form + * xmlns:prefix="uri". 
+ */ + @JRubyMethod(name = {"add_namespace_definition", "add_namespace"}) + public IRubyObject + add_namespace_definition(ThreadContext context, IRubyObject prefix, IRubyObject href) + { + String hrefStr, prefixStr = prefix.isNil() ? null : prefix.convertToString().decodeString(); + + // try to search the namespace first + if (href.isNil()) { + hrefStr = findNamespaceHref(context, prefixStr); + if (hrefStr == null) { return context.nil; } + href = context.runtime.newString(hrefStr); + } else { + hrefStr = rubyStringToString(href.convertToString()); + } + + Node namespaceOwner; + if (node.getNodeType() == Node.ELEMENT_NODE) { + namespaceOwner = node; + // adds namespace as node's attribute + String qName = prefix.isNil() ? "xmlns" : "xmlns:" + prefixStr; + ((Element)node).setAttributeNS("http://www.w3.org/2000/xmlns/", qName, hrefStr); + } else if (node.getNodeType() == Node.ATTRIBUTE_NODE) { + namespaceOwner = ((Attr) node).getOwnerElement(); + } else { + namespaceOwner = node.getParentNode(); + } + + NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCache(node); + XmlNamespace ns = nsCache.get(prefixStr, hrefStr); + if (ns == null) { + ns = XmlNamespace.createImpl(namespaceOwner, prefix, prefixStr, href, hrefStr); + } + if (node != namespaceOwner) { + node = NokogiriHelpers.renameNode(node, ns.getHref(), ns.getPrefix() + ':' + node.getLocalName()); + } + updateNodeNamespaceIfNecessary(ns); + + return ns; + } + + private void + updateNodeNamespaceIfNecessary(XmlNamespace ns) + { + String oldPrefix = this.node.getPrefix(); - /** - * Return an array of XmlNamespace nodes defined on this node and - * on any ancestor node. 
+ /* + * Update if both prefixes are null or equal */ - @JRubyMethod - public IRubyObject namespace_scopes(ThreadContext context) { - RubyArray scoped_namespaces = context.getRuntime().newArray(); - if (doc == null) return scoped_namespaces; - if (doc instanceof HtmlDocument) return scoped_namespaces; - - Node previousNode; - if (node.getNodeType() == Node.ELEMENT_NODE) { - previousNode = node; - } else if (node.getNodeType() == Node.ATTRIBUTE_NODE) { - previousNode = ((Attr)node).getOwnerElement(); - } else { - previousNode = findPreviousElement(node); - } - if (previousNode == null) return scoped_namespaces; - - List prefixes_in_scope = new ArrayList(); - NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCacheFormNode(previousNode); - for (Node previous=previousNode; previous != null; ) { - List namespaces = nsCache.get(previous); - for (XmlNamespace namespace : namespaces) { - if (prefixes_in_scope.contains(namespace.getPrefix())) continue; - scoped_namespaces.append(namespace); - prefixes_in_scope.add(namespace.getPrefix()); - } - previous = findPreviousElement(previous); - } - return scoped_namespaces; - } - - private Node findPreviousElement(Node n) { - Node previous = n.getPreviousSibling() == null ? n.getParentNode() : n.getPreviousSibling(); - if (previous == null || previous.getNodeType() == Node.DOCUMENT_NODE) return null; - if (previous.getNodeType() == Node.ELEMENT_NODE) { - return previous; - } else { - return findPreviousElement(previous); - } - } - - @JRubyMethod(name="namespaced_key?") - public IRubyObject namespaced_key_p(ThreadContext context, IRubyObject elementLName, IRubyObject namespaceUri) { - return this.attribute_with_ns(context, elementLName, namespaceUri).isNil() ? 
- context.getRuntime().getFalse() : context.getRuntime().getTrue(); + boolean update = + (oldPrefix == null && ns.getPrefix() == null) || + (oldPrefix != null && oldPrefix.equals(ns.getPrefix())); + + if (update) { + this.node = NokogiriHelpers.renameNode(this.node, ns.getHref(), this.node.getNodeName()); + } + } + + @JRubyMethod(name = {"attribute", "attr"}) + public IRubyObject + attribute(ThreadContext context, IRubyObject name) + { + NamedNodeMap attrs = this.node.getAttributes(); + Node attr = attrs.getNamedItem(rubyStringToString(name)); + if (attr == null) { return context.nil; } + return getCachedNodeOrCreate(context.runtime, attr); + } + + @JRubyMethod + public IRubyObject + attribute_nodes(ThreadContext context) + { + final Ruby runtime = context.runtime; + + NamedNodeMap nodeMap = this.node.getAttributes(); + + if (nodeMap == null) { return runtime.newEmptyArray(); } + RubyArray attr = runtime.newArray(nodeMap.getLength()); + + final XmlDocument doc = document(context.runtime); + for (int i = 0; i < nodeMap.getLength(); i++) { + if ((doc instanceof Html4Document) || !NokogiriHelpers.isNamespace(nodeMap.item(i))) { + attr.append(getCachedNodeOrCreate(runtime, nodeMap.item(i))); + } } - protected void setContent(IRubyObject content) { - String javaContent = rubyStringToString(content); - node.setTextContent(javaContent); - if (javaContent == null || javaContent.length() == 0) return; - if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) return; - if (node.getFirstChild() != null) { - node.getFirstChild().setUserData(NokogiriHelpers.ENCODED_STRING, true, null); - } + return attr; + } + + @JRubyMethod + public IRubyObject + attribute_with_ns(ThreadContext context, IRubyObject name, IRubyObject namespace) + { + String namej = rubyStringToString(name); + String nsj = (namespace.isNil()) ? 
null : rubyStringToString(namespace); + + Node el = this.node.getAttributes().getNamedItemNS(nsj, namej); + + if (el == null) { return context.nil; } + + return NokogiriHelpers.getCachedNodeOrCreate(context.runtime, el); + } + + @JRubyMethod(name = "blank?") + public IRubyObject + blank_p(ThreadContext context) + { + // according to libxml doc, + // a node is blank if if it is a Text or CDATA node consisting of whitespace only + if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) { + String data = node.getTextContent(); + return context.runtime.newBoolean(data == null || isBlank(data)); + } + return context.runtime.getFalse(); + } + + @JRubyMethod + public IRubyObject + child(ThreadContext context) + { + return getCachedNodeOrCreate(context.getRuntime(), node.getFirstChild()); + } + + @JRubyMethod + public IRubyObject + children(ThreadContext context) + { + final IRubyObject[] nodes = getChildren(); + if (nodes.length == 0) { + return XmlNodeSet.newEmptyNodeSet(context, this); + } + return XmlNodeSet.newNodeSet(context.runtime, nodes); + } + + IRubyObject[] + getChildren() + { + NodeList nodeList = node.getChildNodes(); + if (nodeList.getLength() > 0) { + return nodeListToRubyArray(getRuntime(), nodeList); + } + return IRubyObject.NULL_ARRAY; + } + + @JRubyMethod + public IRubyObject + first_element_child(ThreadContext context) + { + List elementNodes = getElements(node, true); + if (elementNodes.size() == 0) { return context.nil; } + return getCachedNodeOrCreate(context.runtime, elementNodes.get(0)); + } + + @JRubyMethod + public IRubyObject + last_element_child(ThreadContext context) + { + List elementNodes = getElements(node, false); + if (elementNodes.size() == 0) { return context.nil; } + return getCachedNodeOrCreate(context.runtime, elementNodes.get(elementNodes.size() - 1)); + } + + @JRubyMethod(name = {"element_children", "elements"}) + public IRubyObject + element_children(ThreadContext context) + { + List 
elementNodes = getElements(node, false); + IRubyObject[] array = NokogiriHelpers.nodeListToArray(context.runtime, elementNodes); + return XmlNodeSet.newNodeSet(context.runtime, array, this); + } + + private static List + getElements(Node node, final boolean firstOnly) + { + NodeList children = node.getChildNodes(); + if (children.getLength() == 0) { + return Collections.emptyList(); + } + ArrayList elements = firstOnly ? null : new ArrayList(children.getLength()); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + if (child.getNodeType() == Node.ELEMENT_NODE) { + if (firstOnly) { + return Collections.singletonList(child); + } + elements.add(child); + } } + return elements; + } + + /** + * call-seq: + * compare(other) + * + * Compare this Node to +other+ with respect to their Document + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + compare(ThreadContext context, IRubyObject other) + { + if (!(other instanceof XmlNode)) { + return context.runtime.newFixnum(-2); + } + + Node otherNode = asXmlNode(context, other).node; + + // Do not touch this if, if it's not for a good reason. + if (node.getNodeType() == Node.DOCUMENT_NODE || + otherNode.getNodeType() == Node.DOCUMENT_NODE) { + return context.runtime.newFixnum(1); + } + + try { + int res = node.compareDocumentPosition(otherNode); + if ((res & FIRST_PRECEDES_SECOND) == FIRST_PRECEDES_SECOND) { + return context.runtime.newFixnum(-1); + } else if ((res & SECOND_PRECEDES_FIRST) == SECOND_PRECEDES_FIRST) { + return context.runtime.newFixnum(1); + } else if (res == IDENTICAL_ELEMENTS) { + return context.runtime.newFixnum(0); + } - private void setContent(String content) { - node.setTextContent(content); - this.content = null; // clear cache + return context.runtime.newFixnum(-2); + } catch (Exception ex) { + return context.runtime.newFixnum(-2); + } + } + + /** + * TODO: this is a stub implementation. It's not clear what + * 'in_context' is supposed to do. 
Also should take + * options into account. + */ + @JRubyMethod(required = 2, visibility = Visibility.PRIVATE) + public IRubyObject + in_context(ThreadContext context, IRubyObject str, IRubyObject options) + { + RubyClass klass; + XmlDomParserContext ctx; + InputStream istream; + + final Ruby runtime = context.runtime; + + XmlDocument document = document(runtime); + if (document == null) { return context.nil; } + + if (document instanceof Html4Document) { + klass = getNokogiriClass(runtime, "Nokogiri::HTML4::Document"); + ctx = new HtmlDomParserContext(runtime, options); + ((HtmlDomParserContext) ctx).enableDocumentFragment(); + ctx.setStringInputSource(context, str, context.nil); + } else { + klass = getNokogiriClass(runtime, "Nokogiri::XML::Document"); + ctx = new XmlDomParserContext(runtime, options); + ctx.setStringInputSource(context, str, context.nil); + } + + // TODO: for some reason, document.getEncoding() can be null or nil (don't know why) + // run `test_parse_with_unparented_html_text_context_node' few times to see this happen + if (document instanceof Html4Document && !(document.getEncoding() == null || document.getEncoding().isNil())) { + HtmlDomParserContext htmlCtx = (HtmlDomParserContext) ctx; + htmlCtx.setEncoding(document.getEncoding().asJavaString()); + } + + XmlDocument doc = ctx.parse(context, klass, context.nil); + + RubyArray documentErrors = getErrors(document); + RubyArray docErrors = getErrors(doc); + if (checkNewErrors(documentErrors, docErrors)) { + for (int i = 0; i < docErrors.getLength(); i++) { + documentErrors.append(docErrors.entry(i)); + } + document.setInstanceVariable("@errors", documentErrors); + return XmlNodeSet.newNodeSet(context.runtime, IRubyObject.NULL_ARRAY, this); + } + + // The first child might be document type node (dtd declaration). + // XmlNodeSet to be return should not have dtd decl in its list. 
+ Node first; + if (doc.node.getFirstChild().getNodeType() == Node.DOCUMENT_TYPE_NODE) { + first = doc.node.getFirstChild().getNextSibling(); + } else { + first = doc.node.getFirstChild(); + } + + IRubyObject[] nodes = new IRubyObject[] { NokogiriHelpers.getCachedNodeOrCreate(runtime, first) }; + return XmlNodeSet.newNodeSet(context.runtime, nodes, this); + } + + private static RubyArray + getErrors(XmlDocument document) + { + IRubyObject obj = document.getInstanceVariable("@errors"); + if (obj instanceof RubyArray) { return (RubyArray) obj; } + return RubyArray.newEmptyArray(document.getRuntime()); + } + + private static boolean + checkNewErrors(RubyArray baseErrors, RubyArray newErrors) + { + int length = ((RubyArray) newErrors.op_diff(baseErrors)).size(); + return length > 0; + } + + @JRubyMethod(name = {"content", "text", "inner_text"}) + public IRubyObject + content(ThreadContext context) + { + return stringOrNil(context.runtime, getContentImpl()); + } + + public CharSequence + getContentImpl() + { + if (!node.hasChildNodes() && node.getNodeValue() == null && + (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE)) { + return null; } - - @JRubyMethod(name = "native_content=") - public IRubyObject native_content_set(ThreadContext context, IRubyObject content) { - setContent(content); - return content; + CharSequence textContent; + if (this instanceof XmlDocument) { + Node node = ((Document) this.node).getDocumentElement(); + if (node == null) { + textContent = ""; + } else { + Node documentElement = ((Document) this.node).getDocumentElement(); + textContent = getTextContentRecursively(new StringBuilder(), documentElement); + } + } else { + textContent = getTextContentRecursively(new StringBuilder(), node); + } + // textContent = NokogiriHelpers.convertEncodingByNKFIfNecessary(context, (XmlDocument) document(context), textContent); + return textContent; + } + + private static StringBuilder + 
getTextContentRecursively(StringBuilder buffer, Node currentNode) + { + CharSequence textContent = currentNode.getNodeValue(); + if (textContent != null && NokogiriHelpers.shouldDecode(currentNode)) { + textContent = NokogiriHelpers.decodeJavaString(textContent); + } + if (textContent != null) { buffer.append(textContent); } + NodeList children = currentNode.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + if (hasTextContent(child)) { getTextContentRecursively(buffer, child); } + } + return buffer; + } + + private static boolean + hasTextContent(Node child) + { + return child.getNodeType() != Node.COMMENT_NODE && child.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE; + } + + @JRubyMethod + public final IRubyObject + document(ThreadContext context) + { + return document(context.runtime); + } + + XmlDocument + document(final Ruby runtime) + { + return document(runtime, true); + } + + XmlDocument + document(final Ruby runtime, boolean create) + { + if (doc == null) { + doc = (XmlDocument) node.getOwnerDocument().getUserData(NokogiriHelpers.CACHED_NODE); + if (doc == null && create) { + doc = (XmlDocument) getCachedNodeOrCreate(runtime, node.getOwnerDocument()); + node.getOwnerDocument().setUserData(NokogiriHelpers.CACHED_NODE, doc, null); + } } - - @JRubyMethod - public IRubyObject lang(ThreadContext context) { - IRubyObject currentObj = this ; - while (!currentObj.isNil()) { - XmlNode currentNode = asXmlNode(context, currentObj); - IRubyObject lang = currentNode.getAttribute(context.getRuntime(), "xml:lang"); - if (!lang.isNil()) { return lang ; } - - currentObj = currentNode.parent(context); + return doc; + } + + public IRubyObject + dup() + { + return dup_implementation(getMetaClass().getClassRuntime(), true); + } + + @JRubyMethod + public IRubyObject + dup(ThreadContext context) + { + return dup_implementation(context, true); + } + + @JRubyMethod + public IRubyObject + dup(ThreadContext context, IRubyObject 
depth) + { + boolean deep = depth instanceof RubyInteger && RubyFixnum.fix2int(depth) != 0; + return dup_implementation(context, deep); + } + + protected final IRubyObject + dup_implementation(ThreadContext context, boolean deep) + { + return dup_implementation(context.runtime, deep); + } + + protected IRubyObject + dup_implementation(Ruby runtime, boolean deep) + { + XmlNode clone; + try { + clone = (XmlNode) clone(); + } catch (CloneNotSupportedException e) { + throw runtime.newRuntimeError(e.toString()); + } + Node newNode = node.cloneNode(deep); + clone.node = newNode; + return clone; + } + + public static RubyString + encode_special_chars(ThreadContext context, IRubyObject string) + { + CharSequence str = NokogiriHelpers.encodeJavaString(rubyStringToString(string)); + return RubyString.newString(context.runtime, str); + } + + /** + * Instance method version of the above static method. + */ + @JRubyMethod(name = "encode_special_chars") + public IRubyObject + i_encode_special_chars(ThreadContext context, IRubyObject string) + { + return encode_special_chars(context, string); + } + + /** + * Get the attribute at the given key, key. + * Assumes that this node has attributes (i.e. that key? returned + * true). + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + get(ThreadContext context, IRubyObject rbkey) + { + if (node instanceof Element) { + if (rbkey == null || rbkey.isNil()) { return context.nil; } + String key = rubyStringToString(rbkey); + Element element = (Element) node; + if (!element.hasAttribute(key)) { return context.nil; } + String value = element.getAttribute(key); + return stringOrNil(context.runtime, value); + } + return context.nil; + } + + /** + * Returns the owner document, checking if this node is the + * document, or returns null if there is no owner. 
+ */ + protected Document + getOwnerDocument() + { + if (node.getNodeType() == Node.DOCUMENT_NODE) { + return (Document) node; + } else { + return node.getOwnerDocument(); + } + } + + @JRubyMethod + public IRubyObject + internal_subset(ThreadContext context) + { + Document document = getOwnerDocument(); + + if (document == null) { + return context.getRuntime().getNil(); + } + + XmlDocument xdoc = + (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.getInternalSubset(context); + return xdtd; + } + + @JRubyMethod + public IRubyObject + create_internal_subset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) + { + IRubyObject subset = internal_subset(context); + if (!subset.isNil()) { + throw context.runtime.newRuntimeError("Document already has internal subset"); + } + + Document document = getOwnerDocument(); + if (document == null) { + return context.getRuntime().getNil(); + } + + XmlDocument xdoc = + (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.createInternalSubset(context, name, + external_id, system_id); + return xdtd; + } + + @JRubyMethod + public IRubyObject + external_subset(ThreadContext context) + { + Document document = getOwnerDocument(); + + if (document == null) { + return context.getRuntime().getNil(); + } + + XmlDocument xdoc = + (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.getExternalSubset(context); + return xdtd; + } + + @JRubyMethod + public IRubyObject + create_external_subset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) + { + IRubyObject subset = external_subset(context); + if (!subset.isNil()) { + throw context.runtime.newRuntimeError("Document already has external subset"); + } + + Document document = getOwnerDocument(); + if (document == null) { + return context.getRuntime().getNil(); + } + XmlDocument xdoc 
= (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.createExternalSubset(context, name, external_id, system_id); + return xdtd; + } + + /** + * Test if this node has an attribute named rbkey. + * Overridden in XmlElement. + */ + @JRubyMethod(name = {"key?", "has_attribute?"}) + public IRubyObject + key_p(ThreadContext context, IRubyObject rbkey) + { + if (node instanceof Element) { + String key = rubyStringToString(rbkey); + Element element = (Element) node; + if (element.hasAttribute(key)) { + return context.runtime.getTrue(); + } else { + NamedNodeMap namedNodeMap = element.getAttributes(); + for (int i = 0; i < namedNodeMap.getLength(); i++) { + Node n = namedNodeMap.item(i); + if (key.equals(n.getLocalName())) { + return context.runtime.getTrue(); + } } - return context.nil ; + } + return context.runtime.getFalse(); + } + return context.nil; + } + + @JRubyMethod + public IRubyObject + namespace(ThreadContext context) + { + final XmlDocument doc = document(context.runtime); + if (doc instanceof Html4Document) { return context.nil; } + + String namespaceURI = node.getNamespaceURI(); + if (namespaceURI == null || namespaceURI.isEmpty()) { + return context.nil; + } + + String prefix = node.getPrefix(); + NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCache(node); + XmlNamespace namespace = nsCache.get(prefix, namespaceURI); + + if (namespace == null || namespace.isEmpty()) { + // if it's not in the cache, create an unowned, uncached namespace and + // return that. XmlReader can't insert namespaces into the cache, so + // this is necessary for XmlReader to work correctly. + namespace = new XmlNamespace(context.runtime, null, prefix, namespaceURI, doc); + } + + return namespace; + } + + /** + * Return an array of XmlNamespace nodes based on the attributes + * of this node. 
+ */ + @JRubyMethod + public RubyArray + namespace_definitions(ThreadContext context) + { + // don't use namespace_definitions cache anymore since + // namespaces might be deleted. Reflecting the result of + // namespace removals is complicated, so the cache might not be + // updated. + final XmlDocument doc = document(context.runtime); + if (doc == null) { return context.runtime.newEmptyArray(); } + if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); } + + List namespaces = doc.getNamespaceCache().get(node); + return RubyArray.newArray(context.runtime, namespaces); + + // // TODO: I think this implementation would be better but there are edge cases + // // See https://github.com/sparklemotion/nokogiri/issues/2543 + // RubyArray nsdefs = RubyArray.newArray(context.getRuntime()); + // NamedNodeMap attrs = node.getAttributes(); + // for (int j = 0 ; j < attrs.getLength() ; j++) { + // Attr attr = (Attr)attrs.item(j); + // if ("http://www.w3.org/2000/xmlns/" == attr.getNamespaceURI()) { + // nsdefs.append(XmlNamespace.createFromAttr(context.getRuntime(), attr)); + // } + // } + // return nsdefs; + } + + /** + * Return an array of XmlNamespace nodes defined on this node and + * on any ancestor node. 
+ */ + @JRubyMethod + public RubyArray + namespace_scopes(ThreadContext context) + { + final XmlDocument doc = document(context.runtime); + if (doc == null) { return context.runtime.newEmptyArray(); } + if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); } + + Node previousNode; + if (node.getNodeType() == Node.ELEMENT_NODE) { + previousNode = node; + } else if (node.getNodeType() == Node.ATTRIBUTE_NODE) { + previousNode = ((Attr)node).getOwnerElement(); + } else { + previousNode = findPreviousElement(node); + } + if (previousNode == null) { return context.runtime.newEmptyArray(); } + + final RubyArray scoped_namespaces = context.runtime.newArray(); + final HashSet prefixes_in_scope = new HashSet(8); + NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCache(previousNode); + for (Node previous = previousNode; previous != null;) { + List namespaces = nsCache.get(previous); + for (XmlNamespace namespace : namespaces) { + if (prefixes_in_scope.contains(namespace.getPrefix())) { continue; } + scoped_namespaces.append(namespace); + prefixes_in_scope.add(namespace.getPrefix()); + } + previous = findPreviousElement(previous); + } + return scoped_namespaces; + } + + private Node + findPreviousElement(Node n) + { + Node previous = n.getPreviousSibling() == null ? n.getParentNode() : n.getPreviousSibling(); + if (previous == null || previous.getNodeType() == Node.DOCUMENT_NODE) { return null; } + if (previous.getNodeType() == Node.ELEMENT_NODE) { + return previous; + } else { + return findPreviousElement(previous); + } + } + + @JRubyMethod(name = "namespaced_key?") + public IRubyObject + namespaced_key_p(ThreadContext context, IRubyObject elementLName, IRubyObject namespaceUri) + { + return this.attribute_with_ns(context, elementLName, namespaceUri).isNil() ? 
+ context.runtime.getFalse() : context.runtime.getTrue(); + } + + protected void + setContent(IRubyObject content) + { + String javaContent = rubyStringToString(content); + node.setTextContent(javaContent); + if (javaContent == null || javaContent.length() == 0) { return; } + if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) { return; } + if (node.getFirstChild() != null) { + node.getFirstChild().setUserData(NokogiriHelpers.ENCODED_STRING, true, null); + } + } + + private void + setContent(String content) + { + node.setTextContent(content); + this.content = null; // clear cache + } + + @JRubyMethod(name = "native_content=") + public IRubyObject + native_content_set(ThreadContext context, IRubyObject content) + { + setContent(content); + return content; + } + + @JRubyMethod + public IRubyObject + lang(ThreadContext context) + { + IRubyObject currentObj = this ; + while (!currentObj.isNil()) { + XmlNode currentNode = asXmlNode(context, currentObj); + IRubyObject lang = currentNode.getAttribute(context.runtime, "xml:lang"); + if (!lang.isNil()) { return lang ; } + + currentObj = currentNode.parent(context); + } + return context.nil; + } + + @JRubyMethod(name = "lang=") + public IRubyObject + set_lang(ThreadContext context, IRubyObject lang) + { + setAttribute(context, "xml:lang", rubyStringToString(lang)); + return context.nil ; + } + + /** + * @param args {IRubyObject io, + * IRubyObject encoding, + * IRubyObject indentString, + * IRubyObject options} + */ + @JRubyMethod(required = 4, visibility = Visibility.PRIVATE) + public IRubyObject + native_write_to(ThreadContext context, IRubyObject[] args) + { + + IRubyObject io = args[0]; + IRubyObject encoding = args[1]; + IRubyObject indentString = args[2]; + IRubyObject options = args[3]; + + String encString = rubyStringToString(encoding); + + SaveContextVisitor visitor = + new SaveContextVisitor(RubyFixnum.fix2int(options), rubyStringToString(indentString), encString, 
isHtmlDoc(context), + isFragment(), 0); + accept(context, visitor); + + final IRubyObject rubyString; + if (NokogiriHelpers.isUTF8(encString)) { + rubyString = convertString(context.runtime, visitor.getInternalBuffer()); + } else { + ByteBuffer bytes = convertEncoding(Charset.forName(encString), visitor.getInternalBuffer()); + ByteList str = new ByteList(bytes.array(), bytes.arrayOffset(), bytes.remaining()); + rubyString = RubyString.newString(context.runtime, str); + } + Helpers.invoke(context, io, "write", rubyString); + + return io; + } + + private boolean + isHtmlDoc(ThreadContext context) + { + return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.runtime, "Nokogiri::HTML4::Document")); + } + + private boolean + isFragment() + { + if (node instanceof DocumentFragment) { return true; } + if (node.getParentNode() != null && node.getParentNode() instanceof DocumentFragment) { return true; } + return false; + } + + @JRubyMethod(name = {"next_sibling", "next"}) + public IRubyObject + next_sibling(ThreadContext context) + { + return getCachedNodeOrCreate(context.getRuntime(), node.getNextSibling()); + } + + @JRubyMethod(name = {"previous_sibling", "previous"}) + public IRubyObject + previous_sibling(ThreadContext context) + { + return getCachedNodeOrCreate(context.getRuntime(), node.getPreviousSibling()); + } + + @JRubyMethod(name = {"node_name", "name"}) + public IRubyObject + node_name(ThreadContext context) + { + return getNodeName(context); + } + + @JRubyMethod(name = {"node_name=", "name="}) + public IRubyObject + node_name_set(ThreadContext context, IRubyObject nodeName) + { + nodeName = doSetName(nodeName); + String newName = nodeName == null ? 
null : rubyStringToString((RubyString) nodeName); + this.node = NokogiriHelpers.renameNode(node, null, newName); + return this; + } + + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + set(ThreadContext context, IRubyObject rbkey, IRubyObject rbval) + { + if (node instanceof Element) { + setAttribute(context, rubyStringToString(rbkey), rubyStringToString(rbval)); + return this; + } else { + return rbval; + } + } + + private void + setAttribute(ThreadContext context, String key, String val) + { + Element element = (Element) node; + + String uri = null; + int colonIndex = key.indexOf(":"); + if (colonIndex > 0) { + String prefix = key.substring(0, colonIndex); + if (prefix.equals("xml")) { + uri = "http://www.w3.org/XML/1998/namespace"; + } else if (prefix.equals("xmlns")) { + uri = "http://www.w3.org/2000/xmlns/"; + } else { + uri = node.lookupNamespaceURI(prefix); + } } - @JRubyMethod(name = "lang=") - public IRubyObject set_lang(ThreadContext context, IRubyObject lang) { - setAttribute(context, "xml:lang", rubyStringToString(lang)); - return context.nil ; + if (uri != null) { + element.setAttributeNS(uri, key, val); + } else { + element.setAttribute(key, val); + } + clearXpathContext(node); + } + + private String + findNamespaceHref(ThreadContext context, String prefix) + { + XmlNode currentNode = this; + final XmlDocument doc = document(context.runtime); + while (currentNode != doc) { + RubyArray namespaces = currentNode.namespace_scopes(context); + for (int i = 0; i < namespaces.size(); i++) { + XmlNamespace namespace = (XmlNamespace) namespaces.eltInternal(i); + if (namespace.hasPrefix(prefix)) { return namespace.getHref(); } + } + IRubyObject parent = currentNode.parent(context); + if (parent == context.nil) { break; } + currentNode = (XmlNode) parent; } + return null; + } - /** - * @param args {IRubyObject io, - * IRubyObject encoding, - * IRubyObject indentString, - * IRubyObject options} + @JRubyMethod + public IRubyObject + 
parent(ThreadContext context) + { + /* + * Check if this node is the root node of the document. + * If so, parent is the document. */ - @JRubyMethod(required=4, visibility=Visibility.PRIVATE) - public IRubyObject native_write_to(ThreadContext context, IRubyObject[] args) { - - IRubyObject io = args[0]; - IRubyObject encoding = args[1]; - IRubyObject indentString = args[2]; - IRubyObject options = args[3]; - - String encString = encoding.isNil() ? null : rubyStringToString(encoding); - - SaveContextVisitor visitor = - new SaveContextVisitor(RubyFixnum.fix2int(options), rubyStringToString(indentString), encString, isHtmlDoc(context), isFragment(), 0); - accept(context, visitor); - - final IRubyObject rubyString; - if (NokogiriHelpers.isUTF8(encString)) { - rubyString = convertString(context.getRuntime(), visitor.getInternalBuffer()); - } else { - ByteBuffer bytes = convertEncoding(Charset.forName(encString), visitor.getInternalBuffer()); - ByteList str = new ByteList(bytes.array(), bytes.arrayOffset(), bytes.remaining()); - rubyString = RubyString.newString(context.getRuntime(), str); - } - Helpers.invoke(context, io, "write", rubyString); - - return io; - } - - private boolean isHtmlDoc(ThreadContext context) { - return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document")); - } - - private boolean isFragment() { - if (node instanceof DocumentFragment) return true; - if (node.getParentNode() != null && node.getParentNode() instanceof DocumentFragment) return true; - return false; - } - - @JRubyMethod(name = {"next_sibling", "next"}) - public IRubyObject next_sibling(ThreadContext context) { - return getCachedNodeOrCreate(context.getRuntime(), node.getNextSibling()); - } - - @JRubyMethod(name = {"previous_sibling", "previous"}) - public IRubyObject previous_sibling(ThreadContext context) { - return getCachedNodeOrCreate(context.getRuntime(), node.getPreviousSibling()); - } - - @JRubyMethod(meta = true, rest 
= true) - public static IRubyObject new_from_str(ThreadContext context, - IRubyObject cls, - IRubyObject[] args) { - XmlDocument doc = (XmlDocument) XmlDocument.read_memory(context, args); - return doc.root(context); - } - - @JRubyMethod(name = {"node_name", "name"}) - public IRubyObject node_name(ThreadContext context) { - return getNodeName(context); - } - - @JRubyMethod(name = {"node_name=", "name="}) - public IRubyObject node_name_set(ThreadContext context, IRubyObject nodeName) { - String newName = rubyStringToString(nodeName); - this.node = NokogiriHelpers.renameNode(node, null, newName); - setName(nodeName); - return this; - } - - @JRubyMethod(visibility = Visibility.PRIVATE) - public IRubyObject set(ThreadContext context, IRubyObject rbkey, IRubyObject rbval) { - if (node instanceof Element) { - setAttribute(context, rubyStringToString(rbkey), rubyStringToString(rbval)); - return this; + if (node.getOwnerDocument() != null && + node.getOwnerDocument().getDocumentElement() == node) { + return document(context); + } + return getCachedNodeOrCreate(context.runtime, node.getParentNode()); + } + + @JRubyMethod + public IRubyObject + path(ThreadContext context) + { + return RubyString.newString(context.runtime, NokogiriHelpers.getNodeCompletePath(this.node)); + } + + @JRubyMethod + public IRubyObject + pointer_id(ThreadContext context) + { + return RubyFixnum.newFixnum(context.runtime, this.node.hashCode()); + } + + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + set_namespace(ThreadContext context, IRubyObject namespace) + { + if (namespace.isNil()) { + XmlDocument doc = document(context.runtime); + if (doc != null) { + Node node = this.node; + doc.getNamespaceCache().remove(node); + this.node = NokogiriHelpers.renameNode(node, null, NokogiriHelpers.getLocalPart(node.getNodeName())); + } + } else { + XmlNamespace ns = (XmlNamespace) namespace; + + // Assigning node = ...renameNode() or not seems to make no + // difference. Why not? 
-pmahoney + + // It actually makes a great deal of difference. renameNode() + // will operate in place if it can, but sometimes it can't. + // The node you passed in *might* come back as you expect, but + // it might not. It's much safer to throw away the original + // and keep the return value. -mbklein + String new_name = NokogiriHelpers.newQName(ns.getPrefix(), node); + this.node = NokogiriHelpers.renameNode(node, ns.getHref(), new_name); + } + + clearXpathContext(getNode()); + + return this; + } + + @JRubyMethod(name = {"unlink", "remove"}) + public IRubyObject + unlink(ThreadContext context) + { + final Node parent = node.getParentNode(); + if (parent != null) { + parent.removeChild(node); + clearXpathContext(parent); + } + return this; + } + + /** + * The C-library simply returns libxml2 magic numbers. Here we + * convert Java Xml nodes to the appropriate constant defined in + * xml/node.rb. + */ + @JRubyMethod(name = {"node_type", "type"}) + public IRubyObject + node_type(ThreadContext context) + { + String type; + switch (node.getNodeType()) { + case Node.ELEMENT_NODE: + if (this instanceof XmlElementDecl) { + type = "ELEMENT_DECL"; + } else if (this instanceof XmlAttributeDecl) { + type = "ATTRIBUTE_DECL"; + } else if (this instanceof XmlEntityDecl) { + type = "ENTITY_DECL"; } else { - return rbval; - } - } - - private void setAttribute(ThreadContext context, String key, String val) { - Element element = (Element) node; - - String uri = null; - int colonIndex = key.indexOf(":"); - if (colonIndex > 0) { - String prefix = key.substring(0, colonIndex); - if (prefix.equals("xml")) { - uri = "http://www.w3.org/XML/1998/namespace"; - } else if (prefix.equals("xmlns")) { - uri = "http://www.w3.org/2000/xmlns/"; - } else { - uri = node.lookupNamespaceURI(prefix); - } - } - - if (uri != null) { - element.setAttributeNS(uri, key, val); + type = "ELEMENT_NODE"; + } + break; + case Node.ATTRIBUTE_NODE: + type = "ATTRIBUTE_NODE"; + break; + case Node.TEXT_NODE: + type 
= "TEXT_NODE"; + break; + case Node.CDATA_SECTION_NODE: + type = "CDATA_SECTION_NODE"; + break; + case Node.ENTITY_REFERENCE_NODE: + type = "ENTITY_REF_NODE"; + break; + case Node.ENTITY_NODE: + type = "ENTITY_NODE"; + break; + case Node.PROCESSING_INSTRUCTION_NODE: + type = "PI_NODE"; + break; + case Node.COMMENT_NODE: + type = "COMMENT_NODE"; + break; + case Node.DOCUMENT_NODE: + if (this instanceof Html4Document) { + type = "HTML_DOCUMENT_NODE"; } else { - element.setAttribute(key, val); - } - clearXpathContext(node); - } - - private String findNamespaceHref(ThreadContext context, String prefix) { - XmlNode currentNode = this; - while(currentNode != document(context)) { - RubyArray namespaces = (RubyArray) currentNode.namespace_scopes(context); - Iterator iterator = namespaces.iterator(); - while(iterator.hasNext()) { - XmlNamespace namespace = (XmlNamespace) iterator.next(); - if (namespace.getPrefix().equals(prefix)) { - return namespace.getHref(); + type = "DOCUMENT_NODE"; + } + break; + case Node.DOCUMENT_TYPE_NODE: + type = "DOCUMENT_TYPE_NODE"; + break; + case Node.DOCUMENT_FRAGMENT_NODE: + type = "DOCUMENT_FRAG_NODE"; + break; + case Node.NOTATION_NODE: + type = "NOTATION_NODE"; + break; + default: + return context.runtime.newFixnum(0); + } + + return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type); + } + + /* + * NOTE that the behavior of this function is very difference from the CRuby implementation, see + * the docstring in ext/nokogiri/xml_node.c for details. 
+ */ + @JRubyMethod + public IRubyObject + line(ThreadContext context) + { + Node root = getOwnerDocument(); + int[] counter = new int[1]; + count(root, counter); + // offset of 2: + // - one because humans start counting at 1 not zero + // - one to account for the XML declaration present in the output + return RubyFixnum.newFixnum(context.runtime, counter[0] + 2); + } + + private boolean + count(Node node, int[] counter) + { + if (node == this.node) { + return true; + } + + NodeList list = node.getChildNodes(); + for (int jchild = 0; jchild < list.getLength(); jchild++) { + Node child = list.item(jchild); + String text = null; + + if (child instanceof Text) { + text = ((Text)child).getData(); + } else if (child instanceof Comment) { + text = ((Comment)child).getData(); + } + if (text != null) { + int textLength = text.length(); + for (int jchar = 0; jchar < textLength; jchar++) { + if (text.charAt(jchar) == '\n') { + counter[0] += 1; } } - if (currentNode.parent(context).isNil()) { - break; - } else { - currentNode = (XmlNode) currentNode.parent(context); - } } - return null; - } - - @JRubyMethod - public IRubyObject parent(ThreadContext context) { - /* - * Check if this node is the root node of the document. - * If so, parent is the document. 
- */ - if (node.getOwnerDocument() != null && - node.getOwnerDocument().getDocumentElement() == node) { - return document(context); - } else { - return getCachedNodeOrCreate(context.getRuntime(), node.getParentNode()); - } - } - - @JRubyMethod - public IRubyObject path(ThreadContext context) { - return RubyString.newString(context.getRuntime(), NokogiriHelpers.getNodeCompletePath(this.node)); - } - - @JRubyMethod - public IRubyObject pointer_id(ThreadContext context) { - return RubyFixnum.newFixnum(context.getRuntime(), this.node.hashCode()); - } - - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject set_namespace(ThreadContext context, IRubyObject namespace) { - if (namespace.isNil()) { - if (doc != null) { - Node n = node; - String prefix = n.getPrefix(); - String href = n.getNamespaceURI(); - ((XmlDocument)doc).getNamespaceCache().remove(prefix == null ? "" : prefix, href); - this.node = NokogiriHelpers.renameNode(n, null, NokogiriHelpers.getLocalPart(n.getNodeName())); - } - } else { - XmlNamespace ns = (XmlNamespace) namespace; - String prefix = rubyStringToString(ns.prefix(context)); - String href = rubyStringToString(ns.href(context)); - - // Assigning node = ...renameNode() or not seems to make no - // difference. Why not? -pmahoney - - // It actually makes a great deal of difference. renameNode() - // will operate in place if it can, but sometimes it can't. - // The node you passed in *might* come back as you expect, but - // it might not. It's much safer to throw away the original - // and keep the return value. 
-mbklein - String new_name = NokogiriHelpers.newQName(prefix, node); - this.node = NokogiriHelpers.renameNode(node, href, new_name); - } - clearXpathContext(getNode()); + if (count(child, counter)) { return true; } + } + return false; + } + + @JRubyMethod + public IRubyObject + next_element(ThreadContext context) + { + Node nextNode = node.getNextSibling(); + if (nextNode == null) { return context.nil; } + if (nextNode instanceof Element) { + return getCachedNodeOrCreate(context.runtime, nextNode); + } + Node deeper = nextNode.getNextSibling(); + if (deeper == null) { return context.nil; } + return getCachedNodeOrCreate(context.runtime, deeper); + } + + @JRubyMethod + public IRubyObject + previous_element(ThreadContext context) + { + Node prevNode = node.getPreviousSibling(); + if (prevNode == null) { return context.nil; } + if (prevNode instanceof Element) { + return getCachedNodeOrCreate(context.runtime, prevNode); + } + Node shallower = prevNode.getPreviousSibling(); + if (shallower == null) { return context.nil; } + return getCachedNodeOrCreate(context.runtime, shallower); + } + + protected enum AdoptScheme { + CHILD, PREV_SIBLING, NEXT_SIBLING, REPLACEMENT + } + + /** + * Adopt XmlNode other into the document of + * this using the specified scheme. + */ + protected IRubyObject + adoptAs(ThreadContext context, AdoptScheme scheme, IRubyObject other_) + { + final XmlNode other = asXmlNode(context, other_); + // this.doc might be null since this node can be empty node. 
+ if (doc != null) { other.setDocument(context, doc); } + + IRubyObject nodeOrTags = other; + Node thisNode = node; + Node otherNode = other.node; + + try { + Document prev = otherNode.getOwnerDocument(); + Document doc = thisNode.getOwnerDocument(); + if (doc == null && thisNode instanceof Document) { + // we are adding the new node to a new empty document + doc = (Document) thisNode; + } + clearXpathContext(prev); + clearXpathContext(doc); + if (doc != null && doc != otherNode.getOwnerDocument()) { + Node ret = doc.adoptNode(otherNode); + if (ret == null) { + throw context.runtime.newRuntimeError("Failed to take ownership of node"); + } + // FIXME: this is really a hack, see documentation of fixUserData() for more details. + fixUserData(prev, ret); + otherNode = ret; + } - return this; - } + Node parent = thisNode.getParentNode(); - @JRubyMethod(name = {"unlink", "remove"}) - public IRubyObject unlink(ThreadContext context) { - final Node parent = node.getParentNode(); - if (parent != null) { - parent.removeChild(node); - clearXpathContext(parent); - } - return this; - } - - /** - * The C-library simply returns libxml2 magic numbers. Here we - * convert Java Xml nodes to the appropriate constant defined in - * xml/node.rb. 
- */ - @JRubyMethod(name = {"node_type", "type"}) - public IRubyObject node_type(ThreadContext context) { - String type; - switch (node.getNodeType()) { - case Node.ELEMENT_NODE: - if (this instanceof XmlElementDecl) - type = "ELEMENT_DECL"; - else if (this instanceof XmlAttributeDecl) - type = "ATTRIBUTE_DECL"; - else if (this instanceof XmlEntityDecl) - type = "ENTITY_DECL"; - else - type = "ELEMENT_NODE"; - break; - case Node.ATTRIBUTE_NODE: type = "ATTRIBUTE_NODE"; break; - case Node.TEXT_NODE: type = "TEXT_NODE"; break; - case Node.CDATA_SECTION_NODE: type = "CDATA_SECTION_NODE"; break; - case Node.ENTITY_REFERENCE_NODE: type = "ENTITY_REF_NODE"; break; - case Node.ENTITY_NODE: type = "ENTITY_NODE"; break; - case Node.PROCESSING_INSTRUCTION_NODE: type = "PI_NODE"; break; - case Node.COMMENT_NODE: type = "COMMENT_NODE"; break; - case Node.DOCUMENT_NODE: - if (this instanceof HtmlDocument) - type = "HTML_DOCUMENT_NODE"; - else - type = "DOCUMENT_NODE"; + switch (scheme) { + case CHILD: + Node[] children = adoptAsChild(thisNode, otherNode); + if (children.length == 1 && otherNode == children[0]) { break; - case Node.DOCUMENT_TYPE_NODE: type = "DOCUMENT_TYPE_NODE"; break; - case Node.DOCUMENT_FRAGMENT_NODE: type = "DOCUMENT_FRAG_NODE"; break; - case Node.NOTATION_NODE: type = "NOTATION_NODE"; break; - default: - return context.getRuntime().newFixnum(0); - } - - return getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Node").getConstant(type); - } - - @JRubyMethod - public IRubyObject line(ThreadContext context) { - Node root = getOwnerDocument(); - int[] counter = new int[1]; - count(root, counter); - return RubyFixnum.newFixnum(context.getRuntime(), counter[0]+1); - } - - private boolean count(Node node, int[] counter) { - if (node == this.node) { - return true; - } - NodeList list = node.getChildNodes(); - for (int i=0; iother
into the document of - * this using the specified scheme. - */ - protected IRubyObject adoptAs(ThreadContext context, AdoptScheme scheme, - IRubyObject other_) { - XmlNode other = asXmlNode(context, other_); - // this.doc might be null since this node can be empty node. - if (this.doc != null) { - other.setDocument(context, this.doc); - } - IRubyObject nodeOrTags = other; - Node thisNode = node; - Node otherNode = other.node; - - try { - Document prev = otherNode.getOwnerDocument(); - Document doc = thisNode.getOwnerDocument(); - if (doc == null && thisNode instanceof Document) { - // we are adding the new node to a new empty document - doc = (Document) thisNode; - } - clearXpathContext(prev); - clearXpathContext(doc); - if (doc != null && doc != otherNode.getOwnerDocument()) { - Node ret = doc.adoptNode(otherNode); - // FIXME: this is really a hack, see documentation of fixUserData() for more details. - fixUserData(prev, ret); - if (ret == null) { - throw context.getRuntime().newRuntimeError("Failed to take ownership of node"); - } - otherNode = ret; - } - - Node parent = thisNode.getParentNode(); - - switch (scheme) { - case CHILD: - Node[] children = adoptAsChild(context, thisNode, otherNode); - if (children.length == 1 && otherNode == children[0]) { - break; - } else { - nodeOrTags = nodeArrayToRubyArray(context.getRuntime(), children); - } - break; - case PREV_SIBLING: - adoptAsPrevSibling(context, parent, thisNode, otherNode); - break; - case NEXT_SIBLING: - adoptAsNextSibling(context, parent, thisNode, otherNode); - break; - case REPLACEMENT: - adoptAsReplacement(context, parent, thisNode, otherNode); - break; - } - } catch (Exception e) { - throw context.getRuntime().newRuntimeError(e.toString()); - } - - if (otherNode.getNodeType() == Node.TEXT_NODE) { - coalesceTextNodes(context, other, scheme); - } - - if (this instanceof XmlDocument) { - ((XmlDocument) this).resetNamespaceCache(context); - } - - other.relink_namespace(context); - - return nodeOrTags; - 
} - - /** - * This is a hack to fix #839. We should submit a patch to Xerces. - * It looks like CoreDocumentImpl.adoptNode() doesn't copy - * the user data associated with child nodes (recursively). - */ - private void fixUserData(Document previous, Node ret) { - String key = NokogiriHelpers.ENCODED_STRING; - for (Node child = ret.getFirstChild(); child != null; child = child.getNextSibling()) { - CoreDocumentImpl previousDocument = (CoreDocumentImpl) previous; - child.setUserData(key, previousDocument.getUserData(child, key), null); - fixUserData(previous, child); + } else { + nodeOrTags = nodeArrayToRubyArray(context.runtime, children); + } + break; + case PREV_SIBLING: + adoptAsPrevSibling(context, parent, thisNode, otherNode); + break; + case NEXT_SIBLING: + adoptAsNextSibling(context, parent, thisNode, otherNode); + break; + case REPLACEMENT: + adoptAsReplacement(context, parent, thisNode, otherNode); + break; } + } catch (Exception e) { + throw context.runtime.newRuntimeError(e.toString()); } - protected Node[] adoptAsChild(ThreadContext context, Node parent, - Node otherNode) { - /* - * This is a bit of a hack. C-Nokogiri allows adding a bare - * text node as the root element. Java (and XML spec?) does - * not. So we wrap the text node in an element. 
- */ - if (parent.getNodeType() == Node.DOCUMENT_NODE && otherNode.getNodeType() == Node.TEXT_NODE) { - Element e = (Element) parent.getFirstChild(); - if (e == null || !e.getNodeName().equals(TEXT_WRAPPER_NAME)) { - e = ((Document)parent).createElement(TEXT_WRAPPER_NAME); - adoptAsChild(context, parent, e); - } - e.appendChild(otherNode); - otherNode = e; - } else { - addNamespaceURIIfNeeded(otherNode); - parent.appendChild(otherNode); - } - Node[] nodes = new Node[1]; - nodes[0] = otherNode; - return nodes; - } - - private void addNamespaceURIIfNeeded(Node child) { - if (this instanceof XmlDocumentFragment && ((XmlDocumentFragment)this).getFragmentContext() != null) { - XmlElement fragmentContext = ((XmlDocumentFragment)this).getFragmentContext(); - String namespace_uri = fragmentContext.node.getNamespaceURI(); - if (namespace_uri != null && namespace_uri.length() > 0) { - NokogiriHelpers.renameNode(child, namespace_uri, child.getNodeName()); - } - } + if (otherNode.getNodeType() == Node.TEXT_NODE) { + coalesceTextNodes(context, other, scheme); } - protected void adoptAsPrevSibling(ThreadContext context, - Node parent, - Node thisNode, Node otherNode) { - if (parent == null) { - /* I'm not sure what do do here... A node with no - * parent can't exactly have a 'sibling', so we make - * otherNode parentless also. */ - if (otherNode.getParentNode() != null) - otherNode.getParentNode().removeChild(otherNode); - return; - } - - parent.insertBefore(otherNode, thisNode); + if (this instanceof XmlDocument) { + ((XmlDocument) this).resetNamespaceCache(context); } - protected void adoptAsNextSibling(ThreadContext context, - Node parent, - Node thisNode, Node otherNode) { - if (parent == null) { - /* I'm not sure what do do here... A node with no - * parent can't exactly have a 'sibling', so we make - * otherNode parentless also. 
*/ - if (otherNode.getParentNode() != null) - otherNode.getParentNode().removeChild(otherNode); + other.relink_namespace(context); - return; - } - - Node nextSib = thisNode.getNextSibling(); + return nodeOrTags; + } - if (nextSib != null) { - parent.insertBefore(otherNode, nextSib); - } else { - parent.appendChild(otherNode); - } - } - - protected void adoptAsReplacement(ThreadContext context, - Node parentNode, - Node thisNode, Node otherNode) { - if (parentNode == null) { - /* nothing to replace? */ - return; - } - - try { - parentNode.replaceChild(otherNode, thisNode); - } catch (Exception e) { - String prefix = "could not replace child: "; - throw context.getRuntime().newRuntimeError(prefix + e.toString()); - } + /** + * This is a hack to fix #839. We should submit a patch to Xerces. + * It looks like CoreDocumentImpl.adoptNode() doesn't copy + * the user data associated with child nodes (recursively). + */ + private static void + fixUserData(Document previous, Node ret) + { + final String key = NokogiriHelpers.ENCODED_STRING; + for (Node child = ret.getFirstChild(); child != null; child = child.getNextSibling()) { + CoreDocumentImpl previousDocument = (CoreDocumentImpl) previous; + child.setUserData(key, previousDocument.getUserData(child, key), null); + fixUserData(previous, child); } + } - /** - * Add other as a child of this. + private Node[] + adoptAsChild(final Node parent, Node otherNode) + { + /* + * This is a bit of a hack. C-Nokogiri allows adding a bare text node as the root element. + * Java (and XML spec?) does not. So we wrap the text node in an element. 
*/ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject add_child_node(ThreadContext context, IRubyObject other) { - return adoptAs(context, AdoptScheme.CHILD, other); - } + if (parent.getNodeType() == Node.DOCUMENT_NODE && otherNode.getNodeType() == Node.TEXT_NODE) { + Element e = (Element) parent.getFirstChild(); + if (e == null || !e.getNodeName().equals(TEXT_WRAPPER_NAME)) { + e = ((Document) parent).createElement(TEXT_WRAPPER_NAME); + adoptAsChild(parent, e); + } + e.appendChild(otherNode); + otherNode = e; + } else { + parent.appendChild(otherNode); + } + return new Node[] { otherNode }; + } + + protected void + adoptAsPrevSibling(ThreadContext context, + Node parent, + Node thisNode, Node otherNode) + { + if (parent == null) { + /* I'm not sure what do do here... A node with no + * parent can't exactly have a 'sibling', so we make + * otherNode parentless also. */ + if (otherNode.getParentNode() != null) { + otherNode.getParentNode().removeChild(otherNode); + } + return; + } + + parent.insertBefore(otherNode, thisNode); + } + + protected void + adoptAsNextSibling(ThreadContext context, + Node parent, + Node thisNode, Node otherNode) + { + if (parent == null) { + /* I'm not sure what do do here... A node with no + * parent can't exactly have a 'sibling', so we make + * otherNode parentless also. */ + if (otherNode.getParentNode() != null) { + otherNode.getParentNode().removeChild(otherNode); + } - /** - * Replace this with other. 
- */ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject replace_node(ThreadContext context, IRubyObject other) { - return adoptAs(context, AdoptScheme.REPLACEMENT, other); + return; + } + + Node nextSib = thisNode.getNextSibling(); + + if (nextSib != null) { + parent.insertBefore(otherNode, nextSib); + } else { + parent.appendChild(otherNode); + } + } + + protected void + adoptAsReplacement(ThreadContext context, + Node parentNode, + Node thisNode, Node otherNode) + { + if (parentNode == null) { + /* nothing to replace? */ + return; + } + + try { + parentNode.replaceChild(otherNode, thisNode); + } catch (Exception e) { + String prefix = "could not replace child: "; + throw context.runtime.newRuntimeError(prefix + e.toString()); + } + } + + /** + * Add other as a child of this. + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + add_child_node(ThreadContext context, IRubyObject other) + { + return adoptAs(context, AdoptScheme.CHILD, other); + } + + /** + * Replace this with other. + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + replace_node(ThreadContext context, IRubyObject other) + { + return adoptAs(context, AdoptScheme.REPLACEMENT, other); + } + + /** + * Add other as a sibling before this. + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + add_previous_sibling_node(ThreadContext context, IRubyObject other) + { + return adoptAs(context, AdoptScheme.PREV_SIBLING, other); + } + + /** + * Add other as a sibling after this. + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + add_next_sibling_node(ThreadContext context, IRubyObject other) + { + return adoptAs(context, AdoptScheme.NEXT_SIBLING, other); + } + + /** + * call-seq: + * process_xincludes(options) + * + * Loads and substitutes all xinclude elements below the node. The + * parser context will be initialized with +options+. 
+ * + */ + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + process_xincludes(ThreadContext context, IRubyObject options) + { + XmlDocument xmlDocument = (XmlDocument)document(context); + RubyArray errors = (RubyArray)xmlDocument.getInstanceVariable("@errors"); + while (errors.getLength() > 0) { + XmlSyntaxError error = (XmlSyntaxError)errors.shift(context); + if (error.toString().contains("Include operation failed")) { + throw error.toThrowable(); + } } + return this; + } - /** - * Add other as a sibling before this. - */ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject add_previous_sibling_node(ThreadContext context, IRubyObject other) { - return adoptAs(context, AdoptScheme.PREV_SIBLING, other); - } + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + clear_xpath_context(ThreadContext context) + { + clearXpathContext(getNode()); + return context.nil ; + } - /** - * Add other as a sibling after this. - */ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject add_next_sibling_node(ThreadContext context, IRubyObject other) { - return adoptAs(context, AdoptScheme.NEXT_SIBLING, other); - } - - /** - * call-seq: - * process_xincludes(options) - * - * Loads and substitutes all xinclude elements below the node. The - * parser context will be initialized with +options+. 
- * - */ - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject process_xincludes(ThreadContext context, IRubyObject options) { - XmlDocument xmlDocument = (XmlDocument)document(context); - RubyArray errors = (RubyArray)xmlDocument.getInstanceVariable("@errors"); - while(errors.getLength() > 0) { - XmlSyntaxError error = (XmlSyntaxError)errors.shift(context); - if (error.toString().contains("Include operation failed")) { - throw new RaiseException(error); - } - } - return this; + @SuppressWarnings("unchecked") + @Override + public T + toJava(Class target) + { + if (target == Object.class || Node.class.isAssignableFrom(target)) { + return (T)getNode(); } + return super.toJava(target); + } - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject clear_xpath_context(ThreadContext context) { - clearXpathContext(getNode()); - return context.nil ; - } } diff --git a/ext/java/nokogiri/XmlNodeSet.java b/ext/java/nokogiri/XmlNodeSet.java index 831e3af183..f7390f832a 100644 --- a/ext/java/nokogiri/XmlNodeSet.java +++ b/ext/java/nokogiri/XmlNodeSet.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included 
in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.XmlNode.setDocumentAndDecorate; @@ -39,13 +7,13 @@ import java.util.Arrays; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyObject; import org.jruby.RubyRange; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.Block; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Node; @@ -57,383 +25,439 @@ * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::NodeSet") -public class XmlNodeSet extends RubyObject implements NodeList { - - private IRubyObject[] nodes; - - @JRubyMethod(name = "new", meta = true, rest = true) - public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, - IRubyObject[] args, Block block) { - RubyClass klass = (RubyClass) cls; - XmlNodeSet set = (XmlNodeSet) klass.allocate(); - set.setNodes(new IRubyObject[0]); - set.callInit(args, block); - return set; +@JRubyClass(name = "Nokogiri::XML::NodeSet") +public class XmlNodeSet extends RubyObject implements NodeList +{ + private static final long serialVersionUID = 1L; + + IRubyObject[] nodes; + + public + XmlNodeSet(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + nodes = IRubyObject.NULL_ARRAY; + } + + XmlNodeSet(Ruby ruby, RubyClass klazz, IRubyObject[] nodes) + { + super(ruby, klazz); + this.nodes 
= nodes; + } + + public static XmlNodeSet + newEmptyNodeSet(ThreadContext context, XmlNodeSet docOwner) + { + final Ruby runtime = context.runtime; + XmlNodeSet set = new XmlNodeSet(runtime, getNokogiriClass(runtime, "Nokogiri::XML::NodeSet")); + set.initializeFrom(context, docOwner); + return set; + } + + public static XmlNodeSet + newEmptyNodeSet(ThreadContext context, XmlNode docOwner) + { + final Ruby runtime = context.runtime; + XmlNodeSet set = new XmlNodeSet(runtime, getNokogiriClass(runtime, "Nokogiri::XML::NodeSet")); + set.initialize(runtime, docOwner); + return set; + } + + public static XmlNodeSet + newNodeSet(Ruby runtime, IRubyObject[] nodes) + { + XmlNodeSet xmlNodeSet = new XmlNodeSet(runtime, getNokogiriClass(runtime, "Nokogiri::XML::NodeSet")); + xmlNodeSet.setNodes(nodes); + return xmlNodeSet; + } + + public static XmlNodeSet + newNodeSet(Ruby runtime, IRubyObject[] nodes, XmlNode docOwner) + { + XmlNodeSet set = new XmlNodeSet(runtime, getNokogiriClass(runtime, "Nokogiri::XML::NodeSet"), nodes); + set.initialize(runtime, docOwner); + return set; + } + + /** + * Create and return a copy of this object. + * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + private void + setNodes(IRubyObject[] array) + { + this.nodes = array; + + IRubyObject first = array.length > 0 ? 
array[0] : null; + initialize(getRuntime(), first); + } + + private void + initializeFrom(ThreadContext context, XmlNodeSet ref) + { + IRubyObject document = ref.getInstanceVariable("@document"); + if (document != null && !document.isNil()) { + initialize(context, (XmlDocument) document); } - - public XmlNodeSet(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); + } + + final void + initialize(Ruby runtime, IRubyObject refNode) + { + if (refNode instanceof XmlNode) { + XmlDocument doc = ((XmlNode) refNode).document(runtime); + setDocumentAndDecorate(runtime.getCurrentContext(), this, doc); } - - private static XmlNodeSet create(final Ruby runtime) { - return (XmlNodeSet) NokogiriService.XML_NODESET_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::NodeSet")); + } + + private void + initialize(ThreadContext context, XmlDocument doc) + { + setDocumentAndDecorate(context, this, doc); + } + + public int + length() + { + return nodes == null ? 0 : nodes.length; + } + + @JRubyMethod(name = "&") + public IRubyObject + op_and(ThreadContext context, IRubyObject nodeSet) + { + IRubyObject[] otherNodes = getNodes(context, nodeSet); + + if (otherNodes == null || otherNodes.length == 0) { + return newEmptyNodeSet(context, this); } - public static XmlNodeSet newEmptyNodeSet(ThreadContext context) { - XmlNodeSet set = create(context.getRuntime()); - set.nodes = new IRubyObject[0]; - return set; + if (nodes == null || nodes.length == 0) { + return newEmptyNodeSet(context, this); } - public static XmlNodeSet newXmlNodeSet(ThreadContext context, IRubyObject[] nodes) { - XmlNodeSet xmlNodeSet = create(context.runtime); - xmlNodeSet.setNodes(nodes); - return xmlNodeSet; - } + IRubyObject[] curr = nodes; + IRubyObject[] other = getNodes(context, nodeSet); + IRubyObject[] result = new IRubyObject[nodes.length]; - /** - * Create and return a copy of this object. 
- * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - void setNodes(IRubyObject[] array) { - this.nodes = array; - - IRubyObject first = array.length > 0 ? array[0] : null; - initialize(getRuntime(), first); - } - - private void setReference(XmlNodeSet reference) { - IRubyObject first = reference.nodes.length > 0 ? reference.nodes[0] : null; - initialize(getRuntime(), first); - } - - public void setNodeList(NodeList nodeList) { - setNodes(nodeListToRubyArray(getRuntime(), nodeList)); - } + int last = 0; + outer: + for (int i = 0; i < curr.length; i++) { + IRubyObject n = curr[i]; - final void initialize(Ruby runtime, IRubyObject refNode) { - if (refNode instanceof XmlNode) { - IRubyObject doc = ((XmlNode) refNode).document(runtime); - setDocumentAndDecorate(runtime.getCurrentContext(), this, doc); - } - } - - public int length() { - return nodes == null ? 0 : nodes.length; - } - - public void relink_namespace(ThreadContext context) { - for (int i = 0; i < nodes.length; i++) { - if (nodes[i] instanceof XmlNode) { - ((XmlNode) nodes[i]).relink_namespace(context); - } + for (int j = 0; j < other.length; j++) { + if (other[j] == n) { + result[last++] = n; + continue outer; } + } } - @JRubyMethod(name="&") - public IRubyObject op_and(ThreadContext context, IRubyObject nodeSet) { - IRubyObject[] otherNodes = getNodes(context, nodeSet); - - if (otherNodes == null || otherNodes.length == 0) { - return newEmptyNodeSet(context); - } + XmlNodeSet newSet = newNodeSet(context.runtime, Arrays.copyOf(result, last)); + newSet.initializeFrom(context, this); + return newSet; + } - if (nodes == null || nodes.length == 0) { - return newEmptyNodeSet(context); - } + @JRubyMethod + public IRubyObject + delete (ThreadContext context, IRubyObject node_or_namespace) + { + IRubyObject nodeOrNamespace = asXmlNodeOrNamespace(context, node_or_namespace); - IRubyObject[] curr = nodes; - IRubyObject[] 
other = getNodes(context, nodeSet); - IRubyObject[] result = new IRubyObject[nodes.length]; - - int last = 0; -outer: - for (int i = 0; i < curr.length; i++) { - IRubyObject n = curr[i]; - - for (int j = 0; j < other.length; j++) { - if (other[j] == n) { - result[last++] = n; - continue outer; - } - } - } - - XmlNodeSet newSet = newXmlNodeSet(context, Arrays.copyOf(result, last)); - newSet.setReference(this); - return newSet; + if (nodes.length == 0) { + return context.nil; } - @JRubyMethod - public IRubyObject delete(ThreadContext context, IRubyObject node_or_namespace) { - IRubyObject nodeOrNamespace = asXmlNodeOrNamespace(context, node_or_namespace); - - if (nodes.length == 0) { - return context.nil; - } - - IRubyObject[] orig = nodes; - IRubyObject[] result = new IRubyObject[nodes.length]; - - int last = 0; - - for (int i = 0; i < orig.length; i++) { - IRubyObject n = orig[i]; - - if (n == nodeOrNamespace) { - continue; - } - - result[last++] = n; - } - - if (nodeOrNamespace instanceof XmlNamespace) { - ((XmlNamespace) nodeOrNamespace).deleteHref(); - } + IRubyObject[] orig = nodes; + IRubyObject[] result = new IRubyObject[nodes.length]; - nodes = Arrays.copyOf(result, last); + int last = 0; - if (nodes.length < orig.length) { - // if we found the node return it - return nodeOrNamespace; - } + for (int i = 0; i < orig.length; i++) { + IRubyObject n = orig[i]; - return context.nil; - } + if (n == nodeOrNamespace) { + continue; + } - @JRubyMethod - public IRubyObject dup(ThreadContext context) { - return newXmlNodeSet(context, nodes); + result[last++] = n; } - @JRubyMethod(name = "include?") - public IRubyObject include_p(ThreadContext context, IRubyObject node_or_namespace) { - for (int i = 0; i < nodes.length; i++) { - if (nodes[i] == node_or_namespace) { - return context.tru; - } - } + nodes = Arrays.copyOf(result, last); - return context.runtime.getFalse(); + if (nodes.length < orig.length) { + // if we found the node return it + return nodeOrNamespace; } - 
@JRubyMethod(name = {"length", "size"}) - public IRubyObject length(ThreadContext context) { - return context.getRuntime().newFixnum(nodes.length); + return context.nil; + } + + @JRubyMethod + public IRubyObject + dup(ThreadContext context) + { + XmlNodeSet dup = newNodeSet(context.runtime, nodes.clone()); + dup.initializeFrom(context, this); + return dup; + } + + @JRubyMethod(name = "include?") + public IRubyObject + include_p(ThreadContext context, IRubyObject node_or_namespace) + { + for (int i = 0; i < nodes.length; i++) { + if (nodes[i] == node_or_namespace) { + return context.tru; + } } - @JRubyMethod(name="-") - public IRubyObject op_diff(ThreadContext context, IRubyObject nodeSet) { - IRubyObject[] otherNodes = getNodes(context, nodeSet); + return context.fals; + } - if (otherNodes.length == 0) { - return dup(context); - } + @JRubyMethod(name = {"length", "size"}) + public IRubyObject + length(ThreadContext context) + { + return context.runtime.newFixnum(nodes.length); + } - if (nodes.length == 0) { - return newEmptyNodeSet(context); - } + @JRubyMethod(name = "-") + public IRubyObject + op_diff(ThreadContext context, IRubyObject nodeSet) + { + IRubyObject[] otherNodes = getNodes(context, nodeSet); - IRubyObject[] curr = nodes; - IRubyObject[] other = getNodes(context, nodeSet); - IRubyObject[] result = new IRubyObject[nodes.length]; - - int last = 0; -outer: - for (int i = 0; i < curr.length; i++) { - IRubyObject n = curr[i]; - - for (int j = 0; j < other.length; j++) { - if (other[j] == n) { - continue outer; - } - } - - result[last++] = n; - } + if (otherNodes.length == 0) { + return dup(context); + } - XmlNodeSet newSet = newXmlNodeSet(context, Arrays.copyOf(result, last)); - newSet.setReference(this); - return newSet; + if (nodes.length == 0) { + return newEmptyNodeSet(context, this); } - @JRubyMethod(name={"|", "+"}) - public IRubyObject op_or(ThreadContext context, IRubyObject nodeSet) { - IRubyObject[] otherNodes = getNodes(context, nodeSet); + 
IRubyObject[] curr = nodes; + IRubyObject[] other = getNodes(context, nodeSet); + IRubyObject[] result = new IRubyObject[nodes.length]; - if (nodes.length == 0) { - return ((XmlNodeSet) nodeSet).dup(context); - } + int last = 0; + outer: + for (int i = 0; i < curr.length; i++) { + IRubyObject n = curr[i]; - if (otherNodes.length == 0) { - return dup(context); + for (int j = 0; j < other.length; j++) { + if (other[j] == n) { + continue outer; } + } - IRubyObject[] curr = nodes; - IRubyObject[] other = getNodes(context, nodeSet); - IRubyObject[] result = Arrays.copyOf(curr, curr.length + other.length); - - int last = curr.length; -outer: - for (int i = 0; i < other.length; i++) { - IRubyObject n = other[i]; + result[last++] = n; + } - for (int j = 0; j < curr.length; j++) { - if (curr[j] == n) { - continue outer; - } - } + XmlNodeSet newSet = newNodeSet(context.runtime, Arrays.copyOf(result, last)); + newSet.initializeFrom(context, this); + return newSet; + } - result[last++] = n; - } + @JRubyMethod(name = {"|", "+"}) + public IRubyObject + op_or(ThreadContext context, IRubyObject nodeSet) + { + IRubyObject[] otherNodes = getNodes(context, nodeSet); - XmlNodeSet newSet = newXmlNodeSet(context, Arrays.copyOf(result, last)); - newSet.setReference(this); - return newSet; + if (nodes.length == 0) { + return ((XmlNodeSet) nodeSet).dup(context); } - @JRubyMethod(name = {"push", "<<"}) - public IRubyObject push(ThreadContext context, IRubyObject node_or_namespace) { - nodes = Arrays.copyOf(nodes, nodes.length+1); - nodes[nodes.length-1] = node_or_namespace; - return this; + if (otherNodes.length == 0) { + return dup(context); } - // replace with - // https://github.com/jruby/jruby/blame/13a3ec76d883a162b9d46c374c6e9eeea27b3261/core/src/main/java/org/jruby/RubyRange.java#L974 - // once we upgraded the min JRuby version to >= 9.2 - private static IRubyObject rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len, int[] begLen) { - RubyRange range = 
(RubyRange) rangeMaybe; - int min = range.begin(context).convertToInteger().getIntValue(); - int max = range.end(context).convertToInteger().getIntValue(); - - if (min < 0) { - min += len; - if (min < 0) { - throw context.runtime.newRangeError(min + ".." + (range.isExcludeEnd() ? "." : "") + max + " out of range"); - } - } + IRubyObject[] curr = nodes; + IRubyObject[] other = getNodes(context, nodeSet); + IRubyObject[] result = Arrays.copyOf(curr, curr.length + other.length); - if (max < 0) { - max += len; - } + int last = curr.length; + outer: + for (int i = 0; i < other.length; i++) { + IRubyObject n = other[i]; - if (!range.isExcludeEnd()) { - max++; + for (int j = 0; j < curr.length; j++) { + if (curr[j] == n) { + continue outer; } + } - begLen[0] = min; - begLen[1] = max; - return context.tru; + result[last++] = n; } + XmlNodeSet newSet = newNodeSet(context.runtime, Arrays.copyOf(result, last)); + newSet.initializeFrom(context, this); + return newSet; + } + + @JRubyMethod(name = {"push", "<<"}) + public IRubyObject + push(ThreadContext context, IRubyObject node_or_namespace) + { + nodes = Arrays.copyOf(nodes, nodes.length + 1); + nodes[nodes.length - 1] = node_or_namespace; + return this; + } + + // replace with + // https://github.com/jruby/jruby/blame/13a3ec76d883a162b9d46c374c6e9eeea27b3261/core/src/main/java/org/jruby/RubyRange.java#L974 + // once we upgraded the min JRuby version to >= 9.2 + private static IRubyObject + rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len, int[] begLen) + { + RubyRange range = (RubyRange) rangeMaybe; + int min = range.begin(context).convertToInteger().getIntValue(); + int max = range.end(context).convertToInteger().getIntValue(); + + if (min < 0) { + min += len; + if (min < 0) { + throw context.runtime.newRangeError(min + ".." + (range.isExcludeEnd() ? "." 
: "") + max + " out of range"); + } + } - @JRubyMethod(name={"[]", "slice"}) - public IRubyObject slice(ThreadContext context, IRubyObject indexOrRange) { - if (indexOrRange instanceof RubyFixnum) { - int idx = ((RubyFixnum)indexOrRange).getIntValue(); - - if (idx < 0) { - idx += nodes.length; - } - - if (idx >= nodes.length || idx < 0) { - return context.nil; - } - - return nodes[idx]; - } + if (max < 0) { + max += len; + } - int[] begLen = new int[2]; - rangeBeginLength(context, indexOrRange, nodes.length, begLen); - int min = begLen[0]; - int max = begLen[1]; - return subseq(context, min, max - min); + if (!range.isExcludeEnd()) { + max++; } - @JRubyMethod(name={"[]", "slice"}) - public IRubyObject slice(ThreadContext context, IRubyObject start, IRubyObject length) { - int s = ((RubyFixnum) start).getIntValue(); - int l = ((RubyFixnum) length).getIntValue(); + begLen[0] = min; + begLen[1] = max; + return context.tru; + } - if (s < 0) { - s += nodes.length; - } - return subseq(context, s, l); + @JRubyMethod(name = {"[]", "slice"}) + public IRubyObject + slice(ThreadContext context, IRubyObject indexOrRange) + { + if (indexOrRange instanceof RubyFixnum) { + return slice(context, ((RubyFixnum) indexOrRange).getIntValue()); + } + if (indexOrRange instanceof RubyRange) { + int[] begLen = new int[2]; + rangeBeginLength(context, indexOrRange, nodes.length, begLen); + int min = begLen[0]; + int max = begLen[1]; + return subseq(context, min, max - min); + } + throw context.runtime.newTypeError("index must be an Integer or a Range"); + } + + IRubyObject + slice(ThreadContext context, int idx) + { + if (idx < 0) { + idx += nodes.length; } - public IRubyObject subseq(ThreadContext context, int start, int length) { - if (start > nodes.length) { - return context.nil; - } - - if (start < 0 || length < 0) { - return context.nil; - } - - if (start + length > nodes.length) { - length = nodes.length - start; - } + if (idx >= nodes.length || idx < 0) { + return context.nil; + } - 
int to = start + length; + return nodes[idx]; + } - IRubyObject[] newNodes = Arrays.copyOfRange(nodes, start, to); + @JRubyMethod(name = {"[]", "slice"}) + public IRubyObject + slice(ThreadContext context, IRubyObject start, IRubyObject length) + { + int s = ((RubyFixnum) start).getIntValue(); + int l = ((RubyFixnum) length).getIntValue(); - return newXmlNodeSet(context, newNodes); + if (s < 0) { + s += nodes.length; } - @JRubyMethod(name = {"to_a", "to_ary"}) - public IRubyObject to_a(ThreadContext context) { - return context.runtime.newArrayNoCopy(nodes); - } + return subseq(context, s, l); + } - @JRubyMethod(name = {"unlink", "remove"}) - public IRubyObject unlink(ThreadContext context) { - for (int i = 0; i < nodes.length; i++) { - if (nodes[i] instanceof XmlNode) { - ((XmlNode) nodes[i] ).unlink(context); - } - } - return this; + public IRubyObject + subseq(ThreadContext context, int start, int length) + { + if (start > nodes.length) { + return context.nil; } - private static XmlNodeSet newXmlNodeSet(ThreadContext context, XmlNodeSet reference) { - XmlNodeSet xmlNodeSet = create(context.getRuntime()); - xmlNodeSet.setReference(reference); - xmlNodeSet.nodes = new IRubyObject[0]; - return xmlNodeSet; + if (start < 0 || length < 0) { + return context.nil; } - private static IRubyObject asXmlNodeOrNamespace(ThreadContext context, IRubyObject possibleNode) { - if (possibleNode instanceof XmlNode || possibleNode instanceof XmlNamespace) { - return possibleNode; - } - throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace"); + if (start + length > nodes.length) { + length = nodes.length - start; } - static IRubyObject[] getNodes(ThreadContext context, IRubyObject possibleNodeSet) { - if (possibleNodeSet instanceof XmlNodeSet) { - return ((XmlNodeSet) possibleNodeSet).nodes; - } - throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::NodeSet"); + int to = start + length; + + return 
newNodeSet(context.runtime, Arrays.copyOfRange(nodes, start, to)); + } + + @JRubyMethod(name = {"to_a", "to_ary"}) + public RubyArray + to_a(ThreadContext context) + { + return context.runtime.newArrayNoCopy(nodes); + } + + @JRubyMethod(name = {"unlink", "remove"}) + public IRubyObject + unlink(ThreadContext context) + { + for (int i = 0; i < nodes.length; i++) { + if (nodes[i] instanceof XmlNode) { + ((XmlNode) nodes[i]).unlink(context); + } } - - public int getLength() { - return nodes.length; + return this; + } + + private static IRubyObject + asXmlNodeOrNamespace(ThreadContext context, IRubyObject possibleNode) + { + if (possibleNode instanceof XmlNode || possibleNode instanceof XmlNamespace) { + return possibleNode; } - - public Node item(int index) { - Object n = nodes[index]; - if (n instanceof XmlNode) return ((XmlNode)n).node; - if (n instanceof XmlNamespace) return ((XmlNamespace)n).getNode(); - return null; + throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace"); + } + + private static IRubyObject[] + getNodes(ThreadContext context, IRubyObject possibleNodeSet) + { + if (possibleNodeSet instanceof XmlNodeSet) { + return ((XmlNodeSet) possibleNodeSet).nodes; } + throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::NodeSet"); + } + + public int + getLength() + { + return nodes.length; + } + + public Node + item(int index) + { + Object n = nodes[index]; + if (n instanceof XmlNode) { return ((XmlNode) n).node; } + if (n instanceof XmlNamespace) { return ((XmlNamespace) n).getNode(); } + return null; + } } diff --git a/ext/java/nokogiri/XmlProcessingInstruction.java b/ext/java/nokogiri/XmlProcessingInstruction.java index 08641f443a..fc41098b39 100644 --- a/ext/java/nokogiri/XmlProcessingInstruction.java +++ b/ext/java/nokogiri/XmlProcessingInstruction.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron 
Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.rubyStringToString; @@ -49,52 +17,63 @@ /** * Class for Nokogiri::XML::ProcessingInstruction - * + * * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::ProcessingInstruction", parent="Nokogiri::XML::Node") -public class XmlProcessingInstruction extends XmlNode { +@JRubyClass(name = "Nokogiri::XML::ProcessingInstruction", parent = "Nokogiri::XML::Node") +public class XmlProcessingInstruction extends XmlNode +{ + private static final long serialVersionUID = 1L; + + public + XmlProcessingInstruction(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } + + public + XmlProcessingInstruction(Ruby ruby, RubyClass klazz, Node node) + { + super(ruby, klazz, node); + } - public XmlProcessingInstruction(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - public XmlProcessingInstruction(Ruby ruby, RubyClass klazz, Node node) { - super(ruby, klazz, node); - } + @JRubyMethod(name = "new", meta = true, rest = true, required = 3) + public static IRubyObject + rbNew(ThreadContext context, + IRubyObject klazz, + IRubyObject[] args) + { - @JRubyMethod(name="new", meta=true, rest=true, required=3) - public static IRubyObject rbNew(ThreadContext context, - IRubyObject klazz, - IRubyObject[] args) { + IRubyObject doc = args[0]; + IRubyObject target = args[1]; + IRubyObject data = args[2]; - IRubyObject doc = args[0]; - IRubyObject target = args[1]; - IRubyObject data = args[2]; + Document document = ((XmlNode) doc).getOwnerDocument(); + Node node = + document.createProcessingInstruction(rubyStringToString(target), + rubyStringToString(data)); + XmlProcessingInstruction self = + new XmlProcessingInstruction(context.getRuntime(), + (RubyClass) klazz, + node); - Document document = ((XmlNode) doc).getOwnerDocument(); - Node node = - document.createProcessingInstruction(rubyStringToString(target), - rubyStringToString(data)); - XmlProcessingInstruction self = - new 
XmlProcessingInstruction(context.getRuntime(), - (RubyClass) klazz, - node); + Helpers.invoke(context, self, "initialize", args); - Helpers.invoke(context, self, "initialize", args); + // TODO: if_block_given. - // TODO: if_block_given. + return self; + } - return self; - } + @Override + public boolean + isProcessingInstruction() { return true; } - @Override - public boolean isProcessingInstruction() { return true; } - - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((ProcessingInstruction)node); - visitor.leave((ProcessingInstruction)node); - } + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((ProcessingInstruction)node); + visitor.leave((ProcessingInstruction)node); + } } diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index 7b93b62e56..403b5e7d4c 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -41,15 +9,6 @@ import java.util.List; import java.util.Stack; -import nokogiri.internals.NokogiriEntityResolver; -import nokogiri.internals.ParserContext; -import nokogiri.internals.ParserContext.Options; -import nokogiri.internals.ReaderNode; -import nokogiri.internals.ReaderNode.ClosingNode; -import nokogiri.internals.ReaderNode.ElementNode; -import nokogiri.internals.ReaderNode.TextNode; -import nokogiri.internals.UncloseableInputStream; - import org.apache.xerces.impl.Constants; import org.apache.xerces.impl.xs.opti.DefaultXMLDocumentHandler; import org.apache.xerces.parsers.StandardParserConfiguration; @@ -81,452 +40,576 @@ import org.jruby.util.IOInputStream; import org.xml.sax.InputSource; +import nokogiri.internals.NokogiriEntityResolver; +import nokogiri.internals.ParserContext; +import nokogiri.internals.ParserContext.Options; +import nokogiri.internals.ReaderNode; +import nokogiri.internals.ReaderNode.ClosingNode; +import nokogiri.internals.ReaderNode.ElementNode; +import nokogiri.internals.ReaderNode.TextNode; + /** * Class for Nokogiri:XML::Reader * * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::Reader") -public class XmlReader extends RubyObject { - - private static final int XML_TEXTREADER_MODE_INITIAL = 0; - private static final int XML_TEXTREADER_MODE_INTERACTIVE = 1; - private static final int XML_TEXTREADER_MODE_ERROR = 2; - private static 
final int XML_TEXTREADER_MODE_EOF = 3; - private static final int XML_TEXTREADER_MODE_CLOSED = 4; - private static final int XML_TEXTREADER_MODE_READING = 5; - - List nodeQueue; - private int state; - private int position = 0; - private XMLPullParserConfiguration config; - private boolean continueParsing = true; - - public XmlReader(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); +@JRubyClass(name = "Nokogiri::XML::Reader") +public class XmlReader extends RubyObject +{ + private static final long serialVersionUID = 1L; + + private static final int XML_TEXTREADER_MODE_INITIAL = 0; + private static final int XML_TEXTREADER_MODE_INTERACTIVE = 1; + private static final int XML_TEXTREADER_MODE_ERROR = 2; + private static final int XML_TEXTREADER_MODE_EOF = 3; + private static final int XML_TEXTREADER_MODE_CLOSED = 4; + private static final int XML_TEXTREADER_MODE_READING = 5; + + List nodeQueue; + private int state; + private int position = 0; + private XMLPullParserConfiguration config; + private boolean continueParsing = true; + + public + XmlReader(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz); + } + + /** + * Create and return a copy of this object. 
+ * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + public void + init(Ruby runtime) + { + nodeQueue = new LinkedList(); + nodeQueue.add(new ReaderNode.EmptyNode(runtime)); + } + + private void + setInput(ThreadContext context, InputStream in, IRubyObject url, Options options) + { + this.setState(XML_TEXTREADER_MODE_READING); + config = this.createReader(context.getRuntime(), options); + InputSource inputSource = new InputSource(); + ParserContext.setUrl(context, inputSource, url); + XMLInputSource xmlInputSource = new XMLInputSource(inputSource.getPublicId(), + inputSource.getSystemId(), null, in, null); + try { + config.setInputSource(xmlInputSource); + } catch (IOException e) { + throw context.getRuntime().newRuntimeError(e.getMessage()); + } + this.setState(XML_TEXTREADER_MODE_CLOSED); + } + + private void + setState(int state) { this.state = state; } + + @JRubyMethod + public IRubyObject + attribute(ThreadContext context, IRubyObject name) + { + return currentNode().getAttributeByName(name); + } + + @JRubyMethod + public IRubyObject + attribute_at(ThreadContext context, IRubyObject index) + { + return currentNode().getAttributeByIndex(index); + } + + @JRubyMethod + public IRubyObject + attribute_count(ThreadContext context) + { + return currentNode().getAttributeCount(); + } + + @JRubyMethod + public IRubyObject + attribute_nodes(ThreadContext context) + { + context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. 
Please use Reader#attribute_hash instead."); + return currentNode().getAttributesNodes(); + } + + @JRubyMethod + public IRubyObject + attribute_hash(ThreadContext context) + { + return currentNode().getAttributes(context); + } + + @JRubyMethod(name = "attributes?") + public IRubyObject + attributes_p(ThreadContext context) + { + return currentNode().hasAttributes(); + } + + @JRubyMethod + public IRubyObject + base_uri(ThreadContext context) + { + return currentNode().getXmlBase(); + } + + @JRubyMethod(name = "default?") + public IRubyObject + default_p(ThreadContext context) + { + return currentNode().isDefault(); + } + + @JRubyMethod + public IRubyObject + depth(ThreadContext context) + { + return currentNode().getDepth(); + } + + @JRubyMethod(name = {"empty_element?", "self_closing?"}) + public IRubyObject + empty_element_p(ThreadContext context) + { + ReaderNode readerNode = currentNode(); + ensureNodeClosed(context); + + if (readerNode == null) { return context.getRuntime().getNil(); } + if (!(readerNode instanceof ElementNode)) { context.getRuntime().getFalse(); } + return RubyBoolean.newBoolean(context.getRuntime(), !readerNode.hasChildren); + } + + @JRubyMethod + public IRubyObject + encoding(ThreadContext context) + { + IRubyObject constructor_encoding = getInstanceVariable("@encoding"); + if (!constructor_encoding.isNil()) { + return constructor_encoding; + } + // TODO: get the parser's detected encoding + return context.getRuntime().getNil(); + } + + @JRubyMethod(meta = true, rest = true) + public static IRubyObject + from_io(ThreadContext context, IRubyObject cls, IRubyObject args[]) + { + // Only to pass the source test. + Ruby runtime = context.getRuntime(); + // Not nil allowed! 
+ if (args[0].isNil()) { throw runtime.newArgumentError("io cannot be nil"); } + + XmlReader reader = (XmlReader) NokogiriService.XML_READER_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Reader")); + reader.init(runtime); + reader.setInstanceVariable("@source", args[0]); + reader.setInstanceVariable("@errors", runtime.newArray()); + IRubyObject url = context.nil; + if (args.length > 1) { url = args[1]; } + if (args.length > 2) { reader.setInstanceVariable("@encoding", args[2]); } + + Options options; + if (args.length > 3) { + options = new ParserContext.Options(args[3].toJava(Long.class)); + } else { + // use the default options RECOVER | NONET + options = new ParserContext.Options(2048 | 1); + } + + InputStream in = new IOInputStream(args[0]); + reader.setInput(context, in, url, options); + return reader; + } + + @JRubyMethod(meta = true, rest = true) + public static IRubyObject + from_memory(ThreadContext context, IRubyObject cls, IRubyObject args[]) + { + // args[0]: string, args[1]: url, args[2]: encoding, args[3]: options + Ruby runtime = context.getRuntime(); + // Not nil allowed! 
+ if (args[0].isNil()) { throw runtime.newArgumentError("string cannot be nil"); } + + XmlReader reader = (XmlReader) NokogiriService.XML_READER_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Reader")); + reader.init(runtime); + reader.setInstanceVariable("@source", args[0]); + reader.setInstanceVariable("@errors", runtime.newArray()); + IRubyObject url = context.nil; + if (args.length > 1) { url = args[1]; } + if (args.length > 2) { reader.setInstanceVariable("@encoding", args[2]); } + + Options options; + if (args.length > 3) { + options = new ParserContext.Options(args[3].toJava(Long.class)); + } else { + // use the default options RECOVER | NONET + options = new ParserContext.Options(2048 | 1); + } + IRubyObject stringIO = runtime.getClass("StringIO").newInstance(context, args[0], Block.NULL_BLOCK); + InputStream in = new IOInputStream(stringIO); + reader.setInput(context, in, url, options); + return reader; + } + + @JRubyMethod + public IRubyObject + node_type(ThreadContext context) + { + IRubyObject node_type = currentNode().getNodeType(); + return node_type == null ? 
RubyFixnum.zero(context.getRuntime()) : node_type; + } + + @JRubyMethod + public IRubyObject + inner_xml(ThreadContext context) + { + ensureNodeClosed(context); + return stringOrBlank(context.getRuntime(), getInnerXml(currentNode())); + } + + private String + getInnerXml(ReaderNode current) + { + if (current.depth < 0) { return null; } + if (!current.hasChildren) { return null; } + StringBuffer sb = new StringBuffer(); + for (int i = current.startOffset + 1; i <= current.endOffset - 1; i++) { + sb.append(nodeQueue.get(i).getString()); + } + return new String(sb); + } + + @JRubyMethod + public IRubyObject + outer_xml(ThreadContext context) + { + ensureNodeClosed(context); + return stringOrBlank(context.getRuntime(), getOuterXml()); + } + + private String + getOuterXml() + { + ReaderNode current = currentNode(); + if (current == null || current.depth < 0) { return null; } + + if (current instanceof ClosingNode) { + return "<" + current.name + "/>"; + } + + StringBuilder sb = new StringBuilder(); + for (int i = position; i <= current.endOffset; i++) { + sb.append(nodeQueue.get(i).getString()); + } + return new String(sb); + } + + @JRubyMethod + public IRubyObject + lang(ThreadContext context) + { + return currentNode().getLang(); + } + + @JRubyMethod + public IRubyObject + local_name(ThreadContext context) + { + return currentNode().getLocalName(); + } + + @JRubyMethod + public IRubyObject + name(ThreadContext context) + { + return currentNode().getName(); + } + + @JRubyMethod + public IRubyObject + namespace_uri(ThreadContext context) + { + return currentNode().getUri(); + } + + @JRubyMethod + public IRubyObject + namespaces(ThreadContext context) + { + return currentNode().getNamespaces(context); + } + + @JRubyMethod + public IRubyObject + prefix(ThreadContext context) + { + return currentNode().getPrefix(); + } + + private void + readMoreData(ThreadContext context) + { + if (!continueParsing) { throw context.runtime.newRuntimeError("Cannot parse more data"); } + 
try { + continueParsing = config.parse(false); + } catch (XNIException e) { + throw XmlSyntaxError.createXMLSyntaxError(context.runtime, e).toThrowable(); // Nokogiri::XML::SyntaxError + } catch (IOException e) { + throw context.runtime.newRuntimeError(e.toString()); + } + } + + private void + ensureNodeClosed(ThreadContext context) + { + ReaderNode node = currentNode(); + if (node instanceof TextNode) { return; } + while (node.endOffset < 1) { readMoreData(context); } + } + + @JRubyMethod + public IRubyObject + read(ThreadContext context) + { + position++; + try { + while (nodeQueue.size() <= position && continueParsing) { + readMoreData(context); + } + return setAndRaiseErrorsIfAny(context.runtime, null); + } catch (RaiseException ex) { + return setAndRaiseErrorsIfAny(context.runtime, ex); + } + } + + private IRubyObject + setAndRaiseErrorsIfAny(final Ruby runtime, final RaiseException ex) throws RaiseException + { + final ReaderNode currentNode = currentNode(); + if (currentNode == null) { return runtime.getNil(); } + if (currentNode.isError()) { + RubyArray errors = (RubyArray) getInstanceVariable("@errors"); + IRubyObject error = currentNode.toSyntaxError(); + errors.append(error); + setInstanceVariable("@errors", errors); + + throw ex != null ? 
ex : ((XmlSyntaxError) error).toThrowable(); + } + if (ex != null) { throw ex; } + return this; + } + + private ReaderNode + currentNode() + { + if (position >= nodeQueue.size()) { return null; } + return nodeQueue.get(position); + } + + @JRubyMethod + public IRubyObject + state(ThreadContext context) + { + return context.getRuntime().newFixnum(this.state); + } + + @JRubyMethod + public IRubyObject + value(ThreadContext context) + { + return currentNode().getValue(); + } + + @JRubyMethod(name = "value?") + public IRubyObject + value_p(ThreadContext context) + { + return currentNode().hasValue(); + } + + @JRubyMethod + public IRubyObject + xml_version(ThreadContext context) + { + return currentNode().getXmlVersion(); + } + + protected XMLPullParserConfiguration + createReader(Ruby ruby, Options options) + { + StandardParserConfiguration config = new StandardParserConfiguration(); + DocumentHandler handler = new DocumentHandler(ruby); + // XMLReader reader = XMLReaderFactory.createXMLReader(); + config.setDocumentHandler(handler); + config.setDTDHandler(handler); + config.setErrorHandler(handler); + config.setEntityResolver(new EntityResolver2Wrapper(new NokogiriEntityResolver(ruby, null, options))); + // config.setFeature("http://xml.org/sax/features/xmlns-uris", true); + // config.setFeature("http://xml.org/sax/features/namespace-prefixes", true); + config.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", options.dtdLoad + || options.dtdValid); + return config; + } + + private class DocumentHandler extends DefaultXMLDocumentHandler implements XMLErrorHandler + { + + Stack langStack; + int depth; + Stack xmlBaseStack; + Stack elementStack; + private final Ruby ruby; + + public + DocumentHandler(Ruby ruby) + { + this.ruby = ruby; } - /** - * Create and return a copy of this object. 
- * - * @return a clone of this object - */ @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - public void init(Ruby runtime) { - nodeQueue = new LinkedList(); - nodeQueue.add(new ReaderNode.EmptyNode(runtime)); - } - - private void setInput(ThreadContext context, InputStream in, IRubyObject url, Options options){ - this.setState(XML_TEXTREADER_MODE_READING); - config = this.createReader(context.getRuntime(), options); - InputSource inputSource = new InputSource(); - ParserContext.setUrl(context, inputSource, url); - XMLInputSource xmlInputSource = new XMLInputSource(inputSource.getPublicId(), - inputSource.getSystemId(), null, in, null); - try { - config.setInputSource(xmlInputSource); - } catch (IOException e) { - throw context.getRuntime().newRuntimeError(e.getMessage()); - } - this.setState(XML_TEXTREADER_MODE_CLOSED); - } - - private void setState(int state) { this.state = state; } - - @JRubyMethod - public IRubyObject attribute(ThreadContext context, IRubyObject name) { - return currentNode().getAttributeByName(name); - } - - @JRubyMethod - public IRubyObject attribute_at(ThreadContext context, IRubyObject index) { - return currentNode().getAttributeByIndex(index); - } - - @JRubyMethod - public IRubyObject attribute_count(ThreadContext context) { - return currentNode().getAttributeCount(); - } - - @JRubyMethod - public IRubyObject attribute_nodes(ThreadContext context) { - return currentNode().getAttributesNodes(); + public void + startGeneralEntity(String name, XMLResourceIdentifier identifier, + String encoding, Augmentations augs) throws XNIException + { + Object entitySkipped; + if (augs != null && (entitySkipped = augs.getItem(Constants.ENTITY_SKIPPED)) != null && ((Boolean) entitySkipped)) { + nodeQueue.add(new ReaderNode.ExceptionNode(ruby, null)); + } } - @JRubyMethod - public IRubyObject attr_nodes(ThreadContext context) { - return currentNode().getAttributesNodes(); - } - - @JRubyMethod(name = 
"attributes?") - public IRubyObject attributes_p(ThreadContext context) { - return currentNode().hasAttributes(); - } - @JRubyMethod - public IRubyObject base_uri(ThreadContext context) { - return currentNode().getXmlBase(); - } - - @JRubyMethod(name="default?") - public IRubyObject default_p(ThreadContext context){ - return currentNode().isDefault(); - } - - @JRubyMethod - public IRubyObject depth(ThreadContext context) { - return currentNode().getDepth(); - } - @JRubyMethod(name = {"empty_element?", "self_closing?"}) - public IRubyObject empty_element_p(ThreadContext context) { - ReaderNode readerNode = currentNode(); - ensureNodeClosed(context); - - if (readerNode == null) return context.getRuntime().getNil(); - if (!(readerNode instanceof ElementNode)) context.getRuntime().getFalse(); - return RubyBoolean.newBoolean(context.getRuntime(), !readerNode.hasChildren); - } - - @JRubyMethod(meta = true, rest = true) - public static IRubyObject from_io(ThreadContext context, IRubyObject cls, IRubyObject args[]) { - // Only to pass the source test. - Ruby runtime = context.getRuntime(); - // Not nil allowed! 
- if(args[0].isNil()) throw runtime.newArgumentError("io cannot be nil"); - - XmlReader reader = (XmlReader) NokogiriService.XML_READER_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Reader")); - reader.init(runtime); - reader.setInstanceVariable("@source", args[0]); - reader.setInstanceVariable("@errors", runtime.newArray()); - IRubyObject url = context.nil; - if (args.length > 1) url = args[1]; - if (args.length > 2) reader.setInstanceVariable("@encoding", args[2]); - - Options options; - if (args.length > 3) { - options = new ParserContext.Options((Long)args[3].toJava(Long.class)); - } else { - // use the default options RECOVER | NONET - options = new ParserContext.Options(2048 | 1); - } - - InputStream in = new UncloseableInputStream(new IOInputStream(args[0])); - reader.setInput(context, in, url, options); - return reader; - } - - @JRubyMethod(meta = true, rest = true) - public static IRubyObject from_memory(ThreadContext context, IRubyObject cls, IRubyObject args[]) { - // args[0]: string, args[1]: url, args[2]: encoding, args[3]: options - Ruby runtime = context.getRuntime(); - // Not nil allowed! 
- if(args[0].isNil()) throw runtime.newArgumentError("string cannot be nil"); - - XmlReader reader = (XmlReader) NokogiriService.XML_READER_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Reader")); - reader.init(runtime); - reader.setInstanceVariable("@source", args[0]); - reader.setInstanceVariable("@errors", runtime.newArray()); - IRubyObject url = context.nil; - if (args.length > 1) url = args[1]; - if (args.length > 2) reader.setInstanceVariable("@encoding", args[2]); - - Options options; - if (args.length > 3) { - options = new ParserContext.Options((Long)args[3].toJava(Long.class)); - } else { - // use the default options RECOVER | NONET - options = new ParserContext.Options(2048 | 1); - } - IRubyObject stringIO = runtime.getClass("StringIO").newInstance(context, args[0], Block.NULL_BLOCK); - InputStream in = new UncloseableInputStream(new IOInputStream(stringIO)); - reader.setInput(context, in, url, options); - return reader; - } - - @JRubyMethod - public IRubyObject node_type(ThreadContext context) { - IRubyObject node_type = currentNode().getNodeType(); - return node_type == null ? 
RubyFixnum.zero(context.getRuntime()) : node_type; - } - - @JRubyMethod - public IRubyObject inner_xml(ThreadContext context) { - ensureNodeClosed(context); - return stringOrBlank(context.getRuntime(), getInnerXml(currentNode())); - } - - private String getInnerXml(ReaderNode current) { - if (current.depth < 0) return null; - if (!current.hasChildren) return null; - StringBuffer sb = new StringBuffer(); - for (int i = current.startOffset + 1; i <= current.endOffset - 1; i++) { - sb.append(nodeQueue.get(i).getString()); - } - return new String(sb); - } - - @JRubyMethod - public IRubyObject outer_xml(ThreadContext context) { - ensureNodeClosed(context); - return stringOrBlank(context.getRuntime(), getOuterXml()); - } - - private String getOuterXml() { - ReaderNode current = currentNode(); - if (current == null || current.depth < 0) return null; - - if (current instanceof ClosingNode) { - return "<" + current.name + "/>"; - } - - StringBuilder sb = new StringBuilder(); - for (int i = position; i <= current.endOffset; i++) { - sb.append(nodeQueue.get(i).getString()); - } - return new String(sb); - } - - @JRubyMethod - public IRubyObject lang(ThreadContext context) { - return currentNode().getLang(); - } - - @JRubyMethod - public IRubyObject local_name(ThreadContext context) { - return currentNode().getLocalName(); - } - - @JRubyMethod - public IRubyObject name(ThreadContext context) { - return currentNode().getName(); - } - - @JRubyMethod - public IRubyObject namespace_uri(ThreadContext context) { - return currentNode().getUri(); - } - - @JRubyMethod - public IRubyObject namespaces(ThreadContext context) { - return currentNode().getNamespaces(context); - } - - @JRubyMethod - public IRubyObject prefix(ThreadContext context) { - return currentNode().getPrefix(); - } - - private void readMoreData(ThreadContext context) { - if (!continueParsing) throw context.runtime.newRuntimeError("Cannot parse more data"); - try { - continueParsing = config.parse(false); - } - catch 
(XNIException e) { - throw new RaiseException(XmlSyntaxError.createXMLSyntaxError(context.runtime, e)); // Nokogiri::XML::SyntaxError - } - catch (IOException e) { - throw context.runtime.newRuntimeError(e.toString()); - } - } - - private void ensureNodeClosed(ThreadContext context) { - ReaderNode node = currentNode(); - if (node instanceof TextNode) return; - while (node.endOffset < 1) readMoreData(context); - } - - @JRubyMethod - public IRubyObject read(ThreadContext context) { - position++; - try { - while (nodeQueue.size() <= position && continueParsing) { - readMoreData(context); - } - return setAndRaiseErrorsIfAny(context.runtime, null); - } - catch (RaiseException ex) { - return setAndRaiseErrorsIfAny(context.runtime, ex); - } + @Override + public void + startDocument(XMLLocator locator, String encoding, NamespaceContext context, Augmentations augs) + { + depth = 0; + langStack = new Stack(); + xmlBaseStack = new Stack(); + elementStack = new Stack(); } - private IRubyObject setAndRaiseErrorsIfAny(final Ruby runtime, final RaiseException ex) throws RaiseException { - final ReaderNode currentNode = currentNode(); - if (currentNode == null) return runtime.getNil(); - if (currentNode.isError()) { - RubyArray errors = (RubyArray) getInstanceVariable("@errors"); - IRubyObject error = currentNode.toSyntaxError(); - errors.append(error); - setInstanceVariable("@errors", errors); - - throw ex != null ? 
ex : new RaiseException((XmlSyntaxError) error); - } - if ( ex != null ) throw ex; - return this; + @Override + public void + endDocument(Augmentations augs) + { + langStack = null; + xmlBaseStack = null; + elementStack = null; } - private ReaderNode currentNode() { - if (position >= nodeQueue.size()) return null; - return nodeQueue.get(position); + @Override + public void + startElement(QName element, XMLAttributes attrs, Augmentations augs) + { + commonElement(element, attrs, false); } - @JRubyMethod - public IRubyObject state(ThreadContext context) { - return context.getRuntime().newFixnum(this.state); + @Override + public void + endElement(QName element, Augmentations augs) + { + String uri = element.uri; + String localName = element.localpart; + String qName = element.rawname; + depth--; + ElementNode startElementNode = elementStack.pop(); + ReaderNode node = ReaderNode.createClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack); + + startElementNode.endOffset = nodeQueue.size() - 1; + + if (startElementNode.endOffset != startElementNode.startOffset) { + // this node isn't empty + node.attributeList = startElementNode.attributeList; + node.namespaces = startElementNode.namespaces; + node.startOffset = startElementNode.startOffset; + node.endOffset = ++startElementNode.endOffset; + node.hasChildren = startElementNode.hasChildren = true; + nodeQueue.add(node); + } + if (!langStack.isEmpty()) { langStack.pop(); } + if (!xmlBaseStack.isEmpty()) { xmlBaseStack.pop(); } } - @JRubyMethod - public IRubyObject value(ThreadContext context) { - return currentNode().getValue(); + @Override + public void + emptyElement(QName element, XMLAttributes attrs, Augmentations augs) + { + commonElement(element, attrs, true); + } + + private void + commonElement(QName element, XMLAttributes attrs, boolean isEmpty) + { + String qName = element.rawname; + String uri = element.uri; + String localName = element.localpart; + ReaderNode readerNode = 
ReaderNode.createElementNode(ruby, uri, localName, qName, attrs, depth, langStack, + xmlBaseStack); + if (!elementStack.isEmpty()) { + ElementNode parent = elementStack.peek(); + parent.hasChildren = true; + } + nodeQueue.add(readerNode); + readerNode.startOffset = nodeQueue.size() - 1; + if (!isEmpty) { + depth++; + if (readerNode.lang != null) { langStack.push(readerNode.lang); } + if (readerNode.xmlBase != null) { xmlBaseStack.push(readerNode.xmlBase); } + elementStack.push((ReaderNode.ElementNode)readerNode); + } else { + readerNode.endOffset = readerNode.startOffset; + readerNode.hasChildren = false; + } } - @JRubyMethod(name = "value?") - public IRubyObject value_p(ThreadContext context) { - return currentNode().hasValue(); + @Override + public void + characters(XMLString string, Augmentations augs) + { + ReaderNode.TextNode node = ReaderNode.createTextNode(ruby, string.toString(), depth, langStack, xmlBaseStack); + nodeQueue.add(node); + node.startOffset = node.endOffset = nodeQueue.size() - 1; } - @JRubyMethod - public IRubyObject xml_version(ThreadContext context) { - return currentNode().getXmlVersion(); + @Override + public void + error(String domain, String key, XMLParseException ex) + { + nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); + throw ex; } - protected XMLPullParserConfiguration createReader(Ruby ruby, Options options) { - StandardParserConfiguration config = new StandardParserConfiguration(); - DocumentHandler handler = new DocumentHandler(ruby); - // XMLReader reader = XMLReaderFactory.createXMLReader(); - config.setDocumentHandler(handler); - config.setDTDHandler(handler); - config.setErrorHandler(handler); - config.setEntityResolver(new EntityResolver2Wrapper(new NokogiriEntityResolver(ruby, null, options))); - // config.setFeature("http://xml.org/sax/features/xmlns-uris", true); - // config.setFeature("http://xml.org/sax/features/namespace-prefixes", true); - 
config.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", options.dtdLoad || options.dtdValid); - return config; + @Override + public void + fatalError(String domain, String key, XMLParseException ex) + { + nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); + throw ex; } - private class DocumentHandler extends DefaultXMLDocumentHandler implements XMLErrorHandler { - - Stack langStack; - int depth; - Stack xmlBaseStack; - Stack elementStack; - private final Ruby ruby; - - public DocumentHandler(Ruby ruby) { - this.ruby = ruby; - } - - @Override - public void startGeneralEntity(String name, XMLResourceIdentifier identifier, - String encoding, Augmentations augs) throws XNIException { - Object entitySkipped; - if (augs != null && (entitySkipped = augs.getItem(Constants.ENTITY_SKIPPED)) != null && ((Boolean) entitySkipped)) { - nodeQueue.add(new ReaderNode.ExceptionNode(ruby, null)); - } - } - - - - @Override - public void startDocument(XMLLocator locator, String encoding, NamespaceContext context, Augmentations augs) { - depth = 0; - langStack = new Stack(); - xmlBaseStack = new Stack(); - elementStack = new Stack(); - } - - @Override - public void endDocument(Augmentations augs) { - langStack = null; - xmlBaseStack = null; - elementStack = null; - } - - @Override - public void startElement(QName element, XMLAttributes attrs, Augmentations augs) { - commonElement(element, attrs, false); - } - - @Override - public void endElement(QName element, Augmentations augs) { - String uri = element.uri; - String localName = element.localpart; - String qName = element.rawname; - depth--; - ElementNode startElementNode = elementStack.pop(); - ReaderNode node = ReaderNode.createClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack); - - startElementNode.endOffset = nodeQueue.size() - 1; - - if (startElementNode.endOffset != startElementNode.startOffset) { - // this node isn't empty - node.attributeList = 
startElementNode.attributeList; - node.namespaces = startElementNode.namespaces; - node.startOffset = startElementNode.startOffset; - node.endOffset = ++startElementNode.endOffset; - node.hasChildren = startElementNode.hasChildren = true; - nodeQueue.add(node); - } - if (!langStack.isEmpty()) langStack.pop(); - if (!xmlBaseStack.isEmpty()) xmlBaseStack.pop(); - } - - @Override - public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) { - commonElement(element, attrs, true); - } - - private void commonElement(QName element, XMLAttributes attrs, boolean isEmpty) { - String qName = element.rawname; - String uri = element.uri; - String localName = element.localpart; - ReaderNode readerNode = ReaderNode.createElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack); - if (!elementStack.isEmpty()) { - ElementNode parent = elementStack.peek(); - parent.hasChildren = true; - } - nodeQueue.add(readerNode); - readerNode.startOffset = nodeQueue.size() - 1; - if (!isEmpty) { - depth++; - if (readerNode.lang != null) langStack.push(readerNode.lang); - if (readerNode.xmlBase != null) xmlBaseStack.push(readerNode.xmlBase); - elementStack.push((ReaderNode.ElementNode)readerNode); - } else { - readerNode.endOffset = readerNode.startOffset; - readerNode.hasChildren = false; - } - } - - @Override - public void characters(XMLString string, Augmentations augs) { - ReaderNode.TextNode node = ReaderNode.createTextNode(ruby, string.toString(), depth, langStack, xmlBaseStack); - nodeQueue.add(node); - node.startOffset = node.endOffset = nodeQueue.size() - 1; - } - - @Override - public void error(String domain, String key, XMLParseException ex) { - nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); - throw ex; - } - - @Override - public void fatalError(String domain, String key, XMLParseException ex) { - nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); - throw ex; - } - - @Override - public void warning(String domain, String key, 
XMLParseException ex) { - nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); - throw ex; - } + @Override + public void + warning(String domain, String key, XMLParseException ex) + { + nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex)); + throw ex; } + } } diff --git a/ext/java/nokogiri/XmlRelaxng.java b/ext/java/nokogiri/XmlRelaxng.java index 4b0d73f4b9..eee9113a65 100644 --- a/ext/java/nokogiri/XmlRelaxng.java +++ b/ext/java/nokogiri/XmlRelaxng.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -56,89 +24,110 @@ import org.jruby.RubyClass; import org.jruby.anno.JRubyClass; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; /** * Class for Nokogiri::XML::RelaxNG - * + * * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::RelaxNG", parent="Nokogiri::XML::Schema") -public class XmlRelaxng extends XmlSchema { - private Verifier verifier; +@JRubyClass(name = "Nokogiri::XML::RelaxNG", parent = "Nokogiri::XML::Schema") +public class XmlRelaxng extends XmlSchema +{ + private static final long serialVersionUID = 1L; + private Verifier verifier; - public XmlRelaxng(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); - } - - private void setVerifier(Verifier verifier) { - this.verifier = verifier; - } - - static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) { - Ruby runtime = context.getRuntime(); - XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz); - xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray()); - - try { - Schema schema = xmlRelaxng.getSchema(source, context); - xmlRelaxng.setVerifier(schema.newVerifier()); - return xmlRelaxng; - } catch (VerifierConfigurationException ex) { - throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage()); - } + public + XmlRelaxng(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } + + private void + setVerifier(Verifier verifier) + { + this.verifier = verifier; + } + + static XmlSchema + createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) + { + Ruby runtime = context.getRuntime(); + XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz); + + if 
(parseOptions == null) { + parseOptions = defaultParseOptions(context.getRuntime()); } - private Schema getSchema(Source source, ThreadContext context) { - InputStream is; - VerifierFactory factory = new com.thaiopensource.relaxng.jarv.VerifierFactoryImpl(); - if (source instanceof StreamSource) { - StreamSource ss = (StreamSource)source; - is = ss.getInputStream(); - } else { //if (this.source instanceof DOMSource) - DOMSource ds = (DOMSource)source; - StringWriter xmlAsWriter = new StringWriter(); - StreamResult result = new StreamResult(xmlAsWriter); - try { - TransformerFactory.newInstance().newTransformer().transform(ds, result); - } catch (TransformerConfigurationException ex) { - throw context.getRuntime() - .newRuntimeError("Could not parse document: "+ex.getMessage()); - } catch (TransformerException ex) { - throw context.getRuntime() - .newRuntimeError("Could not parse document: "+ex.getMessage()); - } - try { - is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8")); - } catch (UnsupportedEncodingException ex) { - throw context.getRuntime() - .newRuntimeError("Could not parse document: "+ex.getMessage()); - } - } + xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray()); + xmlRelaxng.setInstanceVariable("@parse_options", parseOptions); - try { - return factory.compileSchema(is); - } catch (VerifierConfigurationException ex) { - throw context.getRuntime() - .newRuntimeError("Could not parse document: "+ex.getMessage()); - } catch (SAXException ex) { - throw context.getRuntime() - .newRuntimeError("Could not parse document: "+ex.getMessage()); - } catch (IOException ex) { - throw context.getRuntime().newIOError(ex.getMessage()); - } + try { + Schema schema = xmlRelaxng.getSchema(source, context); + xmlRelaxng.setVerifier(schema.newVerifier()); + return xmlRelaxng; + } catch (VerifierConfigurationException ex) { + throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage()); } - - @Override - 
protected void setErrorHandler(ErrorHandler errorHandler) { - verifier.setErrorHandler(errorHandler); + } + + private Schema + getSchema(Source source, ThreadContext context) + { + InputStream is; + VerifierFactory factory = new com.thaiopensource.relaxng.jarv.VerifierFactoryImpl(); + if (source instanceof StreamSource) { + StreamSource ss = (StreamSource)source; + is = ss.getInputStream(); + } else { //if (this.source instanceof DOMSource) + DOMSource ds = (DOMSource)source; + StringWriter xmlAsWriter = new StringWriter(); + StreamResult result = new StreamResult(xmlAsWriter); + try { + TransformerFactory.newInstance().newTransformer().transform(ds, result); + } catch (TransformerConfigurationException ex) { + throw context.getRuntime() + .newRuntimeError("Could not parse document: " + ex.getMessage()); + } catch (TransformerException ex) { + throw context.getRuntime() + .newRuntimeError("Could not parse document: " + ex.getMessage()); + } + try { + is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8")); + } catch (UnsupportedEncodingException ex) { + throw context.getRuntime() + .newRuntimeError("Could not parse document: " + ex.getMessage()); + } } - - @Override - protected void validate(Document document) throws SAXException, IOException { - verifier.verify(document); + + try { + return factory.compileSchema(is); + } catch (VerifierConfigurationException ex) { + throw context.getRuntime() + .newRuntimeError("Could not parse document: " + ex.getMessage()); + } catch (SAXException ex) { + throw context.getRuntime() + .newRuntimeError("Could not parse document: " + ex.getMessage()); + } catch (IOException ex) { + throw context.getRuntime().newIOError(ex.getClass() + ": " + ex.getMessage()); } + } + + @Override + protected void + setErrorHandler(ErrorHandler errorHandler) + { + verifier.setErrorHandler(errorHandler); + } + + @Override + protected void + validate(Document document) throws SAXException, IOException + { + verifier.verify(document); + 
} } diff --git a/ext/java/nokogiri/XmlSaxParserContext.java b/ext/java/nokogiri/XmlSaxParserContext.java index 5537619022..53ea7383b0 100644 --- a/ext/java/nokogiri/XmlSaxParserContext.java +++ b/ext/java/nokogiri/XmlSaxParserContext.java @@ -1,369 +1,329 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.IOException; -import java.io.InputStream; - -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; -import nokogiri.internals.XmlSaxParser; - +import nokogiri.internals.*; import org.apache.xerces.parsers.AbstractSAXParser; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyFixnum; -import org.jruby.RubyModule; -import org.jruby.RubyObjectAdapter; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; -import org.jruby.javasupport.JavaEmbedUtils; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import org.xml.sax.ContentHandler; -import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; +import java.io.IOException; +import java.io.InputStream; + +import static org.jruby.runtime.Helpers.invoke; + /** * Base class for the SAX parsers. 
* * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::SAX::ParserContext") -public class XmlSaxParserContext extends ParserContext { - - protected static final String FEATURE_NAMESPACES = - "http://xml.org/sax/features/namespaces"; - protected static final String FEATURE_NAMESPACE_PREFIXES = - "http://xml.org/sax/features/namespace-prefixes"; - protected static final String FEATURE_LOAD_EXTERNAL_DTD = - "http://apache.org/xml/features/nonvalidating/load-external-dtd"; - protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR = - "http://apache.org/xml/features/continue-after-fatal-error"; - - protected AbstractSAXParser parser; - - protected NokogiriHandler handler; - private boolean replaceEntities = true; - private boolean recovery = false; - - public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - protected void initialize(Ruby runtime) { - try { - parser = createParser(); - } - catch (SAXException se) { - throw RaiseException.createNativeRaiseException(runtime, se); - } - } - - /** - * Create and return a copy of this object. - * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - protected AbstractSAXParser createParser() throws SAXException { - XmlSaxParser parser = new XmlSaxParser(); - parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true); - parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false); - return parser; - } - - /** - * Create a new parser context that will parse the string - * data. 
- */ - @JRubyMethod(name="memory", meta=true) - public static IRubyObject parse_memory(ThreadContext context, - IRubyObject klazz, - IRubyObject data) { - final Ruby runtime = context.runtime; - XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); - ctx.initialize(runtime); - ctx.setInputSource(context, data, runtime.getNil()); - return ctx; - } - - /** - * Create a new parser context that will read from the file - * data and parse. - */ - @JRubyMethod(name="file", meta=true) - public static IRubyObject parse_file(ThreadContext context, - IRubyObject klazz, - IRubyObject data) { - final Ruby runtime = context.runtime; - XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); - ctx.initialize(context.getRuntime()); - ctx.setInputSourceFile(context, data); - return ctx; - } - - /** - * Create a new parser context that will read from the IO or - * StringIO data and parse. - * - * TODO: Currently ignores encoding enc. - */ - @JRubyMethod(name="io", meta=true) - public static IRubyObject parse_io(ThreadContext context, - IRubyObject klazz, - IRubyObject data, - IRubyObject enc) { - //int encoding = (int)enc.convertToInteger().getLongValue(); - final Ruby runtime = context.runtime; - XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); - ctx.initialize(runtime); - ctx.setInputSource(context, data, runtime.getNil()); - return ctx; - } - - /** - * Create a new parser context that will read from a raw input stream. - * Meant to be run in a separate thread by XmlSaxPushParser. 
- */ - static XmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) { - XmlSaxParserContext ctx = newInstance(runtime, klazz); - ctx.initialize(runtime); - ctx.setInputSource(stream); - return ctx; - } - - private static XmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) { - return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz); - } - - /** - * Set a property of the underlying parser. - */ - protected void setProperty(String key, Object val) - throws SAXNotRecognizedException, SAXNotSupportedException { - parser.setProperty(key, val); +@JRubyClass(name = "Nokogiri::XML::SAX::ParserContext") +public class XmlSaxParserContext extends ParserContext +{ + private static final long serialVersionUID = 1L; + + protected static final String FEATURE_NAMESPACES = + "http://xml.org/sax/features/namespaces"; + protected static final String FEATURE_NAMESPACE_PREFIXES = + "http://xml.org/sax/features/namespace-prefixes"; + protected static final String FEATURE_LOAD_EXTERNAL_DTD = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; + protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR = + "http://apache.org/xml/features/continue-after-fatal-error"; + + protected AbstractSAXParser parser; + + protected NokogiriHandler handler; + protected NokogiriErrorHandler errorHandler; + private boolean replaceEntities = true; + private boolean recovery = false; + + public + XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + protected void + initialize(Ruby runtime) + { + try { + parser = createParser(); + } catch (SAXException se) { + // Unexpected failure in XML subsystem + RaiseException ex = runtime.newRuntimeError(se.toString()); + ex.initCause(se); + throw ex; } - - protected void setContentHandler(ContentHandler handler) { - parser.setContentHandler(handler); + } + + /** + * Create and return a copy of this 
object. + * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + protected AbstractSAXParser + createParser() throws SAXException + { + XmlSaxParser parser = new XmlSaxParser(); + parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true); + parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false); + return parser; + } + + /** + * Create a new parser context that will parse the string + * data. + */ + @JRubyMethod(name = "memory", meta = true) + public static IRubyObject + parse_memory(ThreadContext context, + IRubyObject klazz, + IRubyObject data) + { + final Ruby runtime = context.runtime; + XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); + ctx.initialize(runtime); + ctx.setStringInputSource(context, data, runtime.getNil()); + return ctx; + } + + /** + * Create a new parser context that will read from the file + * data and parse. + */ + @JRubyMethod(name = "file", meta = true) + public static IRubyObject + parse_file(ThreadContext context, + IRubyObject klazz, + IRubyObject data) + { + final Ruby runtime = context.runtime; + XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); + ctx.initialize(context.getRuntime()); + ctx.setInputSourceFile(context, data); + return ctx; + } + + /** + * Create a new parser context that will read from the IO or + * StringIO data and parse. + * + * TODO: Currently ignores encoding enc. 
+ */ + @JRubyMethod(name = "io", meta = true) + public static IRubyObject + parse_io(ThreadContext context, + IRubyObject klazz, + IRubyObject data, + IRubyObject encoding) + { + // check the type of the unused encoding to match behavior of CRuby + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); } - - protected void setErrorHandler(ErrorHandler handler) { - parser.setErrorHandler(handler); + final Ruby runtime = context.runtime; + XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); + ctx.initialize(runtime); + ctx.setIOInputSource(context, data, runtime.getNil()); + return ctx; + } + + /** + * Create a new parser context that will read from a raw input stream. + * Meant to be run in a separate thread by XmlSaxPushParser. + */ + static XmlSaxParserContext + parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) + { + XmlSaxParserContext ctx = newInstance(runtime, klazz); + ctx.initialize(runtime); + ctx.setInputSource(stream); + return ctx; + } + + private static XmlSaxParserContext + newInstance(final Ruby runtime, final RubyClass klazz) + { + return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz); + } + + public final NokogiriHandler + getNokogiriHandler() { return handler; } + + public final NokogiriErrorHandler + getNokogiriErrorHandler() { return errorHandler; } + + /** + * Perform any initialization prior to parsing with the handler + * handlerRuby. Convenience hook for subclasses. 
+ */ + protected void + preParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) + { + ((XmlSaxParser) parser).setXmlDeclHandler(handler); + if (recovery) { + try { + parser.setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true); + } catch (Exception e) { + // Unexpected failure in XML subsystem + throw runtime.newRuntimeError(e.getMessage()); + } } - - public final NokogiriHandler getNokogiriHandler() { return handler; } - - /** - * Perform any initialization prior to parsing with the handler - * handlerRuby. Convenience hook for subclasses. - */ - protected void preParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) { - ((XmlSaxParser) parser).setXmlDeclHandler(handler); - if (recovery) { - try { - parser.setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true); - } - catch (Exception e) { - throw RaiseException.createNativeRaiseException(runtime, e); - } - } - } - - protected void postParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) { - // noop - } - - protected void do_parse() throws SAXException, IOException { - parser.parse(getInputSource()); + } + + protected void + postParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) + { + // noop + } + + protected void + do_parse() throws SAXException, IOException + { + parser.parse(getInputSource()); + } + + protected static Options + defaultParseOptions(ThreadContext context) + { + return new ParserContext.Options( + RubyFixnum.fix2long(Helpers.invoke(context, + ((RubyClass)context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions")) + .getConstant("DEFAULT_XML"), + "to_i")) + ); + } + + @JRubyMethod + public IRubyObject + parse_with(ThreadContext context, IRubyObject handlerRuby) + { + final Ruby runtime = context.getRuntime(); + + if (!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) { + throw runtime.newArgumentError("argument must respond_to document"); } - @JRubyMethod - public IRubyObject 
parse_with(ThreadContext context, IRubyObject handlerRuby) { - final Ruby runtime = context.getRuntime(); + /* TODO: how should we pass in parse options? */ + ParserContext.Options options = defaultParseOptions(context); - if(!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) { - throw runtime.newArgumentError("argument must respond_to document"); - } - - NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby); - preParse(runtime, handlerRuby, handler); - - setContentHandler(handler); - setErrorHandler(handler); + errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); + handler = new NokogiriHandler(runtime, handlerRuby, errorHandler); - try{ - setProperty("http://xml.org/sax/properties/lexical-handler", handler); - } - catch (Exception ex) { - throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString()); - } - - try{ - try { - do_parse(); - } - catch (SAXParseException ex) { - // A bad document () should call the - // error handler instead of raising a SAX exception. - - // However, an EMPTY document should raise a RuntimeError. - // This is a bit kludgy, but AFAIK SAX doesn't distinguish - // between empty and bad whereas Nokogiri does. 
- String message = ex.getMessage(); - if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) { - throw runtime.newRuntimeError("couldn't parse document: " + message); - } - handler.error(ex); - } - } - catch (SAXException ex) { - throw RaiseException.createNativeRaiseException(runtime, ex); - } - catch (IOException ex) { - throw runtime.newIOErrorFromException(ex); - } + preParse(runtime, handlerRuby, handler); + parser.setContentHandler(handler); + parser.setErrorHandler(handler); + parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options)); - postParse(runtime, handlerRuby, handler); - - //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby); - - return runtime.getNil(); + try { + parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler); + } catch (Exception ex) { + throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString()); } - /** - * Can take a boolean assignment. - * - * @param context - * @param value - * @return - */ - @JRubyMethod(name = "replace_entities=") - public IRubyObject set_replace_entities(ThreadContext context, IRubyObject value) { - replaceEntities = value.isTrue(); - return this; - } - - @JRubyMethod(name="replace_entities") - public IRubyObject get_replace_entities(ThreadContext context) { - return context.runtime.newBoolean(replaceEntities); - } - - /** - * Can take a boolean assignment. - * - * @param context - * @param value - * @return - */ - @JRubyMethod(name = "recovery=") - public IRubyObject set_recovery(ThreadContext context, IRubyObject value) { - recovery = value.isTrue(); - return this; - } - - @JRubyMethod(name="recovery") - public IRubyObject get_recovery(ThreadContext context) { - return context.runtime.newBoolean(recovery); - } - - /** - * If the handler's document is a FragmentHandler, attempt to trim - * leading and trailing whitespace. 
- * - * This is a bit hackish and depends heavily on the internals of - * FragmentHandler. - */ - protected void maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser) { - RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); - RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler"); - - IRubyObject handler = adapter.getInstanceVariable(parser, "@document"); - if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) - return; - IRubyObject stack = adapter.getInstanceVariable(handler, "@stack"); - if (stack == null || stack.isNil()) - return; - // doc is finally a DocumentFragment whose nodes we can check - IRubyObject doc = adapter.callMethod(stack, "first"); - if (doc == null || doc.isNil()) - return; - - IRubyObject children; - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject first = adapter.callMethod(children, "first"); - if (NokogiriHelpers.isBlank(first)) adapter.callMethod(first, "unlink"); - else break; - } - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject last = adapter.callMethod(children, "last"); - if (NokogiriHelpers.isBlank(last)) adapter.callMethod(last, "unlink"); - else break; + try { + try { + do_parse(); + } catch (SAXParseException ex) { + // A bad document () should call the + // error handler instead of raising a SAX exception. + + // However, an EMPTY document should raise a RuntimeError. + // This is a bit kludgy, but AFAIK SAX doesn't distinguish + // between empty and bad whereas Nokogiri does. + String message = ex.getMessage(); + if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) { + throw runtime.newRuntimeError("couldn't parse document: " + message); } - - // While we have a document, normalize it. 
- ((XmlNode) doc).normalize(); - } - - @JRubyMethod(name="column") - public IRubyObject column(ThreadContext context) { - final Integer number = handler.getColumn(); - if (number == null) return context.getRuntime().getNil(); - return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); - } - - @JRubyMethod(name="line") - public IRubyObject line(ThreadContext context) { - final Integer number = handler.getLine(); - if (number == null) return context.getRuntime().getNil(); - return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); + handler.error(ex); + } + } catch (SAXException ex) { + // Unexpected failure in XML subsystem + throw runtime.newRuntimeError(ex.getMessage()); + } catch (IOException ex) { + throw runtime.newIOErrorFromException(ex); } + postParse(runtime, handlerRuby, handler); + + return runtime.getNil(); + } + + /** + * Can take a boolean assignment. + * + * @param context + * @param value + * @return + */ + @JRubyMethod(name = "replace_entities=") + public IRubyObject + set_replace_entities(ThreadContext context, IRubyObject value) + { + replaceEntities = value.isTrue(); + return this; + } + + @JRubyMethod(name = "replace_entities") + public IRubyObject + get_replace_entities(ThreadContext context) + { + return context.runtime.newBoolean(replaceEntities); + } + + /** + * Can take a boolean assignment. 
+ * + * @param context + * @param value + * @return + */ + @JRubyMethod(name = "recovery=") + public IRubyObject + set_recovery(ThreadContext context, IRubyObject value) + { + recovery = value.isTrue(); + return this; + } + + @JRubyMethod(name = "recovery") + public IRubyObject + get_recovery(ThreadContext context) + { + return context.runtime.newBoolean(recovery); + } + + @JRubyMethod(name = "column") + public IRubyObject + column(ThreadContext context) + { + final Integer number = handler.getColumn(); + if (number == null) { return context.getRuntime().getNil(); } + return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); + } + + @JRubyMethod(name = "line") + public IRubyObject + line(ThreadContext context) + { + final Integer number = handler.getLine(); + if (number == null) { return context.getRuntime().getNil(); } + return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); + } } diff --git a/ext/java/nokogiri/XmlSaxPushParser.java b/ext/java/nokogiri/XmlSaxPushParser.java index 8e65b92167..26261a33e3 100644 --- a/ext/java/nokogiri/XmlSaxPushParser.java +++ b/ext/java/nokogiri/XmlSaxPushParser.java @@ -1,52 +1,9 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above 
copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - +import nokogiri.internals.*; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyException; import org.jruby.RubyObject; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; @@ -54,11 +11,14 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import nokogiri.internals.ClosedStreamException; -import nokogiri.internals.NokogiriBlockingQueueInputStream; -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.*; + +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; /** * Class for Nokogiri::XML::SAX::PushParser @@ 
-66,221 +26,263 @@ * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::SAX::PushParser") -public class XmlSaxPushParser extends RubyObject { - ParserContext.Options options; - IRubyObject saxParser; - - NokogiriBlockingQueueInputStream stream; - - private ParserTask parserTask = null; - private FutureTask futureTask = null; - private ExecutorService executor = null; - RaiseException ex = null; - - public XmlSaxPushParser(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); +@JRubyClass(name = "Nokogiri::XML::SAX::PushParser") +public class XmlSaxPushParser extends RubyObject +{ + private static final long serialVersionUID = 1L; + + ParserContext.Options options; + IRubyObject saxParser; + + NokogiriBlockingQueueInputStream stream; + + private ParserTask parserTask = null; + private FutureTask futureTask = null; + private ExecutorService executor = null; + RaiseException ex = null; + + public + XmlSaxPushParser(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } + + @SuppressWarnings("deprecation") + @Override + public void + finalize() + { + try { + terminateImpl(); + } catch (Exception e) { /* ignored */ } + } + + @JRubyMethod + public IRubyObject + initialize_native(final ThreadContext context, IRubyObject saxParser, IRubyObject fileName) + { + options = new ParserContext.Options(0); + this.saxParser = saxParser; + return this; + } + + private transient IRubyObject parse_options; + + private IRubyObject + parse_options(final ThreadContext context) + { + if (parse_options == null) { + parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new"); } - - @Override - public void finalize() { - try { - terminateImpl(); - } - catch (Exception e) { /* ignored */ } + return parse_options; + } + + @JRubyMethod(name = "options") + public IRubyObject + getOptions(ThreadContext context) + { + return invoke(context, parse_options(context), "options"); + } + + @JRubyMethod(name = 
"options=") + public IRubyObject + setOptions(ThreadContext context, IRubyObject opts) + { + invoke(context, parse_options(context), "options=", opts); + options = new ParserContext.Options(opts.convertToInteger().getLongValue()); + return getOptions(context); + } + + /** + * Can take a boolean assignment. + * + * @param context + * @param value + * @return + */ + @JRubyMethod(name = "replace_entities=") + public IRubyObject + setReplaceEntities(ThreadContext context, IRubyObject value) + { + // Ignore the value. + return this; + } + + @JRubyMethod(name = "replace_entities") + public IRubyObject + getReplaceEntities(ThreadContext context) + { + // The java parser always replaces entities. + return context.getRuntime().getTrue(); + } + + @JRubyMethod + public IRubyObject + native_write(ThreadContext context, IRubyObject chunk, + IRubyObject isLast) + { + if (ex != null) { + // parser has already errored, rethrow the exception + throw ex; } - @JRubyMethod - public IRubyObject initialize_native(final ThreadContext context, IRubyObject saxParser, IRubyObject fileName) { - options = new ParserContext.Options(0); - this.saxParser = saxParser; - return this; + try { + initialize_task(context); + } catch (IOException e) { + throw context.runtime.newRuntimeError(e.getMessage()); } - - private transient IRubyObject parse_options; - - private IRubyObject parse_options(final ThreadContext context) { - if (parse_options == null) { - parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new"); - } - return parse_options; + final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); + if (data == null) { + return this; } - @JRubyMethod(name="options") - public IRubyObject getOptions(ThreadContext context) { - return invoke(context, parse_options(context), "options"); - } + int errorCount0 = parserTask.getErrorCount(); - @JRubyMethod(name="options=") - public IRubyObject setOptions(ThreadContext context, IRubyObject 
opts) { - invoke(context, parse_options(context), "options=", opts); - options = new ParserContext.Options(opts.convertToInteger().getLongValue()); - return getOptions(context); + try { + Future task = stream.addChunk(data); + task.get(); + } catch (ClosedStreamException ex) { + // this means the stream is closed, ignore this exception + } catch (Exception e) { + throw context.runtime.newRuntimeError(e.toString()); } - /** - * Can take a boolean assignment. - * - * @param context - * @param value - * @return - */ - @JRubyMethod(name = "replace_entities=") - public IRubyObject setReplaceEntities(ThreadContext context, IRubyObject value) { - // Ignore the value. - return this; + if (isLast.isTrue()) { + parserTask.getNokogiriHandler().endDocument(); + terminateTask(context.runtime); } - @JRubyMethod(name="replace_entities") - public IRubyObject getReplaceEntities(ThreadContext context) { - // The java parser always replaces entities. - return context.getRuntime().getTrue(); + if (!options.recover && parserTask.getErrorCount() > errorCount0) { + terminateTask(context.runtime); + ex = parserTask.getLastError().toThrowable(); + throw ex; } - @JRubyMethod - public IRubyObject native_write(ThreadContext context, IRubyObject chunk, - IRubyObject isLast) { - if (ex != null) { - // parser has already errored, rethrow the exception - throw ex; - } - - try { - initialize_task(context); - } catch (IOException e) { - throw context.runtime.newRuntimeError(e.getMessage()); - } - final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); - if (data == null) { - return this; - } + return this; + } - int errorCount0 = parserTask.getErrorCount(); - - try { - Future task = stream.addChunk(data); - task.get(); - } catch (ClosedStreamException ex) { - // this means the stream is closed, ignore this exception - } catch (Exception e) { - throw context.runtime.newRuntimeError(e.toString()); - } + private void + initialize_task(ThreadContext context) throws IOException + { 
+ if (futureTask == null || stream == null) { + stream = new NokogiriBlockingQueueInputStream(); - if (isLast.isTrue()) { - parserTask.getNokogiriHandler().endDocument(); - terminateTask(context.runtime); - } - - if (!options.recover && parserTask.getErrorCount() > errorCount0) { - terminateTask(context.runtime); - throw ex = parserTask.getLastError(); + assert saxParser != null : "saxParser null"; + parserTask = new ParserTask(context, saxParser, stream); + futureTask = new FutureTask(parserTask); + executor = Executors.newSingleThreadExecutor(new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setName("XmlSaxPushParser"); + t.setDaemon(true); + return t; } - - return this; + }); + executor.submit(futureTask); } - - private void initialize_task(ThreadContext context) throws IOException { - if (futureTask == null || stream == null) { - stream = new NokogiriBlockingQueueInputStream(); - - assert saxParser != null : "saxParser null"; - parserTask = new ParserTask(context, saxParser, stream); - futureTask = new FutureTask(parserTask); - executor = Executors.newSingleThreadExecutor(new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - Thread t = new Thread(r); - t.setName("XmlSaxPushParser"); - t.setDaemon(true); - return t; - } - }); - executor.submit(futureTask); - } + } + + private void + terminateTask(final Ruby runtime) + { + if (executor == null) { return; } + + try { + terminateImpl(); + } catch (InterruptedException e) { + throw runtime.newRuntimeError(e.toString()); + } catch (Exception e) { + throw runtime.newRuntimeError(e.toString()); } - - private void terminateTask(final Ruby runtime) { - if (executor == null) return; - - try { - terminateImpl(); - } - catch (InterruptedException e) { - throw runtime.newRuntimeError(e.toString()); - } - catch (Exception e) { - throw runtime.newRuntimeError(e.toString()); - } + } + + private synchronized void + terminateImpl() throws 
InterruptedException, ExecutionException + { + terminateExecution(executor, stream, futureTask); + + executor = null; + stream = null; + futureTask = null; + } + + // SHARED for Html4SaxPushParser + static void + terminateExecution(final ExecutorService executor, final NokogiriBlockingQueueInputStream stream, + final FutureTask futureTask) + throws InterruptedException, ExecutionException + { + + if (executor == null) { return; } + + try { + Future task = stream.addChunk(NokogiriBlockingQueueInputStream.END); + task.get(); + } catch (ClosedStreamException ex) { + // ignore this exception, it means the stream was closed } - - private synchronized void terminateImpl() throws InterruptedException, ExecutionException { - terminateExecution(executor, stream, futureTask); - - executor = null; stream = null; futureTask = null; + futureTask.cancel(true); + executor.shutdown(); + } + + private static XmlSaxParserContext + parse(final Ruby runtime, final InputStream stream) + { + RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::XML::SAX::ParserContext"); + return XmlSaxParserContext.parse_stream(runtime, klazz, stream); + } + + static class ParserTask extends ParserContext.ParserTask + { + + final InputStream stream; + + private + ParserTask(ThreadContext context, IRubyObject handler, InputStream stream) + { + this(context, handler, parse(context.runtime, stream), stream); } - // SHARED for HtmlSaxPushParser - static void terminateExecution(final ExecutorService executor, final NokogiriBlockingQueueInputStream stream, - final FutureTask futureTask) - throws InterruptedException, ExecutionException { - - if (executor == null) return; - - try { - Future task = stream.addChunk(NokogiriBlockingQueueInputStream.END); - task.get(); - } - catch (ClosedStreamException ex) { - // ignore this exception, it means the stream was closed - } - futureTask.cancel(true); - executor.shutdown(); + // IMPL with Html4SaxPushParser + protected + ParserTask(ThreadContext context, IRubyObject 
handler, XmlSaxParserContext parser, InputStream stream) + { + super(context, handler, parser); + this.stream = stream; } - private static XmlSaxParserContext parse(final Ruby runtime, final InputStream stream) { - RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::XML::SAX::ParserContext"); - return XmlSaxParserContext.parse_stream(runtime, klazz, stream); + @Override + public XmlSaxParserContext + call() throws Exception + { + try { + parser.parse_with(context, handler); + } finally { stream.close(); } + // we have to close the stream before exiting, otherwise someone + // can add a chunk and block on task.get() forever. + return parser; } - static class ParserTask extends ParserContext.ParserTask { - - final InputStream stream; - - private ParserTask(ThreadContext context, IRubyObject handler, InputStream stream) { - this(context, handler, parse(context.runtime, stream), stream); - } - - // IMPL with HtmlSaxPushParser - protected ParserTask(ThreadContext context, IRubyObject handler, XmlSaxParserContext parser, InputStream stream) { - super(context, handler, parser); - this.stream = stream; - } - - @Override - public XmlSaxParserContext call() throws Exception { - try { - parser.parse_with(context, handler); - } - finally { stream.close(); } - // we have to close the stream before exiting, otherwise someone - // can add a chunk and block on task.get() forever. 
- return parser; - } - - final NokogiriHandler getNokogiriHandler() { - return parser.getNokogiriHandler(); - } - - synchronized final int getErrorCount() { - // check for null because thread may not have started yet - if (parser.getNokogiriHandler() == null) return 0; - return parser.getNokogiriHandler().getErrorCount(); - } - - synchronized final RaiseException getLastError() { - return parser.getNokogiriHandler().getLastError(); - } + final NokogiriHandler + getNokogiriHandler() + { + return parser.getNokogiriHandler(); + } + synchronized final int + getErrorCount() + { + // check for null because thread may not have started yet + if (parser.getNokogiriErrorHandler() == null) { return 0; } + return parser.getNokogiriErrorHandler().getErrors().size(); } + synchronized final RubyException + getLastError() + { + List errors = parser.getNokogiriErrorHandler().getErrors(); + return errors.get(errors.size() - 1); + } + } } diff --git a/ext/java/nokogiri/XmlSchema.java b/ext/java/nokogiri/XmlSchema.java index 0129786ca0..52d7991d07 100644 --- a/ext/java/nokogiri/XmlSchema.java +++ b/ext/java/nokogiri/XmlSchema.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice 
and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.adjustSystemIdIfNecessary; @@ -51,6 +19,8 @@ import nokogiri.internals.IgnoreSchemaErrorsErrorHandler; import nokogiri.internals.SchemaErrorHandler; import nokogiri.internals.XmlDomParserContext; +import nokogiri.internals.ParserContext; +import nokogiri.internals.ParserContext.Options; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -63,11 +33,13 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.Helpers; import org.w3c.dom.Document; import org.w3c.dom.ls.LSInput; import org.w3c.dom.ls.LSResourceResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; /** * Class for Nokogiri::XML::Schema @@ -75,254 +47,377 @@ * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::Schema") -public class XmlSchema extends RubyObject { - private Validator validator; - - public XmlSchema(Ruby ruby, RubyClass klazz) { - super(ruby, klazz); +@JRubyClass(name = "Nokogiri::XML::Schema") +public class XmlSchema extends RubyObject +{ + private static final long serialVersionUID = 1L; + + private Validator validator; + + public + XmlSchema(Ruby ruby, RubyClass klazz) + { + super(ruby, klazz); + } + + /** + * Create and return a copy of this object. 
+ * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } + + private Schema + getSchema(Source source, + String currentDir, + String scriptFileName, + SchemaErrorHandler errorHandler, + long parseOptions) throws SAXException + { + boolean noNet = new ParserContext.Options(parseOptions).noNet; + + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + SchemaResourceResolver resourceResolver = + new SchemaResourceResolver(currentDir, scriptFileName, null, errorHandler, noNet); + + schemaFactory.setResourceResolver(resourceResolver); + schemaFactory.setErrorHandler(errorHandler); + + return schemaFactory.newSchema(source); + } + + private void + setValidator(Validator validator) + { + this.validator = validator; + } + + static XmlSchema + createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) + { + Ruby runtime = context.getRuntime(); + XmlSchema xmlSchema = (XmlSchema) NokogiriService.XML_SCHEMA_ALLOCATOR.allocate(runtime, klazz); + + if (parseOptions == null) { + parseOptions = defaultParseOptions(context.getRuntime()); } - - /** - * Create and return a copy of this object. 
- * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); + long intParseOptions = RubyFixnum.fix2long(Helpers.invoke(context, parseOptions, "to_i")); + + xmlSchema.setInstanceVariable("@errors", runtime.newEmptyArray()); + xmlSchema.setInstanceVariable("@parse_options", parseOptions); + + try { + SchemaErrorHandler errorHandler = + new SchemaErrorHandler(context.getRuntime(), (RubyArray)xmlSchema.getInstanceVariable("@errors")); + Schema schema = + xmlSchema.getSchema(source, + context.getRuntime().getCurrentDirectory(), + context.getRuntime().getInstanceConfig().getScriptFileName(), + errorHandler, + intParseOptions); + xmlSchema.setValidator(schema.newValidator()); + return xmlSchema; + } catch (SAXException ex) { + throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage()); } - - private Schema getSchema(Source source, String currentDir, String scriptFileName) throws SAXException { - SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); - SchemaResourceResolver resourceResolver = new SchemaResourceResolver(currentDir, scriptFileName, null); - schemaFactory.setResourceResolver(resourceResolver); - schemaFactory.setErrorHandler(new IgnoreSchemaErrorsErrorHandler()); - return schemaFactory.newSchema(source); - } - - private void setValidator(Validator validator) { - this.validator = validator; + } + + protected static IRubyObject + defaultParseOptions(Ruby runtime) + { + return ((RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions")).getConstant("DEFAULT_SCHEMA"); + } + + /* + * call-seq: + * from_document(doc) + * + * Create a new Schema from the Nokogiri::XML::Document +doc+ + */ + @JRubyMethod(meta = true, required = 1, optional = 1) + public static IRubyObject + from_document(ThreadContext context, IRubyObject klazz, IRubyObject[] args) + { + IRubyObject document = args[0]; + IRubyObject 
parseOptions = null; + if (args.length > 1) { + parseOptions = args[1]; } - static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) { - Ruby runtime = context.getRuntime(); - XmlSchema xmlSchema = (XmlSchema) NokogiriService.XML_SCHEMA_ALLOCATOR.allocate(runtime, klazz); - xmlSchema.setInstanceVariable("@errors", runtime.newEmptyArray()); + XmlDocument doc = ((XmlDocument)((XmlNode) document).document(context)); - try { - Schema schema = xmlSchema.getSchema(source, context.getRuntime().getCurrentDirectory(), context.getRuntime().getInstanceConfig().getScriptFileName()); - xmlSchema.setValidator(schema.newValidator()); - return xmlSchema; - } catch (SAXException ex) { - throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage()); - } + RubyArray errors = (RubyArray) doc.getInstanceVariable("@errors"); + if (!errors.isEmpty()) { + throw((XmlSyntaxError) errors.first()).toThrowable(); } - /* - * call-seq: - * from_document(doc) - * - * Create a new Schema from the Nokogiri::XML::Document +doc+ - */ - @JRubyMethod(meta=true) - public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject document) { - XmlDocument doc = ((XmlDocument) ((XmlNode) document).document(context)); - - RubyArray errors = (RubyArray) doc.getInstanceVariable("@errors"); - if (!errors.isEmpty()) { - throw new RaiseException((XmlSyntaxError) errors.first()); - } - - DOMSource source = new DOMSource(doc.getDocument()); + DOMSource source = new DOMSource(doc.getDocument()); - IRubyObject uri = doc.url(context); - - if (!uri.isNil()) { - source.setSystemId(uri.convertToString().asJavaString()); - } + IRubyObject uri = doc.url(context); - return getSchema(context, (RubyClass)klazz, source); + if (!uri.isNil()) { + source.setSystemId(uri.convertToString().asJavaString()); } - private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source) { - String moduleName = 
klazz.getName(); - if ("Nokogiri::XML::Schema".equals(moduleName)) { - return XmlSchema.createSchemaInstance(context, klazz, source); - } else if ("Nokogiri::XML::RelaxNG".equals(moduleName)) { - return XmlRelaxng.createSchemaInstance(context, klazz, source); - } - return context.getRuntime().getNil(); + return getSchema(context, (RubyClass)klazz, source, parseOptions); + } + + @JRubyMethod(meta = true, required = 1, optional = 1) + public static IRubyObject + read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) + { + IRubyObject content = args[0]; + IRubyObject parseOptions = null; + if (args.length > 1) { + parseOptions = args[1]; } - - @JRubyMethod(meta=true) - public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject content) { - String data = content.convertToString().asJavaString(); - return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data))); + String data = content.convertToString().asJavaString(); + return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions); + } + + private static IRubyObject + getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) + { + String moduleName = klazz.getName(); + if ("Nokogiri::XML::Schema".equals(moduleName)) { + return XmlSchema.createSchemaInstance(context, klazz, source, parseOptions); + } else if ("Nokogiri::XML::RelaxNG".equals(moduleName)) { + return XmlRelaxng.createSchemaInstance(context, klazz, source, parseOptions); } - - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject validate_document(ThreadContext context, IRubyObject document) { - return validate_document_or_file(context, (XmlDocument)document); + return context.getRuntime().getNil(); + } + + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + validate_document(ThreadContext context, IRubyObject document) + { + return validate_document_or_file(context, 
(XmlDocument)document); + } + + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject + validate_file(ThreadContext context, IRubyObject file) + { + Ruby runtime = context.runtime; + + XmlDomParserContext ctx = new XmlDomParserContext(runtime, RubyFixnum.newFixnum(runtime, 1L)); + ctx.setInputSourceFile(context, file); + XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil); + return validate_document_or_file(context, xmlDocument); + } + + IRubyObject + validate_document_or_file(ThreadContext context, XmlDocument xmlDocument) + { + RubyArray errors = (RubyArray) this.getInstanceVariable("@errors"); + ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors); + setErrorHandler(errorHandler); + + try { + validate(xmlDocument.getDocument()); + } catch (SAXException ex) { + XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); + xmlSyntaxError.setException(ex); + errors.append(xmlSyntaxError); + } catch (IOException ex) { + throw context.runtime.newIOError(ex.getMessage()); } - @JRubyMethod(visibility=Visibility.PRIVATE) - public IRubyObject validate_file(ThreadContext context, IRubyObject file) { - Ruby ruby = context.getRuntime(); - - XmlDomParserContext ctx = new XmlDomParserContext(ruby, RubyFixnum.newFixnum(ruby, 1L)); - ctx.setInputSourceFile(context, file); - XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(ruby, "Nokogiri::XML::Document"), ruby.getNil()); - return validate_document_or_file(context, xmlDocument); + return errors; + } + + protected void + setErrorHandler(ErrorHandler errorHandler) + { + validator.setErrorHandler(errorHandler); + } + + protected void + validate(Document document) throws SAXException, IOException + { + DOMSource docSource = new DOMSource(document); + validator.validate(docSource); + } + + private class SchemaResourceResolver implements LSResourceResolver + { + SchemaLSInput lsInput = new 
SchemaLSInput(); + String currentDir; + String scriptFileName; + SchemaErrorHandler errorHandler; + boolean noNet; + //String defaultURI; + + SchemaResourceResolver(String currentDir, String scriptFileName, Object input, SchemaErrorHandler errorHandler, + boolean noNet) + { + this.currentDir = currentDir; + this.scriptFileName = scriptFileName; + this.errorHandler = errorHandler; + this.noNet = noNet; + if (input == null) { return; } + if (input instanceof String) { + lsInput.setStringData((String)input); + } else if (input instanceof Reader) { + lsInput.setCharacterStream((Reader)input); + } else if (input instanceof InputStream) { + lsInput.setByteStream((InputStream)input); + } } - IRubyObject validate_document_or_file(ThreadContext context, XmlDocument xmlDocument) { - RubyArray errors = (RubyArray) this.getInstanceVariable("@errors"); - ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors); - setErrorHandler(errorHandler); - - try { - validate(xmlDocument.getDocument()); - } - catch (SAXException ex) { - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); - xmlSyntaxError.setException(ex); - errors.append(xmlSyntaxError); + @Override + public LSInput + resolveResource(String type, + String namespaceURI, + String publicId, + String systemId, + String baseURI) + { + if (noNet && systemId != null && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) { + if (systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) { + return null; // use default resolver } - catch (IOException ex) { - throw context.runtime.newIOError(ex.getMessage()); + try { + this.errorHandler.warning(new SAXParseException(String.format("Attempt to load network entity '%s'", systemId), null)); + } catch (SAXException ex) { } - - return errors; + } else { + String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId); + lsInput.setPublicId(publicId); + lsInput.setSystemId(adjusted != null ? 
adjusted : systemId); + lsInput.setBaseURI(baseURI); + } + return lsInput; } + } + + private class SchemaLSInput implements LSInput + { + protected String fPublicId; + protected String fSystemId; + protected String fBaseSystemId; + protected InputStream fByteStream; + protected Reader fCharStream; + protected String fData; + protected String fEncoding; + protected boolean fCertifiedText = false; - protected void setErrorHandler(ErrorHandler errorHandler) { - validator.setErrorHandler(errorHandler); + @Override + public String + getBaseURI() + { + return fBaseSystemId; } - protected void validate(Document document) throws SAXException, IOException { - DOMSource docSource = new DOMSource(document); - validator.validate(docSource); + @Override + public InputStream + getByteStream() + { + return fByteStream; } - private class SchemaResourceResolver implements LSResourceResolver { - SchemaLSInput lsInput = new SchemaLSInput(); - String currentDir; - String scriptFileName; - //String defaultURI; - - SchemaResourceResolver(String currentDir, String scriptFileName, Object input) { - this.currentDir = currentDir; - this.scriptFileName = scriptFileName; - if (input == null) return; - if (input instanceof String) { - lsInput.setStringData((String)input); - } else if (input instanceof Reader) { - lsInput.setCharacterStream((Reader)input); - } else if (input instanceof InputStream) { - lsInput.setByteStream((InputStream)input); - } - } - - @Override - public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) { - String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId); - lsInput.setPublicId(publicId); - lsInput.setSystemId(adjusted != null? 
adjusted : systemId); - lsInput.setBaseURI(baseURI); - return lsInput; - } + @Override + public boolean + getCertifiedText() + { + return fCertifiedText; } - private class SchemaLSInput implements LSInput { - protected String fPublicId; - protected String fSystemId; - protected String fBaseSystemId; - protected InputStream fByteStream; - protected Reader fCharStream; - protected String fData; - protected String fEncoding; - protected boolean fCertifiedText = false; - - @Override - public String getBaseURI() { - return fBaseSystemId; - } - - @Override - public InputStream getByteStream() { - return fByteStream; - } - - @Override - public boolean getCertifiedText() { - return fCertifiedText; - } - - @Override - public Reader getCharacterStream() { - return fCharStream; - } - - @Override - public String getEncoding() { - return fEncoding; - } + @Override + public Reader + getCharacterStream() + { + return fCharStream; + } - @Override - public String getPublicId() { - return fPublicId; - } + @Override + public String + getEncoding() + { + return fEncoding; + } - @Override - public String getStringData() { - return fData; - } + @Override + public String + getPublicId() + { + return fPublicId; + } - @Override - public String getSystemId() { - return fSystemId; - } + @Override + public String + getStringData() + { + return fData; + } - @Override - public void setBaseURI(String baseURI) { - fBaseSystemId = baseURI; - } + @Override + public String + getSystemId() + { + return fSystemId; + } - @Override - public void setByteStream(InputStream byteStream) { - fByteStream = byteStream; - } + @Override + public void + setBaseURI(String baseURI) + { + fBaseSystemId = baseURI; + } - @Override - public void setCertifiedText(boolean certified) { - fCertifiedText = certified; - } + @Override + public void + setByteStream(InputStream byteStream) + { + fByteStream = byteStream; + } - @Override - public void setCharacterStream(Reader charStream) { - fCharStream = charStream; - } + 
@Override + public void + setCertifiedText(boolean certified) + { + fCertifiedText = certified; + } - @Override - public void setEncoding(String encoding) { - fEncoding = encoding; - } + @Override + public void + setCharacterStream(Reader charStream) + { + fCharStream = charStream; + } - @Override - public void setPublicId(String pubId) { - fPublicId = pubId; - } + @Override + public void + setEncoding(String encoding) + { + fEncoding = encoding; + } - @Override - public void setStringData(String stringData) { - fData = stringData; - } + @Override + public void + setPublicId(String pubId) + { + fPublicId = pubId; + } - @Override - public void setSystemId(String sysId) { - fSystemId = sysId; - } + @Override + public void + setStringData(String stringData) + { + fData = stringData; + } + @Override + public void + setSystemId(String sysId) + { + fSystemId = sysId; } + + } } diff --git a/ext/java/nokogiri/XmlSyntaxError.java b/ext/java/nokogiri/XmlSyntaxError.java index feb0852b72..0fe1b649e4 100644 --- a/ext/java/nokogiri/XmlSyntaxError.java +++ b/ext/java/nokogiri/XmlSyntaxError.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice 
shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.stringOrNil; @@ -50,93 +18,120 @@ * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::SyntaxError", parent="Nokogiri::SyntaxError") -public class XmlSyntaxError extends RubyException { - - private Exception exception; - private boolean messageSet; // whether a custom error message was set - - public XmlSyntaxError(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); - } - - public XmlSyntaxError(Ruby runtime, RubyClass rubyClass, Exception ex) { - super(runtime, rubyClass, ex.getMessage()); - this.exception = ex; - } - - public XmlSyntaxError(Ruby runtime, RubyClass rubyClass, String message, Exception ex) { - super(runtime, rubyClass, message); - this.exception = ex; this.messageSet = true; - } - - public static XmlSyntaxError createXMLSyntaxError(final Ruby runtime) { - RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SyntaxError"); - return new XmlSyntaxError(runtime, klazz); - } - - public static XmlSyntaxError createXMLSyntaxError(final Ruby runtime, final Exception ex) { - RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SyntaxError"); - return new XmlSyntaxError(runtime, klazz, ex); - } - - public static XmlSyntaxError createHTMLSyntaxError(final Ruby runtime) { - RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::HTML::SyntaxError"); - 
return new XmlSyntaxError(runtime, klazz); - } - - public static RubyException createXMLXPathSyntaxError(final Ruby runtime, final String msg, final Exception ex) { - RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::XPath::SyntaxError"); - return new XmlSyntaxError(runtime, klazz, msg, ex); - } - - public static XmlSyntaxError createWarning(Ruby runtime, SAXParseException e) { - XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); - xmlSyntaxError.setException(runtime, e, 1); - return xmlSyntaxError; - } - - public static XmlSyntaxError createError(Ruby runtime, SAXParseException e) { - XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); - xmlSyntaxError.setException(runtime, e, 2); - return xmlSyntaxError; - } - - public static XmlSyntaxError createFatalError(Ruby runtime, SAXParseException e) { - XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); - xmlSyntaxError.setException(runtime, e, 3); - return xmlSyntaxError; - } - - public void setException(Exception exception) { - this.exception = exception; - } - - public void setException(Ruby runtime, SAXParseException exception, int level) { - this.exception = exception; - setInstanceVariable("@level", runtime.newFixnum(level)); - setInstanceVariable("@line", runtime.newFixnum(exception.getLineNumber())); - setInstanceVariable("@column", runtime.newFixnum(exception.getColumnNumber())); - setInstanceVariable("@file", stringOrNil(runtime, exception.getSystemId())); - } - - // NOTE: special care - due JRuby 1.7.x - - @Override - public IRubyObject to_s(ThreadContext context) { return to_s19(context); } - - @JRubyMethod(name = "to_s") - public RubyString to_s19(ThreadContext context) { - RubyString msg = msg(context.runtime); - return msg != null ? 
msg : super.to_s(context).asString(); - } - - private RubyString msg(final Ruby runtime) { - if (exception != null && exception.getMessage() != null) { - if (messageSet) return null; - return runtime.newString( exception.getMessage() ); - } - return null; +@JRubyClass(name = "Nokogiri::XML::SyntaxError", parent = "Nokogiri::SyntaxError") +public class XmlSyntaxError extends RubyException +{ + private static final long serialVersionUID = 1L; + + private Exception exception; + private boolean messageSet; // whether a custom error message was set + + public + XmlSyntaxError(Ruby runtime, RubyClass klazz) + { + super(runtime, klazz); + } + + public + XmlSyntaxError(Ruby runtime, RubyClass rubyClass, Exception ex) + { + super(runtime, rubyClass, ex.getMessage()); + this.exception = ex; + } + + public + XmlSyntaxError(Ruby runtime, RubyClass rubyClass, String message, Exception ex) + { + super(runtime, rubyClass, message); + this.exception = ex; + this.messageSet = true; + } + + public static XmlSyntaxError + createXMLSyntaxError(final Ruby runtime) + { + RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SyntaxError"); + return new XmlSyntaxError(runtime, klazz); + } + + public static XmlSyntaxError + createXMLSyntaxError(final Ruby runtime, final Exception ex) + { + RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SyntaxError"); + return new XmlSyntaxError(runtime, klazz, ex); + } + + public static XmlSyntaxError + createHTMLSyntaxError(final Ruby runtime) + { + RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::HTML4::SyntaxError"); + return new XmlSyntaxError(runtime, klazz); + } + + public static RubyException + createXMLXPathSyntaxError(final Ruby runtime, final String msg, final Exception ex) + { + RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::XPath::SyntaxError"); + return new XmlSyntaxError(runtime, klazz, msg, ex); + } + + public static XmlSyntaxError + createWarning(Ruby runtime, 
SAXParseException e) + { + XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); + xmlSyntaxError.setException(runtime, e, 1); + return xmlSyntaxError; + } + + public static XmlSyntaxError + createError(Ruby runtime, SAXParseException e) + { + XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); + xmlSyntaxError.setException(runtime, e, 2); + return xmlSyntaxError; + } + + public static XmlSyntaxError + createFatalError(Ruby runtime, SAXParseException e) + { + XmlSyntaxError xmlSyntaxError = createXMLSyntaxError(runtime); + xmlSyntaxError.setException(runtime, e, 3); + return xmlSyntaxError; + } + + public void + setException(Exception exception) + { + this.exception = exception; + } + + public void + setException(Ruby runtime, SAXParseException exception, int level) + { + this.exception = exception; + setInstanceVariable("@level", runtime.newFixnum(level)); + setInstanceVariable("@line", runtime.newFixnum(exception.getLineNumber())); + setInstanceVariable("@column", runtime.newFixnum(exception.getColumnNumber())); + setInstanceVariable("@file", stringOrNil(runtime, exception.getSystemId())); + } + + @JRubyMethod(name = "to_s") + @Override + public IRubyObject + to_s(ThreadContext context) + { + RubyString msg = msg(context.runtime); + return msg != null ? 
msg : super.to_s(context).asString(); + } + + private RubyString + msg(final Ruby runtime) + { + if (exception != null && exception.getMessage() != null) { + if (messageSet) { return null; } + return runtime.newString(exception.getMessage()); } + return null; + } } diff --git a/ext/java/nokogiri/XmlText.java b/ext/java/nokogiri/XmlText.java index 0138a0a987..8da89b0692 100644 --- a/ext/java/nokogiri/XmlText.java +++ b/ext/java/nokogiri/XmlText.java @@ -1,46 +1,17 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate; import static nokogiri.internals.NokogiriHelpers.rubyStringToString; import nokogiri.internals.SaveContextVisitor; +import org.jcodings.specific.USASCIIEncoding; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyString; import org.jruby.anno.JRubyClass; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.Text; @@ -51,57 +22,69 @@ * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XML::Text", parent="Nokogiri::XML::CharacterData") -public class XmlText extends XmlNode { +@JRubyClass(name = "Nokogiri::XML::Text", parent = "Nokogiri::XML::CharacterData") +public class XmlText extends XmlNode +{ + private static final long serialVersionUID = 1L; - public XmlText(Ruby runtime, RubyClass rubyClass, Node node) { - super(runtime, rubyClass, node); - } + private static final ByteList TEXT = ByteList.create("text"); + static { TEXT.setEncoding(USASCIIEncoding.INSTANCE); } + + public + XmlText(Ruby runtime, RubyClass rubyClass, Node node) + { + super(runtime, rubyClass, node); + } + + public + XmlText(Ruby runtime, RubyClass klass) + { + super(runtime, klass); + } - public XmlText(Ruby runtime, RubyClass klass) { - super(runtime, klass); + @Override + protected void + init(ThreadContext context, IRubyObject[] args) + { + if (args.length < 2) { + throw context.runtime.newArgumentError(args.length, 2); } - @Override - protected void init(ThreadContext context, IRubyObject[] args) { - if (args.length < 2) { - throw getRuntime().newArgumentError(args.length, 2); - } + content = args[0]; + IRubyObject xNode = args[1]; - content = args[0]; - IRubyObject xNode = args[1]; + Document document = asXmlNode(context, xNode).getOwnerDocument(); + // text node content should not be encoded when it is created by Text 
node. + // while content should be encoded when it is created by Element node. + Node node = document.createTextNode(rubyStringToString(content)); + setNode(context.runtime, node); + } - XmlNode xmlNode = asXmlNode(context, xNode); - XmlDocument xmlDoc = (XmlDocument)xmlNode.document(context); - doc = xmlDoc; - Document document = xmlDoc.getDocument(); - // text node content should not be encoded when it is created by Text node. - // while content should be encoded when it is created by Element node. - Node node = document.createTextNode(rubyStringToString(content)); - setNode(context, node); - } - - @Override - protected IRubyObject getNodeName(ThreadContext context) { - if (name == null) name = context.getRuntime().newString("text"); - return name; - } + @Override + protected IRubyObject + getNodeName(ThreadContext context) + { + if (name == null) { name = RubyString.newStringShared(context.runtime, TEXT); } + return name; + } - @Override - public void accept(ThreadContext context, SaveContextVisitor visitor) { - visitor.enter((Text)node); - Node child = node.getFirstChild(); - while (child != null) { - IRubyObject nokoNode = getCachedNodeOrCreate(context.getRuntime(), child); - if (nokoNode instanceof XmlNode) { - XmlNode cur = (XmlNode) nokoNode; - cur.accept(context, visitor); - } else if (nokoNode instanceof XmlNamespace) { - XmlNamespace cur = (XmlNamespace) nokoNode; - cur.accept(context, visitor); - } - child = child.getNextSibling(); - } - visitor.leave(node); + @Override + public void + accept(ThreadContext context, SaveContextVisitor visitor) + { + visitor.enter((Text) node); + Node child = node.getFirstChild(); + while (child != null) { + IRubyObject nokoNode = getCachedNodeOrCreate(context.runtime, child); + if (nokoNode instanceof XmlNode) { + XmlNode cur = (XmlNode) nokoNode; + cur.accept(context, visitor); + } else if (nokoNode instanceof XmlNamespace) { + XmlNamespace cur = (XmlNamespace) nokoNode; + cur.accept(context, visitor); + } + child = 
child.getNextSibling(); } + visitor.leave(node); + } } diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index f142b4eaf4..16939f3066 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -1,38 +1,8 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2014: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.transform.TransformerException; @@ -57,6 +27,8 @@ import nokogiri.internals.NokogiriXPathFunctionResolver; import nokogiri.internals.NokogiriXPathVariableResolver; +import static nokogiri.internals.NokogiriHelpers.nodeListToRubyArray; + /** * Class for Nokogiri::XML::XpathContext * @@ -64,208 +36,270 @@ * @author Yoko Harada * @author John Shahid */ -@JRubyClass(name="Nokogiri::XML::XPathContext") -public class XmlXpathContext extends RubyObject { - - static { - final String DTMManager = "org.apache.xml.dtm.DTMManager"; - if (SafePropertyAccessor.getProperty(DTMManager) == null) { - try { // use patched "org.apache.xml.dtm.ref.DTMManagerDefault" - System.setProperty(DTMManager, nokogiri.internals.XalanDTMManagerPatch.class.getName()); - } - catch (SecurityException ex) { /* no-op - will work although might be slower */ } - } +@JRubyClass(name = "Nokogiri::XML::XPathContext") +public class XmlXpathContext extends RubyObject +{ + private static final long serialVersionUID = 1L; + + static + { + final String DTMManager = "org.apache.xml.dtm.DTMManager"; + if (SafePropertyAccessor.getProperty(DTMManager) == null) { + try { // use patched "org.apache.xml.dtm.ref.DTMManagerDefault" + System.setProperty(DTMManager, nokogiri.internals.XalanDTMManagerPatch.class.getName()); + } catch (SecurityException ex) { /* no-op - will work although might be slower */ } } - - /** - * user-data key for (cached) {@link XPathContext} - */ - public static final String XPATH_CONTEXT = "CACHED_XPATH_CONTEXT"; - - private XmlNode context; - - public XmlXpathContext(Ruby runtime, RubyClass klass) { - super(runtime, klass); + } + + /** + * user-data key for (cached) {@link XPathContext} + */ + public static final String XPATH_CONTEXT = "CACHED_XPATH_CONTEXT"; + + private XmlNode context; + + public + XmlXpathContext(Ruby runtime, RubyClass klass) + { + super(runtime, 
klass); + } + + public + XmlXpathContext(Ruby runtime, RubyClass klass, XmlNode node) + { + this(runtime, klass); + initNode(node); + } + + private void + initNode(XmlNode node) + { + context = node; + } + + @JRubyMethod(name = "new", meta = true) + public static IRubyObject + rbNew(ThreadContext context, IRubyObject klazz, IRubyObject node) + { + try { + return new XmlXpathContext(context.runtime, (RubyClass) klazz, (XmlNode) node); + } catch (IllegalArgumentException e) { + throw context.getRuntime().newRuntimeError(e.getMessage()); } + } + + + // see https://en.wikipedia.org/wiki/QName + private static final String NameStartCharStr = + "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" + ; + private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ; + private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*"; + private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())"; + private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr); + + @JRubyMethod + public IRubyObject + evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) + { + String query = rbQuery.convertToString().asJavaString(); + + if (!handler.isNil() && !isContainsPrefix(query)) { + // + // The user has passed in a handler, but isn't using the `nokogiri:` prefix as + // instructed in JRuby land, so let's try to be clever and rewrite the query, inserting + // the nokogiri namespace where appropriate. 
+ // + StringBuilder namespacedQuery = new StringBuilder(); + int jchar = 0; + + // Find the methods on the handler object + Set methodNames = handler.getMetaClass().getMethods().keySet(); + + // Find the function calls in the xpath query + Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query); + + while (xpathFunctionCalls.find()) { + namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start())); + jchar = xpathFunctionCalls.start(); + + if (methodNames.contains(xpathFunctionCalls.group())) { + namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX); + namespacedQuery.append(":"); + } - public XmlXpathContext(Ruby runtime, RubyClass klass, XmlNode node) { - this(runtime, klass); - initNode(node); - } + namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end())); + jchar = xpathFunctionCalls.end(); + } - private void initNode(XmlNode node) { - context = node; + if (jchar < query.length() - 1) { + namespacedQuery.append(query.subSequence(jchar, query.length())); + } + query = namespacedQuery.toString(); } - @JRubyMethod(name = "new", meta = true) - public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyObject node) { - try { - return new XmlXpathContext(context.runtime, (RubyClass) klazz, (XmlNode) node); - } - catch (IllegalArgumentException e) { - throw context.getRuntime().newRuntimeError(e.getMessage()); - } + return node_set(context, query, handler); + } + + @JRubyMethod + public IRubyObject + evaluate(ThreadContext context, IRubyObject expr) + { + return this.evaluate(context, expr, context.getRuntime().getNil()); + } + + private final NokogiriNamespaceContext nsContext = NokogiriNamespaceContext.create(); + + @JRubyMethod + public IRubyObject + register_ns(IRubyObject prefix, IRubyObject uri) + { + nsContext.registerNamespace(prefix.asJavaString(), uri.asJavaString()); + return this; + } + + private NokogiriXPathVariableResolver variableResolver; // binds (if any) 
+ + @JRubyMethod + public IRubyObject + register_variable(IRubyObject name, IRubyObject value) + { + NokogiriXPathVariableResolver variableResolver = this.variableResolver; + if (variableResolver == null) { + variableResolver = NokogiriXPathVariableResolver.create(); + this.variableResolver = variableResolver; } - - @JRubyMethod - public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) { - - String src = expr.convertToString().asJavaString(); - if (!handler.isNil()) { - if (!isContainsPrefix(src)) { - StringBuilder replacement = new StringBuilder(); - Set methodNames = handler.getMetaClass().getMethods().keySet(); - final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX; - for (String name : methodNames) { - replacement.setLength(0); - replacement.ensureCapacity(PREFIX.length() + 1 + name.length()); - replacement.append(PREFIX).append(':').append(name); - src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name) - } - } - } - - return node_set(context, src, handler); + variableResolver.registerVariable(name.asJavaString(), value.asJavaString()); + return this; + } + + private IRubyObject + node_set(ThreadContext context, String expr, IRubyObject handler) + { + final NokogiriXPathFunctionResolver fnResolver = NokogiriXPathFunctionResolver.create(handler); + try { + return tryGetNodeSet(context, expr, fnResolver); + } catch (TransformerException ex) { + throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, + (expr + ": " + ex.toString()), + ex).toThrowable(); } - - @JRubyMethod - public IRubyObject evaluate(ThreadContext context, IRubyObject expr) { - return this.evaluate(context, expr, context.getRuntime().getNil()); + } + + private IRubyObject + tryGetNodeSet(ThreadContext context, String expr, NokogiriXPathFunctionResolver fnResolver) throws TransformerException + { + final Node contextNode = this.context.node; + + final JAXPPrefixResolver prefixResolver = new 
JAXPPrefixResolver(nsContext); + XPath xpathInternal = new XPath(expr, null, prefixResolver, XPath.SELECT); + + // We always need to have a ContextNode with Xalan XPath implementation + // To allow simple expression evaluation like 1+1 we are setting + // dummy Document as Context Node + final XObject xobj; + if (contextNode == null) { + xobj = xpathInternal.execute(getXPathContext(fnResolver), DTM.NULL, prefixResolver); + } else { + xobj = xpathInternal.execute(getXPathContext(fnResolver), contextNode, prefixResolver); } - private final NokogiriNamespaceContext nsContext = NokogiriNamespaceContext.create(); - - @JRubyMethod - public IRubyObject register_ns(IRubyObject prefix, IRubyObject uri) { - nsContext.registerNamespace(prefix.asJavaString(), uri.asJavaString()); - return this; + switch (xobj.getType()) { + case XObject.CLASS_BOOLEAN : + return context.runtime.newBoolean(xobj.bool()); + case XObject.CLASS_NUMBER : + return context.runtime.newFloat(xobj.num()); + case XObject.CLASS_NODESET : + IRubyObject[] nodes = nodeListToRubyArray(context.runtime, xobj.nodelist()); + return XmlNodeSet.newNodeSet(context.runtime, nodes, this.context); + default : + return context.runtime.newString(xobj.str()); } - - private NokogiriXPathVariableResolver variableResolver; // binds (if any) - - @JRubyMethod - public IRubyObject register_variable(IRubyObject name, IRubyObject value) { - NokogiriXPathVariableResolver variableResolver = this.variableResolver; - if (variableResolver == null) { - variableResolver = NokogiriXPathVariableResolver.create(); - this.variableResolver = variableResolver; - } - variableResolver.registerVariable(name.asJavaString(), value.asJavaString()); - return this; + } + + private XPathContext + getXPathContext(final NokogiriXPathFunctionResolver fnResolver) + { + Node doc = context.getNode().getOwnerDocument(); + if (doc == null) { doc = context.getNode(); } + + XPathContext xpathContext = (XPathContext) doc.getUserData(XPATH_CONTEXT); + + if 
(xpathContext == null) { + xpathContext = newXPathContext(fnResolver); + if (variableResolver == null) { + // NOTE: only caching without variables - could be improved by more sophisticated caching + doc.setUserData(XPATH_CONTEXT, xpathContext, null); + } + } else { + Object owner = xpathContext.getOwnerObject(); + if ((owner == null && fnResolver == null) || + (owner instanceof JAXPExtensionsProvider && ((JAXPExtensionsProvider) owner).hasSameResolver(fnResolver))) { + // can be re-used assuming it has the same variable-stack (for now only cached if no variables) + if (variableResolver == null) { return xpathContext; } + } + xpathContext = newXPathContext(fnResolver); // otherwise we can not use the cached xpath-context } - private IRubyObject node_set(ThreadContext context, String expr, IRubyObject handler) { - final NokogiriXPathFunctionResolver fnResolver = - handler.isNil() ? null : NokogiriXPathFunctionResolver.create(handler); - try { - return tryGetNodeSet(context, expr, fnResolver); - } - catch (TransformerException ex) { - throw new RaiseException(XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, expr, ex)); // Nokogiri::XML::XPath::SyntaxError - } + if (variableResolver != null) { + xpathContext.setVarStack(new JAXPVariableStack(variableResolver)); } - private IRubyObject tryGetNodeSet(ThreadContext context, String expr, NokogiriXPathFunctionResolver fnResolver) throws TransformerException { - final Node contextNode = this.context.node; - - final JAXPPrefixResolver prefixResolver = new JAXPPrefixResolver(nsContext); - XPath xpathInternal = new XPath(expr, null, prefixResolver, XPath.SELECT); - - // We always need to have a ContextNode with Xalan XPath implementation - // To allow simple expression evaluation like 1+1 we are setting - // dummy Document as Context Node - final XObject xobj; - if ( contextNode == null ) - xobj = xpathInternal.execute(getXPathContext(fnResolver), DTM.NULL, prefixResolver); - else - xobj = 
xpathInternal.execute(getXPathContext(fnResolver), contextNode, prefixResolver); - - switch (xobj.getType()) { - case XObject.CLASS_BOOLEAN : return context.getRuntime().newBoolean(xobj.bool()); - case XObject.CLASS_NUMBER : return context.getRuntime().newFloat(xobj.num()); - case XObject.CLASS_NODESET : - XmlNodeSet xmlNodeSet = XmlNodeSet.newEmptyNodeSet(context); - xmlNodeSet.setNodeList(xobj.nodelist()); - xmlNodeSet.initialize(context.getRuntime(), this.context); - return xmlNodeSet; - default : return context.getRuntime().newString(xobj.str()); - } + return xpathContext; + } + + private static XPathContext + newXPathContext(final NokogiriXPathFunctionResolver functionResolver) + { + if (functionResolver == null) { return new XPathContext(false); } + return new XPathContext(new JAXPExtensionsProvider(functionResolver), false); + } + + private boolean + isContainsPrefix(final String str) + { + final StringBuilder prefix_ = new StringBuilder(); + for (String prefix : nsContext.getAllPrefixes()) { + prefix_.setLength(0); + prefix_.ensureCapacity(prefix.length() + 1); + prefix_.append(prefix).append(':'); + if (str.contains(prefix_)) { // prefix + ':' + return true; + } } + return false; + } - private XPathContext getXPathContext(final NokogiriXPathFunctionResolver fnResolver) { - Node doc = context.getNode().getOwnerDocument(); - if (doc == null) doc = context.getNode(); + private static final class JAXPExtensionsProvider extends org.apache.xpath.jaxp.JAXPExtensionsProvider + { - XPathContext xpathContext = (XPathContext) doc.getUserData(XPATH_CONTEXT); - - if ( xpathContext == null ) { - xpathContext = newXPathContext(fnResolver); - if ( variableResolver == null ) { - // NOTE: only caching without variables - could be improved by more sophisticated caching - doc.setUserData(XPATH_CONTEXT, xpathContext, null); - } - } - else { - Object owner = xpathContext.getOwnerObject(); - if ( ( owner == null && fnResolver == null ) || - ( owner instanceof 
JAXPExtensionsProvider && ((JAXPExtensionsProvider) owner).hasSameResolver(fnResolver) ) ) { - // can be re-used assuming it has the same variable-stack (for now only cached if no variables) - if ( variableResolver == null ) return xpathContext; - } - xpathContext = newXPathContext(fnResolver); // otherwise we can not use the cached xpath-context - } - - if ( variableResolver != null ) { - xpathContext.setVarStack(new JAXPVariableStack(variableResolver)); - } + final NokogiriXPathFunctionResolver resolver; - return xpathContext; + JAXPExtensionsProvider(NokogiriXPathFunctionResolver resolver) + { + super(resolver, false); + this.resolver = resolver; } - private static XPathContext newXPathContext(final NokogiriXPathFunctionResolver functionResolver) { - if ( functionResolver == null ) return new XPathContext(false); - return new XPathContext(new JAXPExtensionsProvider(functionResolver), false); + //@Override + //public boolean equals(Object obj) { + // if (obj instanceof JAXPExtensionsProvider) { + // return hasSameResolver(((JAXPExtensionsProvider) obj).resolver); + // } + // return false; + //} + + final boolean + hasSameResolver(final NokogiriXPathFunctionResolver resolver) + { + return resolver == this.resolver || resolver != null && ( + resolver.getHandler() == null ? 
this.resolver.getHandler() == null : ( + resolver.getHandler() == this.resolver.getHandler() + // resolver.getHandler().eql( this.resolver.getHandler() ) + ) + ); } - private boolean isContainsPrefix(final String str) { - final StringBuilder prefix_ = new StringBuilder(); - for ( String prefix : nsContext.getAllPrefixes() ) { - prefix_.setLength(0); - prefix_.ensureCapacity(prefix.length() + 1); - prefix_.append(prefix).append(':'); - if ( str.contains(prefix_) ) { // prefix + ':' - return true; - } - } - return false; - } - - private static final class JAXPExtensionsProvider extends org.apache.xpath.jaxp.JAXPExtensionsProvider { - - final NokogiriXPathFunctionResolver resolver; - - JAXPExtensionsProvider(NokogiriXPathFunctionResolver resolver) { - super(resolver, false); - this.resolver = resolver; - } - - //@Override - //public boolean equals(Object obj) { - // if (obj instanceof JAXPExtensionsProvider) { - // return hasSameResolver(((JAXPExtensionsProvider) obj).resolver); - // } - // return false; - //} - - final boolean hasSameResolver(final NokogiriXPathFunctionResolver resolver) { - return resolver == this.resolver || resolver != null && ( - resolver.getHandler() == null ? 
this.resolver.getHandler() == null : ( - resolver.getHandler() == this.resolver.getHandler() - // resolver.getHandler().eql( this.resolver.getHandler() ) - ) - ); - } - - } + } } diff --git a/ext/java/nokogiri/XsltStylesheet.java b/ext/java/nokogiri/XsltStylesheet.java index 02c8caaa81..856571b9ae 100644 --- a/ext/java/nokogiri/XsltStylesheet.java +++ b/ext/java/nokogiri/XsltStylesheet.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -41,6 +9,7 @@ import java.io.PipedWriter; import java.io.StringReader; import java.util.Set; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -54,8 +23,10 @@ import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; -import org.apache.xalan.transformer.TransformerImpl; import org.apache.xml.serializer.SerializationHandler; +import org.apache.xml.serializer.Serializer; +import org.apache.xml.serializer.SerializerFactory; + import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; @@ -77,279 +48,314 @@ * @author sergio * @author Yoko Harada */ -@JRubyClass(name="Nokogiri::XSLT::Stylesheet") -public class XsltStylesheet extends RubyObject { +@JRubyClass(name = "Nokogiri::XSLT::Stylesheet") +public class XsltStylesheet extends RubyObject +{ + private static final long serialVersionUID = 1L; - private TransformerFactory factory = null; - private Templates sheet = null; - private IRubyObject stylesheet = null; - private boolean htmlish = false; + private TransformerFactory factory = null; + private Templates sheet = null; + private IRubyObject stylesheet = null; + private boolean htmlish = false; - public XsltStylesheet(Ruby ruby, RubyClass rubyClass) { - super(ruby, rubyClass); - } - - /** - * Create and return a copy of this object. - * - * @return a clone of this object - */ - @Override - public Object clone() throws CloneNotSupportedException { - return super.clone(); - } + public + XsltStylesheet(Ruby ruby, RubyClass rubyClass) + { + super(ruby, rubyClass); + } - private void addParametersToTransformer(ThreadContext context, Transformer transf, IRubyObject parameters) { - Ruby runtime = context.getRuntime(); + /** + * Create and return a copy of this object. 
+ * + * @return a clone of this object + */ + @Override + public Object + clone() throws CloneNotSupportedException + { + return super.clone(); + } - if (parameters instanceof RubyHash) { - setHashParameters(transf, (RubyHash)parameters); - } else if (parameters instanceof RubyArray) { - setArrayParameters(transf, runtime, (RubyArray)parameters); - } else { - throw runtime.newTypeError("parameters should be given either Array or Hash"); - } + private void + addParametersToTransformer(ThreadContext context, Transformer transf, IRubyObject parameters) + { + Ruby runtime = context.getRuntime(); + + if (parameters instanceof RubyHash) { + setHashParameters(transf, (RubyHash)parameters); + } else if (parameters instanceof RubyArray) { + setArrayParameters(transf, runtime, (RubyArray)parameters); + } else { + throw runtime.newTypeError("parameters should be given either Array or Hash"); } - - private void setHashParameters(Transformer transformer, RubyHash hash) { - Set keys = hash.keySet(); - for (String key : keys) { - String value = (String)hash.get(key); - transformer.setParameter(key, unparseValue(value)); - } + } + + @SuppressWarnings("unchecked") + private void + setHashParameters(Transformer transformer, RubyHash hash) + { + for (Map.Entry entry : (Set>)hash.entrySet()) { + transformer.setParameter((String)entry.getKey(), unparseValue((String)entry.getValue())); } - - private void setArrayParameters(Transformer transformer, Ruby runtime, RubyArray params) { - int limit = params.getLength(); - if(limit % 2 == 1) limit--; - - for(int i = 0; i < limit; i+=2) { - String name = params.aref(runtime.newFixnum(i)).asJavaString(); - String value = params.aref(runtime.newFixnum(i+1)).asJavaString(); - transformer.setParameter(name, unparseValue(value)); - } + } + + private void + setArrayParameters(Transformer transformer, Ruby runtime, RubyArray params) + { + int limit = params.getLength(); + if (limit % 2 == 1) { limit--; } + + for (int i = 0; i < limit; i += 2) { + 
String name = params.aref(runtime.newFixnum(i)).asJavaString(); + String value = params.aref(runtime.newFixnum(i + 1)).asJavaString(); + transformer.setParameter(name, unparseValue(value)); } - - private Pattern p = Pattern.compile("'.{1,}'"); + } - private String unparseValue(String orig) { - Matcher m = p.matcher(orig); - if ((orig.startsWith("\"") && orig.endsWith("\"")) || m.matches()) { - orig = orig.substring(1, orig.length()-1); - } + private static final Pattern QUOTED = Pattern.compile("'.{1,}'"); - return orig; + private String + unparseValue(String orig) + { + if ((orig.startsWith("\"") && orig.endsWith("\"")) || QUOTED.matcher(orig).matches()) { + orig = orig.substring(1, orig.length() - 1); } - @JRubyMethod(meta = true, rest = true) - public static IRubyObject parse_stylesheet_doc(ThreadContext context, IRubyObject klazz, IRubyObject[] args) { - - Ruby runtime = context.getRuntime(); + return orig; + } + + @JRubyMethod(meta = true, rest = true) + public static IRubyObject + parse_stylesheet_doc(ThreadContext context, IRubyObject klazz, IRubyObject[] args) + { - ensureFirstArgIsDocument(runtime, args[0]); + Ruby runtime = context.getRuntime(); - XmlDocument xmlDoc = (XmlDocument) args[0]; - ensureDocumentHasNoError(context, xmlDoc); - - Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument(); + ensureFirstArgIsDocument(runtime, args[0]); - XsltStylesheet xslt = - (XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz); + XmlDocument xmlDoc = (XmlDocument) args[0]; + ensureDocumentHasNoError(context, xmlDoc); - try { - xslt.init(args[1], doc); - } catch (TransformerConfigurationException ex) { - throw runtime.newRuntimeError("could not parse xslt stylesheet"); - } + Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument(); - return xslt; + XsltStylesheet xslt = + (XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz); 
+ + try { + xslt.init(args[1], doc); + } catch (TransformerConfigurationException ex) { + throw runtime.newRuntimeError("could not parse xslt stylesheet"); } - - private void init(IRubyObject stylesheet, Document document) throws TransformerConfigurationException { - this.stylesheet = stylesheet; // either RubyString or RubyFile - if (factory == null) factory = TransformerFactory.newInstance(); - NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener(); - factory.setErrorListener(elistener); - sheet = factory.newTemplates(new DOMSource(document)); + + return xslt; + } + + private void + init(IRubyObject stylesheet, Document document) throws TransformerConfigurationException + { + this.stylesheet = stylesheet; // either RubyString or RubyFile + if (factory == null) { factory = TransformerFactory.newInstance(); } + NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener(); + factory.setErrorListener(elistener); + sheet = factory.newTemplates(new DOMSource(document)); + } + + private static void + ensureFirstArgIsDocument(Ruby runtime, IRubyObject arg) + { + if (arg instanceof XmlDocument) { return; } + throw runtime.newArgumentError("doc must be a Nokogiri::XML::Document instance"); + } + + private static void + ensureDocumentHasNoError(ThreadContext context, XmlDocument xmlDoc) + { + Ruby runtime = context.getRuntime(); + RubyArray errors_of_xmlDoc = (RubyArray) xmlDoc.getInstanceVariable("@errors"); + if (!errors_of_xmlDoc.isEmpty()) { + throw runtime.newRuntimeError(errors_of_xmlDoc.first().asString().asJavaString()); } - - private static void ensureFirstArgIsDocument(Ruby runtime, IRubyObject arg) { - if (arg instanceof XmlDocument) { - return; - } else { - throw runtime.newArgumentError("doc must be a Nokogiri::XML::Document instance"); - } + } + + @JRubyMethod + public IRubyObject + serialize(ThreadContext context, IRubyObject doc) throws IOException + { + XmlDocument xmlDoc = (XmlDocument) doc; + ByteArrayOutputStream writer = new 
ByteArrayOutputStream(); + + Serializer serializer = SerializerFactory.getSerializer(this.sheet.getOutputProperties()); + serializer.setOutputStream(writer); + ((SerializationHandler) serializer).serialize(xmlDoc.getNode()); + return context.getRuntime().newString(writer.toString()); + } + + @JRubyMethod(rest = true, required = 1, optional = 2) + public IRubyObject + transform(ThreadContext context, IRubyObject[] args) + { + Ruby runtime = context.getRuntime(); + + argumentTypeCheck(runtime, args[0]); + + NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener(); + DOMSource domSource = new DOMSource(((XmlDocument) args[0]).getDocument()); + final DOMResult result; + String stringResult = null; + try { + result = tryXsltTransformation(context, args, domSource, elistener); // DOMResult + if (result.getNode().getFirstChild() == null) { + stringResult = retryXsltTransformation(context, args, domSource, elistener); // StreamResult + } + } catch (TransformerConfigurationException ex) { + throw runtime.newRuntimeError(ex.getMessage()); + } catch (TransformerException ex) { + throw runtime.newRuntimeError(ex.getMessage()); + } catch (IOException ex) { + throw runtime.newRuntimeError(ex.getMessage()); } - - private static void ensureDocumentHasNoError(ThreadContext context, XmlDocument xmlDoc) { - Ruby runtime = context.getRuntime(); - RubyArray errors_of_xmlDoc = (RubyArray) xmlDoc.getInstanceVariable("@errors"); - if (!errors_of_xmlDoc.isEmpty()) { - throw runtime.newRuntimeError(errors_of_xmlDoc.first().asString().asJavaString()); - } + + switch (elistener.getErrorType()) { + case ERROR: + case FATAL: + throw runtime.newRuntimeError(elistener.getErrorMessage()); + case WARNING: + default: + // no-op } - @JRubyMethod - public IRubyObject serialize(ThreadContext context, IRubyObject doc) throws IOException, TransformerException { - XmlDocument xmlDoc = (XmlDocument) doc; - TransformerImpl transformer = (TransformerImpl) this.sheet.newTransformer(); - 
ByteArrayOutputStream writer = new ByteArrayOutputStream(); - StreamResult streamResult = new StreamResult(writer); - SerializationHandler serializationHandler = transformer.createSerializationHandler(streamResult); - serializationHandler.serialize(xmlDoc.getNode()); - return context.getRuntime().newString(writer.toString()); + if (stringResult == null) { + return createDocumentFromDomResult(context, runtime, result); + } else { + return createDocumentFromString(context, runtime, stringResult); } + } - @JRubyMethod(rest = true, required=1, optional=2) - public IRubyObject transform(ThreadContext context, IRubyObject[] args) { - Ruby runtime = context.getRuntime(); - - argumentTypeCheck(runtime, args[0]); - - NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener(); - DOMSource domSource = new DOMSource(((XmlDocument) args[0]).getDocument()); - final DOMResult result; String stringResult = null; - try{ - result = tryXsltTransformation(context, args, domSource, elistener); // DOMResult - if (result.getNode().getFirstChild() == null) { - stringResult = retryXsltTransformation(context, args, domSource, elistener); // StreamResult - } - } catch(TransformerConfigurationException ex) { - throw runtime.newRuntimeError(ex.getMessage()); - } catch(TransformerException ex) { - throw runtime.newRuntimeError(ex.getMessage()); - } catch (IOException ex) { - throw runtime.newRuntimeError(ex.getMessage()); - } - - switch (elistener.getErrorType()) { - case ERROR: - case FATAL: - throw runtime.newRuntimeError(elistener.getErrorMessage()); - case WARNING: - default: - // no-op - } - - if (stringResult == null) { - return createDocumentFromDomResult(context, runtime, result); - } else { - return createDocumentFromString(context, runtime, stringResult); - } + private DOMResult + tryXsltTransformation(ThreadContext context, IRubyObject[] args, DOMSource domSource, + NokogiriXsltErrorListener elistener) throws TransformerException + { + Transformer transf = 
sheet.newTransformer(); + transf.reset(); + transf.setErrorListener(elistener); + if (args.length > 1) { + addParametersToTransformer(context, transf, args[1]); } - - private DOMResult tryXsltTransformation(ThreadContext context, IRubyObject[] args, DOMSource domSource, NokogiriXsltErrorListener elistener) throws TransformerException { - Transformer transf = sheet.newTransformer(); - transf.reset(); - transf.setErrorListener(elistener); - if (args.length > 1) { - addParametersToTransformer(context, transf, args[1]); - } - - DOMResult result = new DOMResult(); - transf.transform(domSource, result); - return result; + + DOMResult result = new DOMResult(); + transf.transform(domSource, result); + return result; + } + + private String + retryXsltTransformation(ThreadContext context, + IRubyObject[] args, + DOMSource domSource, + NokogiriXsltErrorListener elistener) + throws TransformerException, IOException + { + Templates templates = getTemplatesFromStreamSource(); + Transformer transf = templates.newTransformer(); + transf.setErrorListener(elistener); + if (args.length > 1) { + addParametersToTransformer(context, transf, args[1]); } - - private String retryXsltTransformation(ThreadContext context, - IRubyObject[] args, - DOMSource domSource, - NokogiriXsltErrorListener elistener) - throws TransformerException, IOException { - Templates templates = getTemplatesFromStreamSource(); - Transformer transf = templates.newTransformer(); - transf.setErrorListener(elistener); - if (args.length > 1) { - addParametersToTransformer(context, transf, args[1]); - } - PipedWriter pwriter = new PipedWriter(); - PipedReader preader = new PipedReader(); - pwriter.connect(preader); - StreamResult result = new StreamResult(pwriter); - transf.transform(domSource, result); - - char[] cbuf = new char[1024]; - int len = preader.read(cbuf, 0, 1024); - StringBuilder builder = new StringBuilder(len); + PipedWriter pwriter = new PipedWriter(); + PipedReader preader = new PipedReader(); + 
pwriter.connect(preader); + StreamResult result = new StreamResult(pwriter); + transf.transform(domSource, result); + + char[] cbuf = new char[1024]; + int len = preader.read(cbuf, 0, 1024); + StringBuilder builder = new StringBuilder(len); + builder.append(cbuf, 0, len); + htmlish = isHtml(builder); // judge from the first chunk + + while (len == 1024) { + len = preader.read(cbuf, 0, 1024); + if (len > 0) { builder.append(cbuf, 0, len); - htmlish = isHtml(builder); // judge from the first chunk - - while (len == 1024) { - len = preader.read(cbuf, 0, 1024); - if (len > 0) { - builder.append(cbuf, 0, len); - } - } - - preader.close(); - pwriter.close(); - - return builder.toString(); - } - - private IRubyObject createDocumentFromDomResult(ThreadContext context, Ruby runtime, DOMResult domResult) { - if ("html".equals(domResult.getNode().getFirstChild().getNodeName())) { - HtmlDocument htmlDocument = (HtmlDocument) getNokogiriClass(runtime, "Nokogiri::HTML::Document").allocate(); - htmlDocument.setDocumentNode(context, (Document) domResult.getNode()); - return htmlDocument; - } else { - XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document")); - xmlDocument.setDocumentNode(context, (Document) domResult.getNode()); - return xmlDocument; - } - } - - private Templates getTemplatesFromStreamSource() throws TransformerConfigurationException { - if (stylesheet instanceof RubyString) { - StringReader reader = new StringReader(stylesheet.asJavaString()); - StreamSource xsltStreamSource = new StreamSource(reader); - return factory.newTemplates(xsltStreamSource); - } - return null; + } } - - private static final Pattern HTML_TAG = Pattern.compile("<(%s)*html", Pattern.CASE_INSENSITIVE); - - private static boolean isHtml(CharSequence chunk) { - Matcher match = HTML_TAG.matcher(chunk); - return match.find(); - } - - private IRubyObject createDocumentFromString(ThreadContext context, 
Ruby runtime, String stringResult) { - IRubyObject[] args = new IRubyObject[4]; - args[0] = stringOrBlank(runtime, stringResult); - args[1] = runtime.getNil(); // url - args[2] = runtime.getNil(); // encoding - RubyClass parse_options = (RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions"); - if (htmlish) { - args[3] = parse_options.getConstant("DEFAULT_HTML"); - RubyClass htmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::HTML::Document"); - return Helpers.invoke(context, htmlDocumentClass, "parse", args); - } else { - args[3] = parse_options.getConstant("DEFAULT_XML"); - RubyClass xmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::XML::Document"); - XmlDocument xmlDocument = (XmlDocument) Helpers.invoke(context, xmlDocumentClass, "parse", args); - if (((Document)xmlDocument.getNode()).getDocumentElement() == null) { - RubyArray errors = (RubyArray) xmlDocument.getInstanceVariable("@errors"); - Helpers.invoke(context, errors, "<<", args[0]); - } - return xmlDocument; - } + + preader.close(); + pwriter.close(); + + return builder.toString(); + } + + private IRubyObject + createDocumentFromDomResult(ThreadContext context, Ruby runtime, DOMResult domResult) + { + if ("html".equals(domResult.getNode().getFirstChild().getNodeName())) { + return new Html4Document(context.runtime, (Document) domResult.getNode()); + } else { + return new XmlDocument(context.runtime, (Document) domResult.getNode()); } - - private static void argumentTypeCheck(Ruby runtime, IRubyObject arg) { - if (arg instanceof XmlDocument) return; - throw runtime.newArgumentError("argument must be a Nokogiri::XML::Document"); + } + + private Templates + getTemplatesFromStreamSource() throws TransformerConfigurationException + { + if (stylesheet instanceof RubyString) { + StringReader reader = new StringReader(stylesheet.asJavaString()); + StreamSource xsltStreamSource = new StreamSource(reader); + return factory.newTemplates(xsltStreamSource); } - - @JRubyMethod(name = {"registr", 
"register"}, meta = true) - public static IRubyObject register(ThreadContext context, IRubyObject cls, IRubyObject uri, IRubyObject receiver) { - throw context.getRuntime().newNotImplementedError("Nokogiri::XSLT.register method is not implemented"); - /* When API conflict is solved, this method should be below: - // ThreadContext is used while executing xslt extension function - registry.put("context", context); - registry.put("receiver", receiver); - return context.getRuntime().getNil(); - */ + return null; + } + + private static final Pattern HTML_TAG = Pattern.compile("<(%s)*html", Pattern.CASE_INSENSITIVE); + + private static boolean + isHtml(CharSequence chunk) + { + Matcher match = HTML_TAG.matcher(chunk); + return match.find(); + } + + private IRubyObject + createDocumentFromString(ThreadContext context, Ruby runtime, String stringResult) + { + IRubyObject[] args = new IRubyObject[4]; + args[0] = stringOrBlank(runtime, stringResult); + args[1] = runtime.getNil(); // url + args[2] = runtime.getNil(); // encoding + RubyClass parse_options = (RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions"); + if (htmlish) { + args[3] = parse_options.getConstant("DEFAULT_HTML"); + RubyClass htmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::HTML4::Document"); + return Helpers.invoke(context, htmlDocumentClass, "parse", args); + } else { + args[3] = parse_options.getConstant("DEFAULT_XML"); + RubyClass xmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::XML::Document"); + XmlDocument xmlDocument = (XmlDocument) Helpers.invoke(context, xmlDocumentClass, "parse", args); + if (((Document)xmlDocument.getNode()).getDocumentElement() == null) { + RubyArray errors = (RubyArray) xmlDocument.getInstanceVariable("@errors"); + Helpers.invoke(context, errors, "<<", args[0]); + } + return xmlDocument; } + } + + private static void + argumentTypeCheck(Ruby runtime, IRubyObject arg) + { + if (arg instanceof XmlDocument) { return; } + throw 
runtime.newArgumentError("argument must be a Nokogiri::XML::Document"); + } + + @JRubyMethod(name = {"registr", "register"}, meta = true) + public static IRubyObject + register(ThreadContext context, IRubyObject cls, IRubyObject uri, IRubyObject receiver) + { + throw context.getRuntime().newNotImplementedError("Nokogiri::XSLT.register method is not implemented"); + /* When API conflict is solved, this method should be below: + // ThreadContext is used while executing xslt extension function + registry.put("context", context); + registry.put("receiver", receiver); + return context.getRuntime().getNil(); + */ + } } diff --git a/ext/java/nokogiri/internals/ClosedStreamException.java b/ext/java/nokogiri/internals/ClosedStreamException.java index e5da5402a1..3466b5674c 100644 --- a/ext/java/nokogiri/internals/ClosedStreamException.java +++ b/ext/java/nokogiri/internals/ClosedStreamException.java @@ -1,9 +1,12 @@ package nokogiri.internals; @SuppressWarnings("serial") -public class ClosedStreamException extends Exception { +public class ClosedStreamException extends Exception +{ - public ClosedStreamException(String message) { + public + ClosedStreamException(String message) + { super(message); } diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java index 2ed513f9b6..65b3d92a3c 100644 --- a/ext/java/nokogiri/internals/HtmlDomParserContext.java +++ b/ext/java/nokogiri/internals/HtmlDomParserContext.java @@ -1,43 +1,13 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation 
files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; import static nokogiri.internals.NokogiriHelpers.isNamespace; import static nokogiri.internals.NokogiriHelpers.stringOrNil; -import nokogiri.HtmlDocument; + +import nokogiri.Html4Document; import nokogiri.NokogiriService; import nokogiri.XmlDocument; +import nokogiri.XmlSyntaxError; import org.apache.xerces.xni.Augmentations; import org.apache.xerces.xni.QName; @@ -45,11 +15,12 @@ import org.apache.xerces.xni.XNIException; import org.apache.xerces.xni.parser.XMLDocumentFilter; import org.apache.xerces.xni.parser.XMLParserConfiguration; -import org.cyberneko.html.HTMLConfiguration; -import org.cyberneko.html.filters.DefaultFilter; +import net.sourceforge.htmlunit.cyberneko.HTMLConfiguration; +import net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Helpers; import 
org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; @@ -57,190 +28,225 @@ import org.w3c.dom.NodeList; /** - * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml. - * + * Parser for Html4Document. This class actually parses Html4Document using NekoHtml. + * * @author sergio * @author Patrick Mahoney * @author Yoko Harada */ -public class HtmlDomParserContext extends XmlDomParserContext { +public class HtmlDomParserContext extends XmlDomParserContext +{ + private static final long serialVersionUID = 1L; - public HtmlDomParserContext(Ruby runtime, IRubyObject options) { - super(runtime, options); - } - - public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) { - super(runtime, encoding, options); - } + public + HtmlDomParserContext(Ruby runtime, IRubyObject options) + { + this(runtime, runtime.getNil(), options); + } - @Override - protected void initErrorHandler() { - if (options.strict) { - errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning); - } else { - errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning); - } - } + public + HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) + { + super(runtime, encoding, options); + java_encoding = NokogiriHelpers.getValidEncoding(encoding); + } - @Override - protected void initParser(Ruby runtime) { - XMLParserConfiguration config = new HTMLConfiguration(); - //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter(); - XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler); - //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter}; - XMLDocumentFilter[] filters = { elementValidityCheckFilter}; - - config.setErrorHandler(this.errorHandler); - - parser = new NokogiriDomParser(config); - - // see http://nekohtml.sourceforge.net/settings.html for details - 
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding); - setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); - setProperty("http://cyberneko.org/html/properties/filters", filters); - setFeature("http://cyberneko.org/html/features/report-errors", true); - setFeature("http://xml.org/sax/features/namespaces", false); - } - - @Override - public void setEncoding(String encoding) { - super.setEncoding(encoding); - } + @Override + protected void + initParser(Ruby runtime) + { + XMLParserConfiguration config = new HTMLConfiguration(); + //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter(); + XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler); + //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter}; + XMLDocumentFilter[] filters = { elementValidityCheckFilter}; + + config.setErrorHandler(this.errorHandler); + + parser = new NokogiriDomParser(config); + + // see http://nekohtml.sourceforge.net/settings.html for details + setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding); + setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); + setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); + setProperty("http://cyberneko.org/html/properties/filters", filters); + setFeature("http://cyberneko.org/html/features/report-errors", true); + setFeature("http://xml.org/sax/features/namespaces", false); + } + + @Override + public void + setEncoding(String encoding) + { + super.setEncoding(encoding); + } - /** - * Enable NekoHTML feature for balancing tags in a document fragment. - * - * This method is used in XmlNode#in_context method. 
- */ - public void enableDocumentFragment() { - setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); + /** + * Enable NekoHTML feature for balancing tags in a document fragment. + * + * This method is used in XmlNode#in_context method. + */ + public void + enableDocumentFragment() + { + setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); + } + + @Override + public XmlDocument + parse(ThreadContext context, RubyClass klass, IRubyObject url) + { + XmlDocument xmlDoc = super.parse(context, klass, url); + + // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT) + // https://github.com/sparklemotion/nokogiri/issues/2130 + if (!options.recover && errorHandler.getErrors().size() > 0) { + XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); + String exceptionMsg = String.format("%s: '%s'", + "Parser without recover option encountered error or warning", + errorHandler.getErrors().get(0)); + xmlSyntaxError.setException(new Exception(exceptionMsg)); + throw xmlSyntaxError.toThrowable(); } - @Override - protected XmlDocument getNewEmptyDocument(ThreadContext context) { - IRubyObject[] args = IRubyObject.NULL_ARRAY; - return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args); + return xmlDoc; + } + + @Override + protected XmlDocument + wrapDocument(ThreadContext context, RubyClass klass, Document document) + { + Html4Document htmlDocument = new Html4Document(context.runtime, klass, document); + htmlDocument.setDocumentNode(context.runtime, document); + Helpers.invoke(context, htmlDocument, "initialize"); + + if (ruby_encoding.isNil()) { + // ruby_encoding might have detected by Html4Document::EncodingReader + if (detected_encoding != null && !detected_encoding.isNil()) { + ruby_encoding = detected_encoding; + } else { + // no encoding given & no encoding detected, then try to get it + String 
charset = tryGetCharsetFromHtml5MetaTag(document); + ruby_encoding = stringOrNil(context.runtime, charset); + } } + htmlDocument.setEncoding(ruby_encoding); + htmlDocument.setParsedEncoding(java_encoding); + return htmlDocument; + } - @Override - protected XmlDocument wrapDocument(ThreadContext context, RubyClass klazz, Document document) { - HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz); - htmlDocument.setDocumentNode(context, document); - if (ruby_encoding.isNil()) { - // ruby_encoding might have detected by HtmlDocument::EncodingReader - if (detected_encoding != null && !detected_encoding.isNil()) { - ruby_encoding = detected_encoding; - } else { - // no encoding given & no encoding detected, then try to get it - String charset = tryGetCharsetFromHtml5MetaTag(document); - ruby_encoding = stringOrNil(context.getRuntime(), charset); + // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset + // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree + // so, this method attempts to find the charset. 
+ private static String + tryGetCharsetFromHtml5MetaTag(Document document) + { + if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; } + NodeList list = document.getDocumentElement().getChildNodes(); + Node item; + for (int i = 0; i < list.getLength(); i++) { + if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) { + NodeList headers = item.getChildNodes(); + for (int j = 0; j < headers.getLength(); j++) { + if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) { + NamedNodeMap nodeMap = item.getAttributes(); + for (int k = 0; k < nodeMap.getLength(); k++) { + if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) { + return item.getNodeValue(); + } } + } } - htmlDocument.setEncoding(ruby_encoding); - htmlDocument.setParsedEncoding(java_encoding); - return htmlDocument; + } } - - // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset - // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree - // so, this method attempts to find the charset. - private static String tryGetCharsetFromHtml5MetaTag(Document document) { - if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null; - NodeList list = document.getDocumentElement().getChildNodes(); Node item; - for (int i = 0; i < list.getLength(); i++) { - if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) { - NodeList headers = item.getChildNodes(); - for (int j = 0; j < headers.getLength(); j++) { - if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) { - NamedNodeMap nodeMap = item.getAttributes(); - for (int k = 0; k < nodeMap.getLength(); k++) { - if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) { - return item.getNodeValue(); - } - } - } - } - } + return null; + } + + /** + * Filter to strip out attributes that pertain to XML namespaces. 
+ */ + public static class RemoveNSAttrsFilter extends DefaultFilter + { + @Override + public void + startElement(QName element, XMLAttributes attrs, + Augmentations augs) throws XNIException + { + int i; + for (i = 0; i < attrs.getLength(); ++i) { + if (isNamespace(attrs.getQName(i))) { + attrs.removeAttributeAt(i); + --i; } - return null; + } + + element.uri = null; + super.startElement(element, attrs, augs); } + } - /** - * Filter to strip out attributes that pertain to XML namespaces. - */ - public static class RemoveNSAttrsFilter extends DefaultFilter { - @Override - public void startElement(QName element, XMLAttributes attrs, - Augmentations augs) throws XNIException { - int i; - for (i = 0; i < attrs.getLength(); ++i) { - if (isNamespace(attrs.getQName(i))) { - attrs.removeAttributeAt(i); - --i; - } - } + public static class ElementValidityCheckFilter extends DefaultFilter + { + private NokogiriErrorHandler errorHandler; - element.uri = null; - super.startElement(element, attrs, augs); - } + private + ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) + { + this.errorHandler = errorHandler; } - - public static class ElementValidityCheckFilter extends DefaultFilter { - private NokogiriErrorHandler errorHandler; - - private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) { - this.errorHandler = errorHandler; - } - - // element names from xhtml1-strict.dtd - private static String[][] element_names = { - {"a", "abbr", "acronym", "address", "area"}, - {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"}, - {"caption", "cite", "code", "col", "colgroup"}, - {"dd", "del", "dfn", "div", "dl", "dt"}, - {"em"}, - {"fieldset", "font", "form", "frame", "frameset"}, - {}, // g - {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"}, - {"i", "iframe", "img", "input", "ins"}, - {}, // j - {"kbd"}, - {"label", "legend", "li", "link"}, - {"map", "meta"}, - {"noframes", "noscript"}, - {"object", "ol", "optgroup", 
"option"}, - {"p", "param", "pre"}, - {"q"}, - {}, // r - {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"}, - {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"}, - {"u", "ul"}, - {"var"}, - {}, // w - {}, // x - {}, // y - {} // z - }; - - private static boolean isValid(final String name) { - int index = name.charAt(0) - 97; - if (index >= element_names.length) return false; - String[] elementNames = element_names[index]; - for (int i=0; i= element_names.length) { return false; } + String[] elementNames = element_names[index]; + for (int i = 0; i < elementNames.length; i++) { + if (name.equals(elementNames[i])) { + return true; } + } + return false; + } + + @Override + public void + startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException + { + if (!isValid(name.rawname)) { + errorHandler.addError(new Exception("Tag " + name.rawname + " invalid")); + } + super.startElement(name, attrs, augs); } + } } diff --git a/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java b/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java index a2aa391229..71c0ef173f 100644 --- a/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +++ b/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java @@ -4,17 +4,24 @@ import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler { +public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler +{ - @Override - public void warning(SAXParseException exception) throws SAXException { - } + @Override + public void + warning(SAXParseException exception) throws SAXException + { + } - @Override - public void error(SAXParseException exception) throws SAXException { - } + @Override + public void + error(SAXParseException exception) throws SAXException + { + } - @Override - public void fatalError(SAXParseException exception) 
throws SAXException { - } + @Override + public void + fatalError(SAXParseException exception) throws SAXException + { + } } diff --git a/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java b/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java index e77dcea45c..7f6374263a 100644 --- a/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +++ b/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java @@ -23,7 +23,8 @@ * * @author John Shahid */ -public class NokogiriBlockingQueueInputStream extends InputStream { +public class NokogiriBlockingQueueInputStream extends InputStream +{ private final LinkedBlockingQueue queue; protected Task currentTask; protected ByteArrayInputStream currentStream; @@ -32,10 +33,13 @@ public class NokogiriBlockingQueueInputStream extends InputStream { public static final ByteArrayInputStream END = new ByteArrayInputStream(new byte[0]); - private static class Task extends FutureTask { + private static class Task extends FutureTask + { private final ByteArrayInputStream stream; - public Task(ByteArrayInputStream stream) { + public + Task(ByteArrayInputStream stream) + { super(new Callable() { @Override public Void call() throws Exception { @@ -46,28 +50,38 @@ public Void call() throws Exception { this.stream = stream; } - public ByteArrayInputStream getStream() { + public ByteArrayInputStream + getStream() + { return stream; } @Override - public void run() { + public void + run() + { // don't do anything } @Override - public boolean runAndReset() { + public boolean + runAndReset() + { // don't do anything return true; } @Override - public void set(Void v) { + public void + set(Void v) + { super.set(v); } } - public NokogiriBlockingQueueInputStream() { + public + NokogiriBlockingQueueInputStream() + { queue = new LinkedBlockingQueue(); } @@ -77,7 +91,9 @@ public NokogiriBlockingQueueInputStream() { * that the read method will block indefinitely. 
*/ @Override - public synchronized void close() { + public synchronized void + close() + { closed = true; List tasks = new LinkedList(); queue.drainTo(tasks); @@ -98,9 +114,12 @@ public synchronized void close() { * * @return */ - public synchronized Future addChunk(ByteArrayInputStream stream) throws ClosedStreamException { - if (closed) + public synchronized Future + addChunk(ByteArrayInputStream stream) throws ClosedStreamException + { + if (closed) { throw new ClosedStreamException("Cannot add a chunk to a closed stream"); + } Task task = new Task(stream); queue.add(task); return task; @@ -112,10 +131,13 @@ public synchronized Future addChunk(ByteArrayInputStream stream) throws Cl * @see java.io.InputStream#read() */ @Override - public int read() throws IOException { + public int + read() throws IOException + { if (currentTask == null || currentStream.available() == 0) - if (getNextTask() == -1) + if (getNextTask() == -1) { return -1; + } return currentStream.read(); } @@ -125,7 +147,9 @@ public int read() throws IOException { * @see java.io.InputStream#read(byte[], int, int) */ @Override - public int read(byte[] bytes, int off, int len) { + public int + read(byte[] bytes, int off, int len) + { if (currentTask == null || currentStream.available() == 0) { if (getNextTask() == -1) { currentTask.set(null); @@ -135,11 +159,14 @@ public int read(byte[] bytes, int off, int len) { return currentStream.read(bytes, off, len); } - protected int getNextTask() { + protected int + getNextTask() + { while (true) { try { - if (currentTask != null) + if (currentTask != null) { currentTask.set(null); + } currentTask = queue.take(); currentStream = currentTask.getStream(); return currentStream.available() == 0 ? 
-1 : currentStream.available(); diff --git a/ext/java/nokogiri/internals/NokogiriDomParser.java b/ext/java/nokogiri/internals/NokogiriDomParser.java index 186ff27b5f..b184da5817 100644 --- a/ext/java/nokogiri/internals/NokogiriDomParser.java +++ b/ext/java/nokogiri/internals/NokogiriDomParser.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri.internals; import java.io.IOException; @@ -53,64 +21,79 @@ * * @author Patrick Mahoney */ -public class NokogiriDomParser extends DOMParser { - protected DOMParser dtd; - protected boolean xInclude; - protected XMLParserConfiguration config; +public class NokogiriDomParser extends DOMParser +{ + protected DOMParser dtd; + protected boolean xInclude; + protected XMLParserConfiguration config; - public NokogiriDomParser(XMLParserConfiguration config) { - super(config); - this.config = config; - initialize(); - } + public + NokogiriDomParser(XMLParserConfiguration config) + { + super(config); + this.config = config; + initialize(); + } + + public + NokogiriDomParser(ParserContext.Options options) + { + xInclude = options.xInclude; + initialize(); + } - public NokogiriDomParser(ParserContext.Options options) { - xInclude = options.xInclude; - initialize(); + protected void + initialize() + { + if (config == null) { + if (xInclude) { + config = new XIncludeParserConfiguration(); + } else { + config = getXMLParserConfiguration(); + } } - protected void initialize() { - if (config == null) { - if (xInclude) { - config = new XIncludeParserConfiguration(); - } else { - config = getXMLParserConfiguration(); - } - } + DTDConfiguration dtdConfig = new DTDConfiguration(); + dtd = new DOMParser(dtdConfig); - DTDConfiguration dtdConfig = new DTDConfiguration(); - dtd = new DOMParser(dtdConfig); + config.setDTDHandler(dtdConfig); + config.setDTDContentModelHandler(dtdConfig); + } - config.setDTDHandler(dtdConfig); - config.setDTDContentModelHandler(dtdConfig); + @Override + public void + parse(InputSource source) throws SAXException, IOException + { + dtd.reset(); + if (xInclude) { + setEntityResolver(new NokogiriXInlcudeEntityResolver(source)); + } + super.parse(source); + Document doc = getDocument(); + if (doc == null) { + throw new RuntimeException("null document"); } - @Override - public void parse(InputSource source) throws SAXException, IOException { 
- dtd.reset(); - if (xInclude) { - setEntityResolver(new NokogiriXInlcudeEntityResolver(source)); - } - super.parse(source); - Document doc = getDocument(); - if (doc == null) - throw new RuntimeException("null document"); + doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null); + } - doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null); + private static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver + { + InputSource source; + private + NokogiriXInlcudeEntityResolver(InputSource source) + { + this.source = source; } - private static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver { - InputSource source; - private NokogiriXInlcudeEntityResolver(InputSource source) { - this.source = source; - } - - @Override - public InputSource resolveEntity(String publicId, String systemId) - throws SAXException, IOException { - if (systemId != null) source.setSystemId(systemId); - if (publicId != null) source.setPublicId(publicId); - return source; - } + @Override + public InputSource + resolveEntity(String publicId, String systemId) + throws SAXException, IOException + { + if (systemId != null) { source.setSystemId(systemId); } + if (publicId != null) { source.setPublicId(publicId); } + return source; } + } } diff --git a/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java b/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java deleted file mode 100644 index 10f15519b2..0000000000 --- a/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +++ /dev/null @@ -1,107 +0,0 @@ -package nokogiri.internals; - -import java.io.InputStream; - -import org.jruby.Ruby; -import org.jruby.RubyObject; -import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.Helpers; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.jruby.util.ByteList; - -/** - * This class wraps the EncodingReader which act like a rewinding input 
stream, - * it tries to read the first 1K of data to detect the encoding, but save - * this data in a buffer for the subsequent read. Unfortunately, the EncodingReader - * will behave as expected only if encoding was detected, otherwise, the read data - * won't be stored and EncodingReader will fallback to read directory from the io stream. - * this is kind of lame, since we need to have similar logic in both layers. The alternative - * is to implement the encoding detection similar to the way C-Nokogiri does it; it starts - * parsing assuming encoding is unknown and if encoding is detected it will throw an exception - * causing parsing to stop, in which case we have to intercept the exception and set the encoding. - * Also in this case we don't have to restart the parsing since html/document.rb does that for us. - * - * @author John Shahid - * - */ -public class NokogiriEncodingReaderWrapper extends InputStream { - private final ThreadContext context; - private final IRubyObject encodingReader; - private final Ruby ruby; - private IRubyObject detectedEncoding; - private final byte[] firstChunk = new byte[1024]; - private int firstChunkOff = 0; - private int firstChunkLength = 0; - - public NokogiriEncodingReaderWrapper(ThreadContext context, RubyObject encodingReader) { - this.context = context; - this.encodingReader = encodingReader; - this.ruby = context.getRuntime(); - - if (!Helpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read")).isTrue() - || encodingReader.getInstanceVariable("@io") == null) { - throw ruby.newArgumentError("Argument doesn't respond to read or doesn't have instance variable @io"); - } - } - - public boolean detectEncoding() { - try { - firstChunkLength = read(firstChunk); - } catch (RaiseException e) { - detectedEncoding = e.getException().getInstanceVariable("@found_encoding"); - return true; - } - detectedEncoding = context.nil; - return false; - } - - public IRubyObject getEncoding() { - return detectedEncoding; - } 
- - @Override - public int read(byte b[]) { - return read(b, 0, b.length); - } - - @Override - public int read(byte b[], int off, int len) { - if (b == null) { - throw new NullPointerException(); - } else if (off < 0 || len < 0 || len > b.length - off) { - throw new IndexOutOfBoundsException(); - } else if (len == 0) { - return 0; - } - - int copyLength = Math.min(firstChunkLength - firstChunkOff, len); - if (copyLength > 0) { - System.arraycopy(firstChunk, firstChunkOff, b, off, copyLength); - len -= copyLength; - firstChunkOff += copyLength; - } - - if (len <= 0) - return copyLength; - - IRubyObject returnValue = encodingReader.callMethod(context, "read", ruby.newFixnum(len)); - if (returnValue.isNil()) - return -1; - - ByteList bytes = returnValue.asString().getByteList(); - int length = bytes.length(); - System.arraycopy(bytes.unsafeBytes(), bytes.getBegin(), b, off + copyLength, length); - return length + copyLength; - } - - @Override - public int read() { - byte[] bytes = new byte[1]; - int count = read(bytes, 0, 1); - if (count < 1) - return count; - return bytes[0]; - } - -} diff --git a/ext/java/nokogiri/internals/NokogiriEntityResolver.java b/ext/java/nokogiri/internals/NokogiriEntityResolver.java index d97da66667..eb9a8b5b3d 100644 --- a/ext/java/nokogiri/internals/NokogiriEntityResolver.java +++ b/ext/java/nokogiri/internals/NokogiriEntityResolver.java @@ -18,104 +18,123 @@ * entity resolution that uses relative paths must be translated * to be relative to the current directory of the Ruby runtime. 
*/ -public class NokogiriEntityResolver implements EntityResolver2 { - protected final Ruby runtime; - private final NokogiriErrorHandler handler; - private final Options options; +public class NokogiriEntityResolver implements EntityResolver2 +{ + protected final Ruby runtime; + private final NokogiriErrorHandler handler; + private final Options options; - public NokogiriEntityResolver(Ruby runtime, NokogiriErrorHandler handler, Options options) { - super(); - this.runtime = runtime; - this.handler = handler; - this.options = options; - } + public + NokogiriEntityResolver(Ruby runtime, NokogiriErrorHandler handler, Options options) + { + super(); + this.runtime = runtime; + this.handler = handler; + this.options = options; + } - @Override - public InputSource getExternalSubset(String name, String baseURI) - throws SAXException, IOException { - return null; - } + @Override + public InputSource + getExternalSubset(String name, String baseURI) + throws SAXException, IOException + { + return null; + } - @Override - public InputSource resolveEntity(String publicId, String systemId) - throws SAXException, IOException { - return resolveEntity(runtime, null, publicId, null, systemId); - } + @Override + public InputSource + resolveEntity(String publicId, String systemId) + throws SAXException, IOException + { + return resolveEntity(runtime, null, publicId, null, systemId); + } - @Override - public InputSource resolveEntity(String name, - String publicId, - String baseURI, - String systemId) - throws SAXException, IOException { - return resolveEntity(runtime, name, publicId, baseURI, systemId); - } + @Override + public InputSource + resolveEntity(String name, + String publicId, + String baseURI, + String systemId) + throws SAXException, IOException + { + return resolveEntity(runtime, name, publicId, baseURI, systemId); + } - private static File join(String parent, String child) { - if (new File(parent).isFile()) { - parent = new File(parent).getParent(); - } - return new 
File(parent, child); + private static File + join(String parent, String child) + { + if (new File(parent).isFile()) { + parent = new File(parent).getParent(); } + return new File(parent, child); + } - private static InputSource emptyInputSource(InputSource source) { - source.setByteStream(new ByteArrayInputStream(new byte[0])); - return source; - } + private static InputSource + emptyInputSource(InputSource source) + { + source.setByteStream(new ByteArrayInputStream(new byte[0])); + return source; + } - private boolean shouldLoadDtd() { - return options.dtdLoad || options.dtdValid; - } + private boolean + shouldLoadDtd() + { + return options.dtdLoad || options.dtdValid; + } - private void addError(String errorMessage) { - if (handler != null) handler.errors.add(new Exception(errorMessage)); - } + private void + addError(String errorMessage) + { + if (handler != null) { handler.addError(new Exception(errorMessage)); } + } - /** - * Create a file base input source taking into account the current - * directory of runtime. 
- * @throws SAXException - */ - protected InputSource resolveEntity(Ruby runtime, String name, String publicId, String baseURI, String systemId) - throws IOException, SAXException { - InputSource s = new InputSource(); - if (name.equals("[dtd]") && !shouldLoadDtd()) { - return emptyInputSource(s); - } else if (!name.equals("[dtd]") && !options.noEnt) { - return emptyInputSource(s); - } - String adjustedSystemId; - URI uri = URI.create(systemId); - if (options.noNet && uri.getHost() != null) { - addError("Attempt to load network entity " + systemId); - return emptyInputSource(s); - } - // if this is a url or absolute file name then use it - if (uri.isAbsolute() && !uri.isOpaque()) { - adjustedSystemId = uri.toURL().toString(); - } else if (new File(uri.getPath()).isAbsolute()) { - adjustedSystemId = uri.getPath(); - } else if (baseURI != null) { - URI baseuri = URI.create(baseURI); - if (options.noNet && baseuri.getHost() != null) { - addError("Attempt to load network entity " + systemId); - return emptyInputSource(s); - } - if (baseuri.getHost() == null) { - // this is a local file - adjustedSystemId = join(baseuri.getPath(), uri.getPath()).getCanonicalPath(); - } else { - // this is a url, then resolve uri using baseuri - adjustedSystemId = baseuri.resolve(systemId).toURL().toString(); - } - } else { - // baseURI is null we have to use the current working directory to resolve the entity - String pwd = runtime.getCurrentDirectory(); - adjustedSystemId = join(pwd, uri.getPath()).getCanonicalPath(); - } - s.setSystemId(adjustedSystemId); - s.setPublicId(publicId); - return s; + /** + * Create a file base input source taking into account the current + * directory of runtime. 
+ * @throws SAXException + */ + protected InputSource + resolveEntity(Ruby runtime, String name, String publicId, String baseURI, String systemId) + throws IOException, SAXException + { + InputSource s = new InputSource(); + if (name.equals("[dtd]") && !shouldLoadDtd()) { + return emptyInputSource(s); + } else if (!name.equals("[dtd]") && !options.noEnt) { + return emptyInputSource(s); + } + String adjustedSystemId; + URI uri = URI.create(systemId); + if (options.noNet && uri.getHost() != null) { + addError("Attempt to load network entity " + systemId); + return emptyInputSource(s); + } + // if this is a url or absolute file name then use it + if (uri.isAbsolute() && !uri.isOpaque()) { + adjustedSystemId = uri.toURL().toString(); + } else if (new File(uri.getPath()).isAbsolute()) { + adjustedSystemId = uri.getPath(); + } else if (baseURI != null) { + URI baseuri = URI.create(baseURI); + if (options.noNet && baseuri.getHost() != null) { + addError("Attempt to load network entity " + systemId); + return emptyInputSource(s); + } + if (baseuri.getHost() == null) { + // this is a local file + adjustedSystemId = join(baseuri.getPath(), uri.getPath()).getCanonicalPath(); + } else { + // this is a url, then resolve uri using baseuri + adjustedSystemId = baseuri.resolve(systemId).toURL().toString(); + } + } else { + // baseURI is null we have to use the current working directory to resolve the entity + String pwd = runtime.getCurrentDirectory(); + adjustedSystemId = join(pwd, uri.getPath()).getCanonicalPath(); } + s.setSystemId(adjustedSystemId); + s.setPublicId(publicId); + return s; + } } diff --git a/ext/java/nokogiri/internals/NokogiriErrorHandler.java b/ext/java/nokogiri/internals/NokogiriErrorHandler.java index 7980107a28..9c4683ee48 100644 --- a/ext/java/nokogiri/internals/NokogiriErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriErrorHandler.java @@ -1,69 +1,65 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron 
Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; -import java.util.ArrayList; -import java.util.List; - +import nokogiri.XmlSyntaxError; import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.jruby.Ruby; +import org.jruby.RubyException; +import org.jruby.exceptions.RaiseException; import org.xml.sax.ErrorHandler; +import java.util.ArrayList; +import java.util.List; + /** * Super class of error handlers. - * + * * XMLErrorHandler is used by nokogiri.internals.HtmlDomParserContext since NekoHtml * uses this type of the error handler. 
- * + * * @author sergio * @author Yoko Harada */ -public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler { - protected final List errors; - protected boolean noerror; - protected boolean nowarning; +public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler +{ + private final Ruby runtime; + protected final List errors; + protected boolean noerror; + protected boolean nowarning; + + public + NokogiriErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) + { + this.runtime = runtime; + this.errors = new ArrayList(4); + this.noerror = noerror; + this.nowarning = nowarning; + } + + public List + getErrors() { return errors; } - public NokogiriErrorHandler(boolean noerror, boolean nowarning) { - this.errors = new ArrayList(4); - this.noerror = noerror; - this.nowarning = nowarning; - } + public void + addError(Exception ex) + { + addError(XmlSyntaxError.createXMLSyntaxError(runtime, ex)); + } - List getErrors() { return errors; } + public void + addError(RubyException ex) + { + errors.add(ex); + } - public void addError(Exception ex) { errors.add(ex); } + public void + addError(RaiseException ex) + { + addError(ex.getException()); + } - protected boolean usesNekoHtml(String domain) { - return "http://cyberneko.org/html".equals(domain); - } + protected boolean + usesNekoHtml(String domain) + { + return "http://cyberneko.org/html".equals(domain); + } } diff --git a/ext/java/nokogiri/internals/NokogiriHandler.java b/ext/java/nokogiri/internals/NokogiriHandler.java index 5e34be1985..e258d10770 100644 --- a/ext/java/nokogiri/internals/NokogiriHandler.java +++ b/ext/java/nokogiri/internals/NokogiriHandler.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick 
Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.getLocalPart; @@ -63,265 +31,309 @@ * @author sergio * @author Yoko Harada */ -public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler { - - StringBuilder charactersBuilder; - private final Ruby runtime; - private final RubyClass attrClass; - private final IRubyObject object; - - /** - * Stores parse errors with the most-recent error last. - * - * TODO: should these be stored in the document 'errors' array? - * Currently only string messages are stored there. 
- */ - private final LinkedList errors = new LinkedList(); - - private Locator locator; - private boolean needEmptyAttrCheck; - - public NokogiriHandler(Ruby runtime, IRubyObject object) { - assert object != null; - this.runtime = runtime; - this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute"); - this.object = object; - charactersBuilder = new StringBuilder(); - String objectName = object.getMetaClass().getName(); - if ("Nokogiri::HTML::SAX::Parser".equals(objectName)) needEmptyAttrCheck = true; - } - - @Override - public void skippedEntity(String skippedEntity) { - call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n")); - } - - @Override - public void setDocumentLocator(Locator locator) { - this.locator = locator; - } - - @Override - public void startDocument() { - call("start_document"); - } - - @Override - public void xmlDecl(String version, String encoding, String standalone) { - call("xmldecl", stringOrNil(runtime, version), stringOrNil(runtime, encoding), stringOrNil(runtime, standalone)); - } - - @Override - public void endDocument() { - populateCharacters(); - call("end_document"); - } - - @Override - public void processingInstruction(String target, String data) { - call("processing_instruction", runtime.newString(target), runtime.newString(data)); - } - - /* - * This calls "start_element_namespace". - * - * Attributes that define namespaces are passed in a separate - * array of [:prefix, :uri] arrays and are not - * passed with the other attributes. 
- */ - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { - final Ruby runtime = this.runtime; - final ThreadContext context = runtime.getCurrentContext(); - - // for attributes other than namespace attrs - RubyArray rubyAttr = RubyArray.newArray(runtime); - // for namespace defining attributes - RubyArray rubyNSAttr = RubyArray.newArray(runtime); - - boolean fromFragmentHandler = false; // isFromFragmentHandler(); - - for (int i = 0; i < attrs.getLength(); i++) { - String u = attrs.getURI(i); - String qn = attrs.getQName(i); - String ln = attrs.getLocalName(i); - String val = attrs.getValue(i); - String pre; - - pre = getPrefix(qn); - if (ln == null || ln.isEmpty()) ln = getLocalPart(qn); - - if (isNamespace(qn) && !fromFragmentHandler) { - // I haven't figured the reason out yet, but, in somewhere, - // namespace is converted to array in array and cause - // TypeError at line 45 in fragment_handler.rb - if (ln.equals("xmlns")) ln = null; - rubyNSAttr.append( runtime.newArray( stringOrNil(runtime, ln), runtime.newString(val) ) ); - } else { - IRubyObject[] args = null; - if (needEmptyAttrCheck) { - if (isEmptyAttr(ln)) { - args = new IRubyObject[] { - stringOrNil(runtime, ln), - stringOrNil(runtime, pre), - stringOrNil(runtime, u) - }; - } - } - if (args == null) { - args = new IRubyObject[] { - stringOrNil(runtime, ln), - stringOrNil(runtime, pre), - stringOrNil(runtime, u), - stringOrNil(runtime, val) - }; - } - - rubyAttr.append( Helpers.invoke(context, attrClass, "new", args) ); - } +public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler +{ + + StringBuilder charactersBuilder; + private final Ruby runtime; + private final RubyClass attrClass; + private final IRubyObject object; + private NokogiriErrorHandler errorHandler; + + private Locator locator; + private boolean needEmptyAttrCheck; + + public + NokogiriHandler(Ruby runtime, IRubyObject object, 
NokogiriErrorHandler errorHandler) + { + assert object != null; + this.runtime = runtime; + this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute"); + this.object = object; + this.errorHandler = errorHandler; + charactersBuilder = new StringBuilder(); + String objectName = object.getMetaClass().getName(); + if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; } + } + + @Override + public void + skippedEntity(String skippedEntity) + { + call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n")); + } + + @Override + public void + setDocumentLocator(Locator locator) + { + this.locator = locator; + } + + @Override + public void + startDocument() + { + call("start_document"); + } + + @Override + public void + xmlDecl(String version, String encoding, String standalone) + { + call("xmldecl", stringOrNil(runtime, version), stringOrNil(runtime, encoding), stringOrNil(runtime, standalone)); + } + + @Override + public void + endDocument() + { + populateCharacters(); + call("end_document"); + } + + @Override + public void + processingInstruction(String target, String data) + { + call("processing_instruction", runtime.newString(target), runtime.newString(data)); + } + + /* + * This calls "start_element_namespace". + * + * Attributes that define namespaces are passed in a separate + * array of [:prefix, :uri] arrays and are not + * passed with the other attributes. 
+ */ + @Override + public void + startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException + { + final Ruby runtime = this.runtime; + final ThreadContext context = runtime.getCurrentContext(); + + // for attributes other than namespace attrs + RubyArray rubyAttr = RubyArray.newArray(runtime); + // for namespace defining attributes + RubyArray rubyNSAttr = RubyArray.newArray(runtime); + + boolean fromFragmentHandler = false; // isFromFragmentHandler(); + + for (int i = 0; i < attrs.getLength(); i++) { + String u = attrs.getURI(i); + String qn = attrs.getQName(i); + String ln = attrs.getLocalName(i); + String val = attrs.getValue(i); + String pre; + + pre = getPrefix(qn); + if (ln == null || ln.isEmpty()) { ln = getLocalPart(qn); } + + if (isNamespace(qn) && !fromFragmentHandler) { + // I haven't figured the reason out yet, but, in somewhere, + // namespace is converted to array in array and cause + // TypeError at line 45 in fragment_handler.rb + if (ln.equals("xmlns")) { ln = null; } + rubyNSAttr.append(runtime.newArray(stringOrNil(runtime, ln), runtime.newString(val))); + } else { + IRubyObject[] args = null; + if (needEmptyAttrCheck) { + if (isEmptyAttr(ln)) { + args = new IRubyObject[] { + stringOrNil(runtime, ln), + stringOrNil(runtime, pre), + stringOrNil(runtime, u) + }; + } } - - if (localName == null || localName.isEmpty()) localName = getLocalPart(qName); - populateCharacters(); - call("start_element_namespace", - stringOrNil(runtime, localName), - rubyAttr, - stringOrNil(runtime, getPrefix(qName)), - stringOrNil(runtime, uri), - rubyNSAttr); - } - - static final Set EMPTY_ATTRS; - static { - final String[] emptyAttrs = { - "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple", - "noresize", "nohref", "noshade", "nowrap", "readonly", "selected" - }; - EMPTY_ATTRS = new HashSet(Arrays.asList(emptyAttrs)); - } - - private static boolean isEmptyAttr(String name) { - return EMPTY_ATTRS.contains(name); - 
} - - public final Integer getLine() { // -1 if none is available - final int line = locator.getLineNumber(); - return line == -1 ? null : line; - } - - public final Integer getColumn() { // -1 if none is available - final int column = locator.getColumnNumber(); - return column == -1 ? null : column - 1; - } - - @Override - public void endElement(String uri, String localName, String qName) { - populateCharacters(); - call("end_element_namespace", - stringOrNil(runtime, localName), - stringOrNil(runtime, getPrefix(qName)), - stringOrNil(runtime, uri)); - } - - @Override - public void characters(char[] ch, int start, int length) { - charactersBuilder.append(ch, start, length); - } - - @Override - public void comment(char[] ch, int start, int length) { - populateCharacters(); - call("comment", runtime.newString(new String(ch, start, length))); - } - - @Override - public void startCDATA() { - populateCharacters(); - } - - @Override - public void endCDATA() { - call("cdata_block", runtime.newString(charactersBuilder.toString())); - charactersBuilder.setLength(0); - } - - void handleError(SAXParseException ex) { - try { - final String msg = ex.getMessage(); - call("error", runtime.newString(msg == null ? "" : msg)); - addError(new RaiseException(XmlSyntaxError.createError(runtime, ex), true)); - } catch( RaiseException e) { - addError(e); - throw e; + if (args == null) { + args = new IRubyObject[] { + stringOrNil(runtime, ln), + stringOrNil(runtime, pre), + stringOrNil(runtime, u), + stringOrNil(runtime, val) + }; } - } - - @Override - public void error(SAXParseException ex) { - handleError(ex); - } - - @Override - public void fatalError(SAXParseException ex) { - handleError(ex); - } - - @Override - public void warning(SAXParseException ex) { - final String msg = ex.getMessage(); - call("warning", runtime.newString(msg == null ? 
"" : msg)); - } - protected synchronized void addError(RaiseException e) { - errors.add(e); + rubyAttr.append(Helpers.invoke(context, attrClass, "new", args)); + } } - public synchronized int getErrorCount() { - return errors.size(); + if (localName == null || localName.isEmpty()) { localName = getLocalPart(qName); } + populateCharacters(); + call("start_element_namespace", + stringOrNil(runtime, localName), + rubyAttr, + stringOrNil(runtime, getPrefix(qName)), + stringOrNil(runtime, uri), + rubyNSAttr); + } + + static final Set EMPTY_ATTRS; + static + { + final String[] emptyAttrs = { + "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple", + "noresize", "nohref", "noshade", "nowrap", "readonly", "selected" + }; + EMPTY_ATTRS = new HashSet(Arrays.asList(emptyAttrs)); + } + + private static boolean + isEmptyAttr(String name) + { + return EMPTY_ATTRS.contains(name); + } + + public final Integer + getLine() // -1 if none is available + { + final int line = locator.getLineNumber(); + return line == -1 ? null : line; + } + + public final Integer + getColumn() // -1 if none is available + { + final int column = locator.getColumnNumber(); + return column == -1 ? 
null : column - 1; + } + + @Override + public void + endElement(String uri, String localName, String qName) + { + populateCharacters(); + call("end_element_namespace", + stringOrNil(runtime, localName), + stringOrNil(runtime, getPrefix(qName)), + stringOrNil(runtime, uri)); + } + + @Override + public void + characters(char[] ch, int start, int length) + { + charactersBuilder.append(ch, start, length); + } + + @Override + public void + comment(char[] ch, int start, int length) + { + populateCharacters(); + call("comment", runtime.newString(new String(ch, start, length))); + } + + @Override + public void + startCDATA() + { + populateCharacters(); + } + + @Override + public void + endCDATA() + { + call("cdata_block", runtime.newString(charactersBuilder.toString())); + charactersBuilder.setLength(0); + } + + void + handleError(SAXParseException ex) + { + try { + final String msg = ex.getMessage(); + call("error", runtime.newString(msg == null ? "" : msg)); + errorHandler.addError(ex); + } catch (RaiseException e) { + errorHandler.addError(e); + throw e; } - - public synchronized RaiseException getLastError() { - return errors.getLast(); - } - - private void call(String methodName) { - ThreadContext context = runtime.getCurrentContext(); - Helpers.invoke(context, document(context), methodName); - } - - private void call(String methodName, IRubyObject argument) { - ThreadContext context = runtime.getCurrentContext(); - Helpers.invoke(context, document(context), methodName, argument); - } - - private void call(String methodName, IRubyObject arg1, IRubyObject arg2) { - ThreadContext context = runtime.getCurrentContext(); - Helpers.invoke(context, document(context), methodName, arg1, arg2); - } - - private void call(String methodName, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3) { - ThreadContext context = runtime.getCurrentContext(); - Helpers.invoke(context, document(context), methodName, arg1, arg2, arg3); - } - - private void call(String methodName, - 
IRubyObject arg0, - IRubyObject arg1, - IRubyObject arg2, - IRubyObject arg3, - IRubyObject arg4) { - ThreadContext context = runtime.getCurrentContext(); - Helpers.invoke(context, document(context), methodName, arg0, arg1, arg2, arg3, arg4); - } - - private IRubyObject document(ThreadContext context) { - return object.getInstanceVariables().getInstanceVariable("@document"); - } - - protected void populateCharacters() { - if (charactersBuilder.length() > 0) { - call("characters", runtime.newString(charactersBuilder.toString())); - charactersBuilder.setLength(0); - } + } + + @Override + public void + error(SAXParseException ex) + { + handleError(ex); + } + + @Override + public void + fatalError(SAXParseException ex) + { + handleError(ex); + } + + @Override + public void + warning(SAXParseException ex) + { + final String msg = ex.getMessage(); + call("warning", runtime.newString(msg == null ? "" : msg)); + } + + public synchronized int + getErrorCount() + { + return errorHandler.getErrors().size(); + } + + private void + call(String methodName) + { + ThreadContext context = runtime.getCurrentContext(); + Helpers.invoke(context, document(context), methodName); + } + + private void + call(String methodName, IRubyObject argument) + { + ThreadContext context = runtime.getCurrentContext(); + Helpers.invoke(context, document(context), methodName, argument); + } + + private void + call(String methodName, IRubyObject arg1, IRubyObject arg2) + { + ThreadContext context = runtime.getCurrentContext(); + Helpers.invoke(context, document(context), methodName, arg1, arg2); + } + + private void + call(String methodName, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3) + { + ThreadContext context = runtime.getCurrentContext(); + Helpers.invoke(context, document(context), methodName, arg1, arg2, arg3); + } + + private void + call(String methodName, + IRubyObject arg0, + IRubyObject arg1, + IRubyObject arg2, + IRubyObject arg3, + IRubyObject arg4) + { + ThreadContext context = 
runtime.getCurrentContext(); + Helpers.invoke(context, document(context), methodName, arg0, arg1, arg2, arg3, arg4); + } + + private IRubyObject + document(ThreadContext context) + { + return object.getInstanceVariables().getInstanceVariable("@document"); + } + + protected void + populateCharacters() + { + if (charactersBuilder.length() > 0) { + call("characters", runtime.newString(charactersBuilder.toString())); + charactersBuilder.setLength(0); } + } } diff --git a/ext/java/nokogiri/internals/NokogiriHelpers.java b/ext/java/nokogiri/internals/NokogiriHelpers.java index c642ede862..3efeb90b50 100644 --- a/ext/java/nokogiri/internals/NokogiriHelpers.java +++ b/ext/java/nokogiri/internals/NokogiriHelpers.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2014: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import java.io.ByteArrayInputStream; @@ -39,6 +7,7 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; +import java.util.List; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -53,11 +22,10 @@ import org.w3c.dom.Attr; import org.w3c.dom.DOMException; import org.w3c.dom.Document; -import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; -import nokogiri.HtmlDocument; +import nokogiri.Html4Document; import nokogiri.NokogiriService; import nokogiri.XmlAttr; import nokogiri.XmlCdata; @@ -74,725 +42,776 @@ /** * A class for various utility methods. - * + * * @author serabe * @author Patrick Mahoney * @author Yoko Harada */ -public class NokogiriHelpers { - public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE"; - public static final String VALID_ROOT_NODE = "NOKOGIRI_VALIDE_ROOT_NODE"; - public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING"; - - public static XmlNode getCachedNode(Node node) { - return (XmlNode) node.getUserData(CACHED_NODE); - } - - public static void clearCachedNode(Node node) { - node.setUserData(CACHED_NODE, null, null); - } - - public static void clearXpathContext(Node node) { - if (node == null) return; - - Node ownerDocument = node.getOwnerDocument(); - if (ownerDocument == null) { - ownerDocument = node; - } - ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null); - } - - /** - * Get the XmlNode associated with the underlying - * node. Creates a new XmlNode (or appropriate subclass) - * or XmlNamespace wrapping node if there is no cached - * value. 
- */ - public static IRubyObject getCachedNodeOrCreate(Ruby ruby, Node node) { - if(node == null) return ruby.getNil(); - if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) { - XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE); - if (!(xmlDocument instanceof HtmlDocument)) { - String prefix = getLocalNameForNamespace(((Attr)node).getName()); - prefix = prefix != null ? prefix : ""; - String href = ((Attr)node).getValue(); - XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href); - if (xmlNamespace != null) return xmlNamespace; - else return XmlNamespace.createFromAttr(ruby, (Attr)node); - } - } - XmlNode xmlNode = getCachedNode(node); - if(xmlNode == null) { - xmlNode = (XmlNode)constructNode(ruby, node); - node.setUserData(CACHED_NODE, xmlNode, null); - } +public class NokogiriHelpers +{ + public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE"; + public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID"; + public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING"; + + public static XmlNode + getCachedNode(Node node) + { + return (XmlNode) node.getUserData(CACHED_NODE); + } + + public static void + clearCachedNode(Node node) + { + node.setUserData(CACHED_NODE, null, null); + } + + public static void + clearXpathContext(Node node) + { + if (node == null) { return; } + + Node ownerDocument = node.getOwnerDocument(); + if (ownerDocument == null) { + ownerDocument = node; + } + ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null); + } + + /** + * Get the XmlNode associated with the underlying + * node. Creates a new XmlNode (or appropriate subclass) + * or XmlNamespace wrapping node if there is no cached + * value. 
+ */ + public static IRubyObject + getCachedNodeOrCreate(Ruby runtime, Node node) + { + if (node == null) { return runtime.getNil(); } + if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) { + XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE); + if (!(xmlDocument instanceof Html4Document)) { + String prefix = getLocalNameForNamespace(((Attr) node).getName(), null); + String href = ((Attr) node).getValue(); + XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href); + if (xmlNamespace != null) { return xmlNamespace; } + return XmlNamespace.createFromAttr(runtime, (Attr) node); + } + } + XmlNode xmlNode = getCachedNode(node); + if (xmlNode == null) { + xmlNode = (XmlNode) constructNode(runtime, node); + node.setUserData(CACHED_NODE, xmlNode, null); + } + return xmlNode; + } + + /** + * Construct a new XmlNode wrapping node. The proper + * subclass of XmlNode is chosen based on the type of + * node. + */ + public static IRubyObject + constructNode(Ruby runtime, Node node) + { + if (node == null) { return runtime.getNil(); } + // this is slow; need a way to cache nokogiri classes/modules somewhere + switch (node.getNodeType()) { + case Node.ELEMENT_NODE: + XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Element")); + xmlElement.setNode(runtime, node); + return xmlElement; + case Node.ATTRIBUTE_NODE: + XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Attr")); + xmlAttr.setNode(runtime, node); + return xmlAttr; + case Node.TEXT_NODE: + XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Text")); + xmlText.setNode(runtime, node); + return xmlText; + case Node.COMMENT_NODE: + XmlComment xmlComment = (XmlComment) 
NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Comment")); + xmlComment.setNode(runtime, node); + return xmlComment; + case Node.ENTITY_NODE: + return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node); + case Node.ENTITY_REFERENCE_NODE: + XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, + getNokogiriClass(runtime, "Nokogiri::XML::EntityReference")); + xmlEntityRef.setNode(runtime, node); + return xmlEntityRef; + case Node.PROCESSING_INSTRUCTION_NODE: + XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) + NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::ProcessingInstruction")); + xmlProcessingInstruction.setNode(runtime, node); + return xmlProcessingInstruction; + case Node.CDATA_SECTION_NODE: + XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::CDATA")); + xmlCdata.setNode(runtime, node); + return xmlCdata; + case Node.DOCUMENT_NODE: + XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, + getNokogiriClass(runtime, "Nokogiri::XML::Document")); + xmlDocument.setDocumentNode(runtime, (Document) node); + return xmlDocument; + case Node.DOCUMENT_TYPE_NODE: + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, node); + return xmlDtd; + default: + XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Node")); + xmlNode.setNode(runtime, node); return xmlNode; } - - /** - * Construct a new XmlNode wrapping node. The proper - * subclass of XmlNode is chosen based on the type of - * node. 
- */ - public static IRubyObject constructNode(Ruby runtime, Node node) { - if (node == null) return runtime.getNil(); - // this is slow; need a way to cache nokogiri classes/modules somewhere - switch (node.getNodeType()) { - case Node.ELEMENT_NODE: - XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element")); - xmlElement.setNode(runtime.getCurrentContext(), node); - return xmlElement; - case Node.ATTRIBUTE_NODE: - XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr")); - xmlAttr.setNode(runtime.getCurrentContext(), node); - return xmlAttr; - case Node.TEXT_NODE: - XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text")); - xmlText.setNode(runtime.getCurrentContext(), node); - return xmlText; - case Node.COMMENT_NODE: - XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment")); - xmlComment.setNode(runtime.getCurrentContext(), node); - return xmlComment; - case Node.ENTITY_NODE: - return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node); - case Node.ENTITY_REFERENCE_NODE: - XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference")); - xmlEntityRef.setNode(runtime.getCurrentContext(), node); - return xmlEntityRef; - case Node.PROCESSING_INSTRUCTION_NODE: - XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction")); - xmlProcessingInstruction.setNode(runtime.getCurrentContext(), node); - return xmlProcessingInstruction; - case 
Node.CDATA_SECTION_NODE: - XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA")); - xmlCdata.setNode(runtime.getCurrentContext(), node); - return xmlCdata; - case Node.DOCUMENT_NODE: - XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document")); - xmlDocument.setDocumentNode(runtime.getCurrentContext(), node); - return xmlDocument; - case Node.DOCUMENT_TYPE_NODE: - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, node); - return xmlDtd; - default: - XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node")); - xmlNode.setNode(runtime.getCurrentContext(), node); - return xmlNode; + } + + public static RubyClass + getNokogiriClass(Ruby ruby, String name) + { + return NokogiriService.getNokogiriClassCache(ruby).get(name); + } + + public static IRubyObject + stringOrNil(Ruby runtime, String str) + { + return str == null ? runtime.getNil() : convertString(runtime, str); + } + + public static IRubyObject + stringOrNil(Ruby runtime, CharSequence str) + { + return str == null ? runtime.getNil() : convertString(runtime, str); + } + + public static IRubyObject + stringOrNil(Ruby runtime, byte[] bytes) + { + return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes); + } + + public static IRubyObject + stringOrBlank(Ruby runtime, String str) + { + return str == null ? 
runtime.newString() : convertString(runtime, str); + } + + public static RubyString + convertString(Ruby runtime, String str) + { + return RubyString.newUTF8String(runtime, str); + } + + public static RubyString + convertString(Ruby runtime, CharSequence str) + { + return RubyString.newUTF8String(runtime, str); + } + + /** + * Convert s to a RubyString, or if s is null or + * empty return RubyNil. + */ + public static IRubyObject + nonEmptyStringOrNil(Ruby runtime, String s) + { + if (s == null || s.length() == 0) { return runtime.getNil(); } + return RubyString.newString(runtime, s); + } + + /** + * Return the prefix of a qualified name like "prefix:local". + * Returns null if there is no prefix. + */ + public static String + getPrefix(String qName) + { + if (qName == null) { return null; } + + final int pos = qName.indexOf(':'); + return pos > 0 ? qName.substring(0, pos) : null; + } + + /** + * Return the local part of a qualified name like "prefix:local". + * Returns qName if there is no prefix. + */ + public static String + getLocalPart(String qName) + { + if (qName == null) { return null; } + + final int pos = qName.indexOf(':'); + return pos > 0 ? qName.substring(pos + 1) : qName; + } + + public static String + getLocalNameForNamespace(String name, String defValue) + { + String localName = getLocalPart(name); + return ("xmlns".equals(localName)) ? 
defValue : localName; + } + + public static String + rubyStringToString(IRubyObject str) + { + if (str.isNil()) { return null; } + return str.convertToString().decodeString(); + } + + public static String + rubyStringToString(RubyString str) + { + return str.decodeString(); // if encoding UTF-8 will decode UTF-8 + } + + public static ByteArrayInputStream + stringBytesToStream(final IRubyObject str) + { + if (str instanceof RubyString || str.respondsTo("to_str")) { + final ByteList bytes = str.convertToString().getByteList(); + return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + } + return null; + } + + public static String + getNodeCompletePath(Node node) + { + + Node cur, tmp, next; + + String buffer = ""; + + cur = node; + + do { + String name = ""; + String sep = "?"; + int occur = 0; + boolean generic = false; + + if (cur.getNodeType() == Node.DOCUMENT_NODE) { + if (buffer.startsWith("/")) { break; } + + sep = "/"; + next = null; + } else if (cur.getNodeType() == Node.ELEMENT_NODE) { + generic = false; + sep = "/"; + + name = cur.getLocalName(); + if (name == null) { name = cur.getNodeName(); } + if (cur.getNamespaceURI() != null) { + if (cur.getPrefix() != null) { + name = cur.getPrefix() + ":" + name; + } else { + generic = true; + name = "*"; + } } - } - - public static RubyClass getNokogiriClass(Ruby ruby, String name) { - return NokogiriService.getNokogiriClassCache(ruby).get(name); - } - public static IRubyObject stringOrNil(Ruby runtime, String str) { - return str == null ? runtime.getNil() : convertString(runtime, str); - } + next = cur.getParentNode(); - public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) { - return str == null ? runtime.getNil() : convertString(runtime, str); - } - - public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) { - return bytes == null ? 
runtime.getNil() : RubyString.newString(runtime, bytes); - } + /* + * Thumbler index computation + */ - public static IRubyObject stringOrBlank(Ruby runtime, String str) { - return str == null ? runtime.newString() : convertString(runtime, str); - } - - public static RubyString convertString(Ruby runtime, String str) { - return RubyString.newUTF8String(runtime, str); - } - - public static RubyString convertString(Ruby runtime, CharSequence str) { - return RubyString.newUTF8String(runtime, str); - } + tmp = cur.getPreviousSibling(); - /** - * Convert s to a RubyString, or if s is null or - * empty return RubyNil. - */ - public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) { - if (s == null || s.length() == 0) return runtime.getNil(); - return RubyString.newString(runtime, s); - } - - /** - * Return the prefix of a qualified name like "prefix:local". - * Returns null if there is no prefix. - */ - public static String getPrefix(String qName) { - if (qName == null) return null; - - final int pos = qName.indexOf(':'); - return pos > 0 ? qName.substring(0, pos) : null; - } - - /** - * Return the local part of a qualified name like "prefix:local". - * Returns qName if there is no prefix. - */ - public static String getLocalPart(String qName) { - if (qName == null) return null; - - final int pos = qName.indexOf(':'); - return pos > 0 ? qName.substring(pos + 1) : qName; - } - - public static String getLocalNameForNamespace(String name) { - String localName = getLocalPart(name); - return ("xmlns".equals(localName)) ? null : localName; - } - - private static final Charset UTF8 = Charset.forName("UTF-8"); - - /** - * Converts a RubyString in to a Java String. Assumes the - * RubyString is encoded as UTF-8. This is generally the case for - * RubyStrings created with getRuntime().newString("java string"). - * It also seems to be the case for strings created within Ruby - * where $KCODE has not been set. 
- * - * Note that RubyString#toString() decodes the string data as - * ISO-8859-1 (See org.jruby.util.ByteList.java). This is not - * what you want if you have any multibyte characters in your - * UTF-8 string. - * - * FIXME: This really needs to be more robust in terms of - * detecting the encoding and properly converting to a Java - * String. It's unfortunate that RubyString#toString() doesn't do - * this for us. - */ - public static String rubyStringToString(IRubyObject str) { - if (str.isNil()) return null; - //return rubyStringToString(str.convertToString()); - return toJavaString(str.convertToString()); - } - - private static String toJavaString(RubyString str) { - return str.decodeString(); // toString() - } - - public static String rubyStringToString(RubyString str) { - ByteList byteList = str.getByteList(); - byte[] data = byteList.unsafeBytes(); - int offset = byteList.begin(); - int len = byteList.length(); - ByteBuffer buf = ByteBuffer.wrap(data, offset, len); - return UTF8.decode(buf).toString(); - } - - public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) { - if (str instanceof RubyString || str.respondsTo("to_str")) { - final ByteList bytes = str.convertToString().getByteList(); - return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + while (tmp != null) { + if ((tmp.getNodeType() == Node.ELEMENT_NODE) && + (generic || fullNamesMatch(tmp, cur))) { + occur++; + } + tmp = tmp.getPreviousSibling(); } - return null; - } - public static String getNodeCompletePath(Node node) { - - Node cur, tmp, next; - - // TODO: Rename buffer to path. 
- String buffer = ""; - - cur = node; - - do { - String name = ""; - String sep = "?"; - int occur = 0; - boolean generic = false; - - if(cur.getNodeType() == Node.DOCUMENT_NODE) { - if(buffer.startsWith("/")) break; - - sep = "/"; - next = null; - } else if(cur.getNodeType() == Node.ELEMENT_NODE) { - generic = false; - sep = "/"; - - name = cur.getLocalName(); - if (name == null) name = cur.getNodeName(); - if(cur.getNamespaceURI() != null) { - if(cur.getPrefix() != null) { - name = cur.getPrefix() + ":" + name; - } else { - generic = true; - name = "*"; - } - } - - next = cur.getParentNode(); - - /* - * Thumbler index computation - */ - - tmp = cur.getPreviousSibling(); - - while(tmp != null) { - if((tmp.getNodeType() == Node.ELEMENT_NODE) && - (generic || fullNamesMatch(tmp, cur))) { - occur++; - } - tmp = tmp.getPreviousSibling(); - } - - if(occur == 0) { - tmp = cur.getNextSibling(); - - while(tmp != null && occur == 0) { - if((tmp.getNodeType() == Node.ELEMENT_NODE) && - (generic || fullNamesMatch(tmp,cur))) { - occur++; - } - tmp = tmp.getNextSibling(); - } - - if(occur != 0) occur = 1; - - } else { - occur++; - } - } else if(cur.getNodeType() == Node.COMMENT_NODE) { - sep = "/"; - name = "comment()"; - next = cur.getParentNode(); - - /* - * Thumbler index computation. - */ - - tmp = cur.getPreviousSibling(); - - while(tmp != null) { - if(tmp.getNodeType() == Node.COMMENT_NODE) { - occur++; - } - tmp = tmp.getPreviousSibling(); - } - - if(occur == 0) { - tmp = cur.getNextSibling(); - while(tmp != null && occur == 0) { - if(tmp.getNodeType() == Node.COMMENT_NODE) { - occur++; - } - tmp = tmp.getNextSibling(); - } - if(occur != 0) occur = 1; - } else { - occur = 1; - } - - } else if(cur.getNodeType() == Node.TEXT_NODE || - cur.getNodeType() == Node.CDATA_SECTION_NODE) { - // I'm here. gist:129 - // http://gist.github.com/144923 - - sep = "/"; - name = "text()"; - next = cur.getParentNode(); - - /* - * Thumbler index computation. 
- */ - - tmp = cur.getPreviousSibling(); - while(tmp != null) { - if(tmp.getNodeType() == Node.TEXT_NODE || - tmp.getNodeType() == Node.CDATA_SECTION_NODE) { - occur++; - } - tmp = tmp.getPreviousSibling(); - } - - if(occur == 0) { - tmp = cur.getNextSibling(); - - while(tmp != null && occur == 0) { - if(tmp.getNodeType() == Node.TEXT_NODE || - tmp.getNodeType() == Node.CDATA_SECTION_NODE) { - occur++; - } - tmp = tmp.getNextSibling(); - } - } else { - occur++; - } - - } else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) { - sep = "/"; - name = "processing-instruction('"+cur.getLocalName()+"')"; - next = cur.getParentNode(); - - /* - * Thumbler index computation. - */ - - tmp = cur.getParentNode(); - - while(tmp != null) { - if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE && - tmp.getLocalName().equals(cur.getLocalName())) { - occur++; - } - tmp = tmp.getPreviousSibling(); - } - - if(occur == 0) { - tmp = cur.getNextSibling(); - - while(tmp != null && occur == 0) { - if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE && - tmp.getLocalName().equals(cur.getLocalName())){ - occur++; - } - tmp = tmp.getNextSibling(); - } - - if(occur != 0) { - occur = 1; - } - - } else { - occur++; - } - - } else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) { - sep = "/@"; - name = cur.getLocalName(); - - if(cur.getNamespaceURI() != null) { - if(cur.getPrefix() != null) { - name = cur.getPrefix() + ":" + name; - } - } - - next = ((Attr) cur).getOwnerElement(); - - } else { - next = cur.getParentNode(); - } + if (occur == 0) { + tmp = cur.getNextSibling(); - if(occur == 0){ - buffer = sep+name+buffer; - } else { - buffer = sep+name+"["+occur+"]"+buffer; + while (tmp != null && occur == 0) { + if ((tmp.getNodeType() == Node.ELEMENT_NODE) && + (generic || fullNamesMatch(tmp, cur))) { + occur++; } + tmp = tmp.getNextSibling(); + } - cur = next; - - } while(cur != null); - - return buffer; - } - - protected static boolean compareTwoNodes(Node m, Node n) { - return 
nodesAreEqual(m.getLocalName(), n.getLocalName()) && - nodesAreEqual(m.getPrefix(), n.getPrefix()); - } - - protected static boolean fullNamesMatch(Node a, Node b) { - return a.getNodeName().equals(b.getNodeName()); - } + if (occur != 0) { occur = 1; } - protected static String getFullName(Node n) { - String lname = n.getLocalName(); - String prefix = n.getPrefix(); - if (lname != null) { - if (prefix != null) - return prefix + ":" + lname; - else - return lname; } else { - return n.getNodeName(); + occur++; + } + } else if (cur.getNodeType() == Node.COMMENT_NODE) { + sep = "/"; + name = "comment()"; + next = cur.getParentNode(); + + /* + * Thumbler index computation. + */ + + tmp = cur.getPreviousSibling(); + + while (tmp != null) { + if (tmp.getNodeType() == Node.COMMENT_NODE) { + occur++; + } + tmp = tmp.getPreviousSibling(); } - } - - private static boolean nodesAreEqual(Object a, Object b) { - return (((a == null) && (b == null)) || - ((a != null) && (b != null) && (b.equals(a)))); - } - private static final Pattern encoded_pattern = Pattern.compile("&|>|<| "); - private static final String[] encoded = {"&", ">", "<", " "}; - private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r"); - private static final String[] decoded = {"&", ">", "<", "\r"}; - - private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) { - Matcher matcher = ptn.matcher(input); - boolean result = matcher.find(); - StringBuffer sb = new StringBuffer(input.length() + 8); - while (result) { - String matched = matcher.group(); - String replacement = ""; - for (int i=0; i charsetNames = Charset.availableCharsets().keySet(); - - private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) { - String givenEncoding = rubyStringToString(encoding); - if (charsetNames.contains(givenEncoding)) return givenEncoding; - else return guessEncoding(); - } + } else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) { + sep = 
"/@"; + name = cur.getLocalName(); - public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) { - if (systemId == null) return systemId; - File file = new File(systemId); - if (file.isAbsolute()) return systemId; - String path = resolveSystemId(baseURI, systemId); - if (path != null) return path; - path = resolveSystemId(currentDir, systemId); - if (path != null) return path; - return resolveSystemId(scriptFileName, systemId); - } - - private static String resolveSystemId(String baseName, String systemId) { - if (baseName == null || baseName.length() < 1) return null; - String parentName; - baseName = baseName.replace("%20", " "); - File base = new File(baseName); - if (base.isDirectory()) parentName = baseName; - else parentName = base.getParent(); - if (parentName == null) return null; - if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length()); - File dtdFile = new File(parentName + "/" + systemId); - if (dtdFile.exists()) return dtdFile.getPath(); - return null; - } - - public static boolean isUTF8(String encoding) { - if (encoding == null) return true; // no need to convert encoding - return Charset.forName(encoding).compareTo(UTF8) == 0; - } - - public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) { - return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters - } - - public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) { - if (!(doc instanceof HtmlDocument)) return str; - String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding(); - if (parsed_encoding == null) return str; - String ruby_encoding = rubyStringToString(doc.getEncoding()); - if (ruby_encoding == null) return str; - Charset encoding = Charset.forName(ruby_encoding); - if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) 
return str; - if (str.length() == 0) return str; // no need to convert - return NokogiriHelpers.nkf(context, encoding, str); - } - - private static final ByteList _Sw = new ByteList(new byte[] { '-','S','w' }, false); - private static final ByteList _Jw = new ByteList(new byte[] { '-','J','w' }, false); - private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false); - private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false); - - // This method is used from HTML documents. HTML meta tag with encoding specification - // might appear after non-ascii characters are used. For example, a title tag before - // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag. - // Nokogiri uses NKF library to convert characters correct encoding. This means the method - // works only for JIS/Shift_JIS/EUC-JP. - private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) { - final Ruby runtime = context.getRuntime(); - final ByteList opt; - if (NokogiriHelpers.shift_jis.compareTo(encoding) == 0) opt = _Sw; - else if (NokogiriHelpers.jis.compareTo(encoding) == 0) opt = _Jw; - else if (NokogiriHelpers.euc_jp.compareTo(encoding) == 0) opt = _Ew; - else opt = _Ww; // should not come here. should be treated before this method. 
- - Class nkfClass; - try { - nkfClass = runtime.getClassLoader().loadClass("org.jruby.RubyNKF"); - } catch (ClassNotFoundException e2) { - return str; - } - Method nkf_method; - try { - nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class); - RubyString r_str = - (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString())); - return NokogiriHelpers.rubyStringToString(r_str); - } catch (SecurityException e) { - return str; - } catch (NoSuchMethodException e) { - return str; - } catch (IllegalArgumentException e) { - return str; - } catch (IllegalAccessException e) { - return str; - } catch (InvocationTargetException e) { - return str; + if (cur.getNamespaceURI() != null) { + if (cur.getPrefix() != null) { + name = cur.getPrefix() + ":" + name; + } } - } - - private static final Charset shift_jis = Charset.forName("Shift_JIS"); - private static final Charset jis = Charset.forName("ISO-2022-JP"); - private static final Charset euc_jp = Charset.forName("EUC-JP"); - - public static boolean shouldEncode(Node text) { - final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING); - return encoded == null || ! 
encoded; - } - - public static boolean shouldDecode(Node text) { - return !shouldEncode(text); - } - public static NokogiriNamespaceCache getNamespaceCacheFormNode(Node n) { - XmlDocument xmlDoc = (XmlDocument)getCachedNode(n.getOwnerDocument()); - return xmlDoc.getNamespaceCache(); - } - - public static Node renameNode(Node n, String namespaceURI, String qualifiedName) throws DOMException { - Document doc = n.getOwnerDocument(); - NokogiriNamespaceCache nsCache = getNamespaceCacheFormNode(n); - Node result = doc.renameNode(n, namespaceURI, qualifiedName); - if (result != n) { - nsCache.replaceNode(n, result); + next = ((Attr) cur).getOwnerElement(); + + } else { + next = cur.getParentNode(); + } + + if (occur == 0) { + buffer = sep + name + buffer; + } else { + buffer = sep + name + "[" + occur + "]" + buffer; + } + + cur = next; + + } while (cur != null); + + return buffer; + } + + static boolean + compareTwoNodes(Node m, Node n) + { + return nodesAreEqual(m.getLocalName(), n.getLocalName()) && + nodesAreEqual(m.getPrefix(), n.getPrefix()); + } + + private static boolean + nodesAreEqual(Object a, Object b) + { + return (((a == null) && (b == null)) || + ((a != null) && (b != null) && (b.equals(a)))); + } + + private static boolean + fullNamesMatch(Node a, Node b) + { + return a.getNodeName().equals(b.getNodeName()); + } + + private static final Pattern encoded_pattern = Pattern.compile("&|>|<| "); + private static final String[] encoded = {"&", ">", "<", " "}; + private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r"); + private static final String[] decoded = {"&", ">", "<", "\r"}; + + private static StringBuffer + convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) + { + Matcher matcher = ptn.matcher(input); + boolean result = matcher.find(); + StringBuffer sb = new StringBuffer(input.length() + 8); + while (result) { + String matched = matcher.group(); + String replacement = ""; + for (int i = 0; i < 
oldChars.length; i++) { + if (matched.contains(oldChars[i])) { + replacement = matched.replace(oldChars[i], newChars[i]); + break; } - return result; - } + } + matcher.appendReplacement(sb, replacement); + result = matcher.find(); + } + matcher.appendTail(sb); + return sb; + } + + public static CharSequence + encodeJavaString(CharSequence str) + { + return convert(decoded_pattern, str, decoded, encoded); + } + + public static CharSequence + decodeJavaString(CharSequence str) + { + return convert(encoded_pattern, str, encoded, decoded); + } + + public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/"; + public static boolean + isNamespace(Node node) + { + return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName())); + } + + public static boolean + isNamespace(String nodeName) + { + return (nodeName.startsWith("xmlns")); + } + + public static boolean + isNonDefaultNamespace(Node node) + { + return (isNamespace(node) && ! "xmlns".equals(node.getNodeName())); + } + + public static boolean + isXmlBase(String attrName) + { + return "xml:base".equals(attrName) || "xlink:href".equals(attrName); + } + + public static boolean + isBlank(IRubyObject obj) + { + if (!(obj instanceof XmlText)) { return false; } + + CharSequence content = ((XmlNode) obj).getContentImpl(); + return content == null || isBlank(content); + } + + public static boolean + isBlank(CharSequence str) + { + int len = str.length(); + int beg = 0; + while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; } + return beg == len; + } + + public static boolean + isBlank(String str) + { + return str.isEmpty() || isBlank((CharSequence) str); + } + + public static boolean + isNullOrEmpty(String str) + { + return str == null || str.isEmpty(); + } + + public static CharSequence + canonicalizeWhitespace(CharSequence str) + { + final int len = str.length(); + StringBuilder sb = new StringBuilder(len); + boolean newline_added = false; + for (int i = 0; i < len; i++) { + char c = 
str.charAt(i); + if (c == '\n') { + if (! newline_added) { + sb.append(c); + newline_added = true; + } + } else { + sb.append(c); + } + } + return sb; + } + + public static String + newQName(String newPrefix, Node node) + { + String tagName = getLocalPart(node.getNodeName()); + if (newPrefix == null) { return tagName; } + return newPrefix + ':' + tagName; + } + + public static IRubyObject[] + nodeListToRubyArray(Ruby runtime, NodeList nodes) + { + IRubyObject[] array = new IRubyObject[nodes.getLength()]; + for (int i = 0; i < nodes.getLength(); i++) { + array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i)); + } + return array; + } + + public static IRubyObject[] + nodeListToArray(Ruby ruby, List nodes) + { + IRubyObject[] result = new IRubyObject[nodes.size()]; + for (int i = 0; i < result.length; i++) { + result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i)); + } + return result; + } + + public static RubyArray + nodeArrayToRubyArray(Ruby ruby, Node[] nodes) + { + RubyArray n = RubyArray.newArray(ruby, nodes.length); + for (int i = 0; i < nodes.length; i++) { + n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i])); + } + return n; + } + + public static String + getValidEncodingOrNull(IRubyObject encoding) + { + if (encoding.isNil()) { return null; } // charsetNames does not like contains(null) + String enc = rubyStringToString(encoding.convertToString()); + if (CharsetNames.contains(enc)) { return enc; } + return null; + } + + public static String + getValidEncoding(IRubyObject encoding) + { + String validEncoding = getValidEncodingOrNull(encoding); + if (validEncoding != null) { return validEncoding; } + return Charset.defaultCharset().name(); + } + + private static final Set CharsetNames = Charset.availableCharsets().keySet(); + + public static String + adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) + { + if (systemId == null) { return systemId; } + File file = new 
File(systemId); + if (file.isAbsolute()) { return systemId; } + String path = resolveSystemId(baseURI, systemId); + if (path != null) { return path; } + path = resolveSystemId(currentDir, systemId); + if (path != null) { return path; } + return resolveSystemId(scriptFileName, systemId); + } + + private static String + resolveSystemId(String baseName, String systemId) + { + if (baseName == null || baseName.length() < 1) { return null; } + String parentName; + baseName = baseName.replace("%20", " "); + File base = new File(baseName); + if (base.isDirectory()) { parentName = baseName; } + else { parentName = base.getParent(); } + if (parentName == null) { return null; } + if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); } + File dtdFile = new File(parentName + "/" + systemId); + if (dtdFile.exists()) { return dtdFile.getPath(); } + return null; + } + + private static final Charset UTF8 = Charset.forName("UTF-8"); + + public static boolean + isUTF8(String encoding) + { + if (encoding == null) { return true; } // no need to convert encoding + + if ("UTF-8".equals(encoding)) { return true; } + return UTF8.aliases().contains(encoding); + } + + public static ByteBuffer + convertEncoding(Charset output_charset, CharSequence input_string) + { + return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters + } + + public static CharSequence + convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) + { + if (!(doc instanceof Html4Document)) { return str; } + String parsed_encoding = ((Html4Document)doc).getPraedEncoding(); + if (parsed_encoding == null) { return str; } + String ruby_encoding = rubyStringToString(doc.getEncoding()); + if (ruby_encoding == null) { return str; } + Charset encoding = Charset.forName(ruby_encoding); + if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; } + if (str.length() == 0) { 
return str; } // no need to convert + return NokogiriHelpers.nkf(context, encoding, str); + } + + private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false); + private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false); + private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false); + private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false); + + // This method is used from HTML documents. HTML meta tag with encoding specification + // might appear after non-ascii characters are used. For example, a title tag before + // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag. + // Nokogiri uses NKF library to convert characters correct encoding. This means the method + // works only for JIS/Shift_JIS/EUC-JP. + private static CharSequence + nkf(ThreadContext context, Charset encoding, CharSequence str) + { + final Ruby runtime = context.getRuntime(); + final ByteList opt; + if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; } + else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; } + else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; } + else { opt = _Ww; } // should not come here. should be treated before this method. 
+ + Class nkfClass; + try { + nkfClass = Ruby.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF"); + } catch (ClassNotFoundException e1) { + return str; + } + Method nkf_method; + try { + nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class); + RubyString r_str = + (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString())); + return NokogiriHelpers.rubyStringToString(r_str); + } catch (SecurityException e) { + return str; + } catch (NoSuchMethodException e) { + return str; + } catch (IllegalArgumentException e) { + return str; + } catch (IllegalAccessException e) { + return str; + } catch (InvocationTargetException e) { + return str; + } + } + + private static final Charset Shift_JIS = Charset.forName("Shift_JIS"); + private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS + private static final Charset EUC_JP = Charset.forName("EUC-JP"); + + public static boolean + shouldEncode(Node text) + { + final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING); + return encoded == null || ! 
encoded; + } + + public static boolean + shouldDecode(Node text) + { + return !shouldEncode(text); + } + + public static NokogiriNamespaceCache + getNamespaceCache(Node node) + { + XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument()); + return xmlDoc.getNamespaceCache(); + } + + public static Node + renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException + { + Document doc = node.getOwnerDocument(); + NokogiriNamespaceCache nsCache = getNamespaceCache(node); + Node result = doc.renameNode(node, namespaceURI, qualifiedName); + if (result != node) { + nsCache.replaceNode(node, result); + } + return result; + } } diff --git a/ext/java/nokogiri/internals/NokogiriNamespaceCache.java b/ext/java/nokogiri/internals/NokogiriNamespaceCache.java index 7cb332ccd1..e2ef7a9755 100644 --- a/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +++ b/ext/java/nokogiri/internals/NokogiriNamespaceCache.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2014: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.isNamespace; @@ -46,140 +14,215 @@ import org.w3c.dom.Node; /** - * Cache of namespages of each node. XmlDocument has one cache of this class. - * + * Cache of namespaces of each node. XmlDocument has one cache of this class. + * * @author sergio * @author Yoko Harada */ -public class NokogiriNamespaceCache { - - private List keys; - private Map cache; // pair of the index of a given key and entry - private XmlNamespace defaultNamespace = null; - - public NokogiriNamespaceCache() { - keys = new ArrayList(); // keys are [prefix, href] - cache = new LinkedHashMap(); +public class NokogiriNamespaceCache +{ + + private final Map cache; // pair of the index of a given key and entry + private XmlNamespace defaultNamespace = null; + + public + NokogiriNamespaceCache() + { + this.cache = new LinkedHashMap(4); + } + + public + NokogiriNamespaceCache(NokogiriNamespaceCache cache) + { + this.cache = new LinkedHashMap(cache.size() + 2); + this.cache.putAll(cache.cache); + } + + public XmlNamespace + getDefault() + { + return defaultNamespace; + } + + public XmlNamespace + get(String prefix, String href) + { + if (href == null) { return null; } + + CacheEntry value = cache.get(new CacheKey(prefix, href)); + return value == null ? 
null : value.namespace; + } + + public XmlNamespace + get(Node node, String prefix) + { + if (prefix == null) { return defaultNamespace; } + for (Map.Entry entry : cache.entrySet()) { + if (entry.getKey().prefix.equals(prefix)) { + if (entry.getValue().isOwner(node)) { + return entry.getValue().namespace; + } + } } - - public XmlNamespace getDefault() { - return defaultNamespace; + return null; + } + + public List + get(String prefix) + { + List namespaces = new ArrayList(); + if (prefix == null) { + namespaces.add(defaultNamespace); + return namespaces; } - - private String[] getKey(String prefix, String href) { - for (String[] key : keys) { - if (key[0].equals(prefix) && key[1].equals(href)) return key; - } - return null; + for (Map.Entry entry : cache.entrySet()) { + if (entry.getKey().prefix.equals(prefix)) { + namespaces.add(entry.getValue().namespace); + } } - - public XmlNamespace get(String prefix, String href) { - // prefix should not be null. - // In case of a default namespace, an empty string should be given to prefix argument. 
- if (prefix == null || href == null) return null; - String[] key = getKey(prefix, href); - if (key != null) { - return cache.get(key).namespace; - } - return null; + return namespaces; + } + + public List + get(Node node) + { + List namespaces = new ArrayList(); + for (Map.Entry entry : cache.entrySet()) { + if (entry.getValue().isOwner(node)) { + namespaces.add(entry.getValue().namespace); + } } - - public XmlNamespace get(Node node, String prefix) { - if (prefix == null) return defaultNamespace; - for (String[] key : keys) { - if (key[0].equals(prefix) && cache.get(key) != null && cache.get(key).isOwner(node)) { - return cache.get(key).namespace; - } + return namespaces; + } + + public void + put(XmlNamespace namespace, Node ownerNode) + { + String prefix = namespace.getPrefix(); + String href = namespace.getHref(); + if (href == null) { return; } + + CacheKey key = new CacheKey(prefix, href); + if (cache.get(key) != null) { return; } + cache.put(key, new CacheEntry(namespace, ownerNode)); + if ("".equals(prefix)) { defaultNamespace = namespace; } + } + + public void + remove(Node ownerNode) + { + String prefix = ownerNode.getPrefix(); + String href = ownerNode.getNamespaceURI(); + if (href == null) { return; } + + cache.remove(new CacheKey(prefix, href)); + } + + public int + size() + { + return cache.size(); + } + + public void + clear() + { + // removes namespace declarations from node + for (CacheEntry entry : cache.values()) { + NamedNodeMap attributes = entry.ownerNode.getAttributes(); + for (int j = 0; j < attributes.getLength(); j++) { + String name = ((Attr) attributes.item(j)).getName(); + if (isNamespace(name)) { + attributes.removeNamedItem(name); } - return null; + } } - - public List get(String prefix) { - List namespaces = new ArrayList(); - if (prefix == null) { - namespaces.add(defaultNamespace); - return namespaces; - } - for (String[] key : keys) { - if (key[0].equals(prefix) && cache.get(key) != null) { - 
namespaces.add(cache.get(key).namespace); - } - } - return namespaces; + cache.clear(); + defaultNamespace = null; + } + + public void + replaceNode(Node oldNode, Node newNode) + { + for (Map.Entry entry : cache.entrySet()) { + if (entry.getValue().isOwner(oldNode)) { + entry.getValue().replaceOwner(newNode); + } } - - public List get(Node node) { - List namespaces = new ArrayList(); - for (String[] key : keys) { - CacheEntry entry = cache.get(key); - if (entry.isOwner(node)) { - namespaces.add(entry.namespace); - } - } - return namespaces; + } + + @Override + public String + toString() + { + return getClass().getName() + '@' + Integer.toHexString(hashCode()) + '(' + cache + "default=" + defaultNamespace + ')'; + } + + private static class CacheKey + { + final String prefix; + final String href; + + CacheKey(String prefix, String href) + { + this.prefix = prefix; + this.href = href; } - public void put(XmlNamespace namespace, Node ownerNode) { - // prefix should not be null. - // In case of a default namespace, an empty string should be given to prefix argument. - String prefixString = namespace.getPrefix(); - String hrefString = namespace.getHref(); - if (getKey(prefixString, hrefString) != null) return; - String[] key = {prefixString, hrefString}; - keys.add(key); - CacheEntry entry = new CacheEntry(namespace, ownerNode); - cache.put(key, entry); - if ("".equals(prefixString)) defaultNamespace = namespace; + @Override + public boolean + equals(final Object obj) + { + if (obj instanceof CacheKey) { + CacheKey that = (CacheKey) obj; + return prefix == null ? that.prefix == null : prefix.equals(that.prefix) && href.equals(that.href); + } + return false; } - public void remove(String prefix, String href) { - String[] key = getKey(prefix, href); - if (key == null) return; - keys.remove(key); - cache.remove(key); + @Override + public int + hashCode() + { + return (prefix == null ? 
0 : prefix.hashCode()) + 37 * href.hashCode(); } - public void clear() { - // removes namespace declarations from node - for (String[] key : cache.keySet()) { - CacheEntry entry = cache.get(key); - NamedNodeMap attributes = entry.ownerNode.getAttributes(); - for (int j=0; j * */ -public final class NokogiriNamespaceContext implements NamespaceContext { +public final class NokogiriNamespaceContext implements NamespaceContext +{ - public static final String NOKOGIRI_PREFIX = "nokogiri"; - public static final String NOKOGIRI_URI = "http://www.nokogiri.org/default_ns/ruby/extensions_functions"; - public static final String NOKOGIRI_TEMPORARY_ROOT_TAG = "nokogiri-temporary-root-tag"; - - private final Map register; + /* + * these constants have matching declarations in + * ext/nokogiri/xml_xpath_context.c + */ + public static final String NOKOGIRI_PREFIX = "nokogiri"; + public static final String NOKOGIRI_URI = "http://www.nokogiri.org/default_ns/ruby/extensions_functions"; - public static NokogiriNamespaceContext create() { - return new NokogiriNamespaceContext(); + public static final String NOKOGIRI_BUILTIN_PREFIX = "nokogiri-builtin"; + public static final String NOKOGIRI_BUILTIN_URI = "https://www.nokogiri.org/default_ns/ruby/builtins"; + + private final Map register; + + public static NokogiriNamespaceContext + create() + { + return new NokogiriNamespaceContext(); + } + + private + NokogiriNamespaceContext() + { + register = new HashMap(6, 1); + register.put(NOKOGIRI_PREFIX, NOKOGIRI_URI); + register.put(NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI); + register.put("xml", "http://www.w3.org/XML/1998/namespace"); + register.put("xhtml", "http://www.w3.org/1999/xhtml"); + } + + public String + getNamespaceURI(String prefix) + { + if (prefix == null) { + throw new IllegalArgumentException(); } - - private NokogiriNamespaceContext() { - register = new HashMap(6, 1); - register.put(NOKOGIRI_PREFIX, NOKOGIRI_URI); - register.put("xml", 
"http://www.w3.org/XML/1998/namespace"); - register.put("xhtml", "http://www.w3.org/1999/xhtml"); + String uri = this.register.get(prefix); + if (uri != null) { + return uri; } - public String getNamespaceURI(String prefix) { - if (prefix == null) { - throw new IllegalArgumentException(); - } - String uri = this.register.get(prefix); - if (uri != null) { - return uri; - } - - if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { - uri = this.register.get(XMLConstants.XMLNS_ATTRIBUTE); - return (uri == null) ? XMLConstants.XMLNS_ATTRIBUTE_NS_URI : uri; - } - - return XMLConstants.NULL_NS_URI; + if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { + uri = this.register.get(XMLConstants.XMLNS_ATTRIBUTE); + return (uri == null) ? XMLConstants.XMLNS_ATTRIBUTE_NS_URI : uri; } - public String getPrefix(String uri) { - if (uri == null) { - throw new IllegalArgumentException("uri is null"); - } - Set> entries = register.entrySet(); - for (Entry entry : entries) { - if (uri.equals(entry.getValue())) { - return entry.getKey(); - } - } - return null; - } + return XMLConstants.NULL_NS_URI; + } - public Iterator getPrefixes(String uri) { - Set> entries = register.entrySet(); - ArrayList list = new ArrayList(entries.size()); - for (Entry entry : entries) { - if (uri.equals(entry.getValue())) { - list.add(entry.getKey()); - } - } - return list.iterator(); + public String + getPrefix(String uri) + { + if (uri == null) { + throw new IllegalArgumentException("uri is null"); } - - public Set getAllPrefixes() { - return register.keySet(); + Set> entries = register.entrySet(); + for (Entry entry : entries) { + if (uri.equals(entry.getValue())) { + return entry.getKey(); + } } + return null; + } - public void registerNamespace(String prefix, String uri) { - if ("xmlns".equals(prefix)) prefix = ""; - register.put(prefix, uri); + public Iterator + getPrefixes(String uri) + { + Set> entries = register.entrySet(); + ArrayList list = new ArrayList(entries.size()); + for (Entry entry : entries) { + 
if (uri.equals(entry.getValue())) { + list.add(entry.getKey()); + } } + return list.iterator(); + } + + public Set + getAllPrefixes() + { + return register.keySet(); + } + + public void + registerNamespace(String prefix, String uri) + { + if ("xmlns".equals(prefix)) { prefix = ""; } + register.put(prefix, uri); + } } diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java index 15b622c522..bc907ddf95 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java @@ -1,100 +1,86 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * Error Handler for XML document when recover is true (default). - * + * * @author sergio * @author Yoko Harada */ -public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler{ - public NokogiriNonStrictErrorHandler(boolean noerror, boolean nowarning) { - super(noerror, nowarning); - } +public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler +{ + public + NokogiriNonStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) + { + super(runtime, noerror, nowarning); + } - public void warning(SAXParseException ex) throws SAXException { - errors.add(ex); - } + public void + warning(SAXParseException ex) throws SAXException + { + addError(ex); + } - public void error(SAXParseException ex) throws SAXException { - errors.add(ex); - } + public void + error(SAXParseException ex) throws SAXException + { + addError(ex); + } - public void fatalError(SAXParseException ex) throws SAXException { - // fix #837 - // Xerces won't skip the reference entity (and other invalid) constructs - // found in the prolog, instead it will keep calling this method and we'll - // keep inserting the error in the document errors array until we run - // out of memory - errors.add(ex); - String message = ex.getMessage(); + public void + fatalError(SAXParseException ex) throws SAXException + { + // fix #837 + // Xerces won't skip the reference entity (and other invalid) constructs + // found in the prolog, instead it will keep calling this method and we'll + // keep inserting the error in the document errors 
array until we run + // out of memory + addError(ex); + String message = ex.getMessage(); - // The problem with Xerces is that some errors will cause the - // parser not to advance the reader and it will keep reporting - // the same error over and over, which will cause the parser - // to enter an infinite loop unless we throw the exception. - if (message != null && isFatal(message)) { - throw ex; - } + // The problem with Xerces is that some errors will cause the + // parser not to advance the reader and it will keep reporting + // the same error over and over, which will cause the parser + // to enter an infinite loop unless we throw the exception. + if (message != null && isFatal(message)) { + throw ex; } + } - public void error(String domain, String key, XMLParseException e) { - errors.add(e); - } + public void + error(String domain, String key, XMLParseException e) + { + addError(e); + } - public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); - } + public void + fatalError(String domain, String key, XMLParseException e) + { + addError(e); + } - public void warning(String domain, String key, XMLParseException e) { - errors.add(e); - } + public void + warning(String domain, String key, XMLParseException e) + { + addError(e); + } - /* - * Determine whether this is a fatal error that should cause - * the parsing to stop, or an error that can be ignored. - */ - private static boolean isFatal(String msg) { - String msgLowerCase = msg.toLowerCase(); - return - msgLowerCase.contains("in prolog") || - msgLowerCase.contains("limit") || - msgLowerCase.contains("preceding the root element must be well-formed") || - msgLowerCase.contains("following the root element must be well-formed"); - } + /* + * Determine whether this is a fatal error that should cause + * the parsing to stop, or an error that can be ignored. 
+ */ + private static boolean + isFatal(String msg) + { + String msgLowerCase = msg.toLowerCase(); + return + msgLowerCase.contains("in prolog") || + msgLowerCase.contains("limit") || + msgLowerCase.contains("preceding the root element must be well-formed") || + msgLowerCase.contains("following the root element must be well-formed"); + } } diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java index 011af24326..152ee4657c 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java @@ -1,121 +1,107 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * Non-strict error handler for NekoHtml. - * + * * NekoHtml adds too many warnings, which makes later processing hard. For example, - * Nokogiri wants to know whether number of errors have been increased or not to judge - * availability of creating NodeSet from a given fragment. When the fragment nodes + * Nokogiri wants to know whether number of errors have been increased or not to judge + * availability of creating NodeSet from a given fragment. When the fragment nodes * are to be created from HTML document, which means NekoHtml is used, always errors * increases. As a result, even though the given fragment is correct HTML, NodeSet * base on the given fragment won't be created. This is why all warnings are eliminated. - * + * * @author Yoko Harada */ -public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler { - - public NokogiriNonStrictErrorHandler4NekoHtml(boolean nowarning) { - super(false, nowarning); - } - - public NokogiriNonStrictErrorHandler4NekoHtml(boolean noerror, boolean nowarning) { - super(noerror, nowarning); - } +public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler +{ + + public + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean nowarning) + { + super(runtime, false, nowarning); + } + + public + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean noerror, boolean nowarning) + { + super(runtime, noerror, nowarning); + } - public void warning(SAXParseException ex) throws SAXException { - //noop. 
NekoHtml adds too many warnings. - } + public void + warning(SAXParseException ex) throws SAXException + { + //noop. NekoHtml adds too many warnings. + } - public void error(SAXParseException ex) throws SAXException { - errors.add(ex); - } + public void + error(SAXParseException ex) throws SAXException + { + addError(ex); + } - public void fatalError(SAXParseException ex) throws SAXException { - errors.add(ex); - } + public void + fatalError(SAXParseException ex) throws SAXException + { + addError(ex); + } - /** - * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method - * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. - * - * @param domain The domain of the error. The domain can be any string but is - * suggested to be a valid URI. The domain can be used to conveniently - * specify a web site location of the relevant specification or - * document pertaining to this warning. - * @param key The error key. This key can be any string and is implementation - * dependent. - * @param e Exception. - */ - public void error(String domain, String key, XMLParseException e) { - errors.add(e); - } + /** + * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method + * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. + * + * @param domain The domain of the error. The domain can be any string but is + * suggested to be a valid URI. The domain can be used to conveniently + * specify a web site location of the relevant specification or + * document pertaining to this warning. + * @param key The error key. This key can be any string and is implementation + * dependent. + * @param e Exception. + */ + public void + error(String domain, String key, XMLParseException e) + { + addError(e); + } - /** - * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method - * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. 
- * - * @param domain The domain of the fatal error. The domain can be any string but is - * suggested to be a valid URI. The domain can be used to conveniently - * specify a web site location of the relevant specification or - * document pertaining to this warning. - * @param key The fatal error key. This key can be any string and is implementation - * dependent. - * @param e Exception. - */ - public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); - } + /** + * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method + * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. + * + * @param domain The domain of the fatal error. The domain can be any string but is + * suggested to be a valid URI. The domain can be used to conveniently + * specify a web site location of the relevant specification or + * document pertaining to this warning. + * @param key The fatal error key. This key can be any string and is implementation + * dependent. + * @param e Exception. + */ + public void + fatalError(String domain, String key, XMLParseException e) + { + addError(e); + } - /** - * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method - * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. - * - * @param domain The domain of the warning. The domain can be any string but is - * suggested to be a valid URI. The domain can be used to conveniently - * specify a web site location of the relevant specification or - * document pertaining to this warning. - * @param key The warning key. This key can be any string and is implementation - * dependent. - * @param e Exception. - */ - public void warning(String domain, String key, XMLParseException e) { - errors.add(e); - } + /** + * Implementation of org.apache.xerces.xni.parser.XMLErrorHandler. This method + * is invoked during parsing fired by HtmlDomParserContext and is a NekoHtml requirement. 
+ * + * @param domain The domain of the warning. The domain can be any string but is + * suggested to be a valid URI. The domain can be used to conveniently + * specify a web site location of the relevant specification or + * document pertaining to this warning. + * @param key The warning key. This key can be any string and is implementation + * dependent. + * @param e Exception. + */ + public void + warning(String domain, String key, XMLParseException e) + { + addError(e); + } } diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java index 10315faebf..78118d1de7 100644 --- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java @@ -1,78 +1,62 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * Strict error handler. Even though strict is specified, Nokogiri allows to go further * when NOERROR or/both NOWARNING is/are true. - * + * * @author sergio * @author Yoko Harada */ -public class NokogiriStrictErrorHandler extends NokogiriErrorHandler { - public NokogiriStrictErrorHandler(boolean noerror, boolean nowarning) { - super(noerror, nowarning); - } - - public void warning(SAXParseException spex) throws SAXException { - if (!nowarning) throw spex; - else errors.add(spex); - } - - public void error(SAXParseException spex) throws SAXException { - if (!noerror) throw spex; - else errors.add(spex); - } - - public void fatalError(SAXParseException spex) throws SAXException { - throw spex; - } - - public void error(String domain, String key, XMLParseException e) throws XMLParseException { - if (!noerror) throw e; - else errors.add(e); - } - - public void fatalError(String domain, String key, XMLParseException e) throws XMLParseException { - throw e; - } - - public void warning(String domain, String key, XMLParseException e) throws XMLParseException { - if (!nowarning) throw e; - if (!usesNekoHtml(domain)) errors.add(e); - } +public class NokogiriStrictErrorHandler extends NokogiriErrorHandler +{ + public + NokogiriStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) + { + super(runtime, noerror, nowarning); + } + + public void + warning(SAXParseException spex) throws SAXException + { + if (!nowarning) { throw spex; } + else { addError(spex); } + } + + public void + error(SAXParseException 
spex) throws SAXException + { + if (!noerror) { throw spex; } + else { addError(spex); } + } + + public void + fatalError(SAXParseException spex) throws SAXException + { + throw spex; + } + + public void + error(String domain, String key, XMLParseException e) throws XMLParseException + { + if (!noerror) { throw e; } + else { addError(e); } + } + + public void + fatalError(String domain, String key, XMLParseException e) throws XMLParseException + { + throw e; + } + + public void + warning(String domain, String key, XMLParseException e) throws XMLParseException + { + if (!nowarning) { throw e; } + else { addError(e); } + } } diff --git a/ext/java/nokogiri/internals/NokogiriXPathFunction.java b/ext/java/nokogiri/internals/NokogiriXPathFunction.java index 30b53780e4..4eab6bd854 100644 --- a/ext/java/nokogiri/internals/NokogiriXPathFunction.java +++ b/ext/java/nokogiri/internals/NokogiriXPathFunction.java @@ -1,41 +1,10 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import java.util.List; import javax.xml.xpath.XPathFunction; import javax.xml.xpath.XPathFunctionException; +import javax.xml.namespace.QName; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -53,71 +22,144 @@ import nokogiri.XmlNode; import nokogiri.XmlNodeSet; +import static nokogiri.internals.NokogiriHelpers.nodeListToRubyArray; + /** * Xpath function handler. - * + * * @author sergio * @author Yoko Harada */ -public class NokogiriXPathFunction implements XPathFunction { - - private final IRubyObject handler; - private final String name; - private final int arity; - - public static NokogiriXPathFunction create(IRubyObject handler, String name, int arity) { - return new NokogiriXPathFunction(handler, name, arity); +public class NokogiriXPathFunction implements XPathFunction +{ + + private final IRubyObject handler; + private final QName name; + private final int arity; + + public static NokogiriXPathFunction + create(IRubyObject handler, QName name, int arity) + { + return new NokogiriXPathFunction(handler, name, arity); + } + + private + NokogiriXPathFunction(IRubyObject handler, QName name, int arity) + { + this.handler = handler; + this.name = name; + this.arity = arity; + } + + public Object + evaluate(List args) throws XPathFunctionException + { + if (args.size() != this.arity) { + throw new XPathFunctionException("arity does not match"); } - private NokogiriXPathFunction(IRubyObject handler, String name, int arity) { - this.handler = 
handler; - this.name = name; - this.arity = arity; + if (name.getNamespaceURI().equals(NokogiriNamespaceContext.NOKOGIRI_BUILTIN_URI)) { + if (name.getLocalPart().equals("css-class")) { + return builtinCssClass(args); + } } - public Object evaluate(List args) throws XPathFunctionException { - if (args.size() != this.arity) { - throw new XPathFunctionException("arity does not match"); - } - - final Ruby runtime = this.handler.getRuntime(); - ThreadContext context = runtime.getCurrentContext(); + if (this.handler.isNil()) { + throw new XPathFunctionException("no custom function handler declared for '" + name + "'"); + } - IRubyObject result = Helpers.invoke(context, this.handler, this.name, fromObjectToRubyArgs(runtime, args)); + final Ruby runtime = this.handler.getRuntime(); + ThreadContext context = runtime.getCurrentContext(); + IRubyObject result = Helpers.invoke(context, this.handler, this.name.getLocalPart(), + fromObjectToRubyArgs(runtime, args)); + return fromRubyToObject(runtime, result); + } - return fromRubyToObject(runtime, result); + private static IRubyObject[] + fromObjectToRubyArgs(final Ruby runtime, List args) + { + IRubyObject[] newArgs = new IRubyObject[args.size()]; + for (int i = 0; i < args.size(); i++) { + newArgs[i] = fromObjectToRuby(runtime, args.get(i)); } + return newArgs; + } - private static IRubyObject[] fromObjectToRubyArgs(final Ruby runtime, List args) { - IRubyObject[] newArgs = new IRubyObject[args.size()]; - for(int i = 0; i < args.size(); i++) { - newArgs[i] = fromObjectToRuby(runtime, args.get(i)); - } - return newArgs; + private static IRubyObject + fromObjectToRuby(final Ruby runtime, Object obj) + { + // argument object type is one of NodeList, String, Boolean, or Double. 
+ if (obj instanceof NodeList) { + IRubyObject[] nodes = nodeListToRubyArray(runtime, (NodeList) obj); + return XmlNodeSet.newNodeSet(runtime, nodes); } + return JavaUtil.convertJavaToUsableRubyObject(runtime, obj); + } - private static IRubyObject fromObjectToRuby(final Ruby runtime, Object obj) { - // argument object type is one of NodeList, String, Boolean, or Double. - if (obj instanceof NodeList) { - XmlNodeSet xmlNodeSet = XmlNodeSet.newEmptyNodeSet(runtime.getCurrentContext()); - xmlNodeSet.setNodeList((NodeList) obj); - return xmlNodeSet; - } - return JavaUtil.convertJavaToUsableRubyObject(runtime, obj); + private static Object + fromRubyToObject(final Ruby runtime, IRubyObject obj) + { + if (obj instanceof RubyString) { return obj.asJavaString(); } + if (obj instanceof RubyBoolean) { return obj.toJava(Boolean.class); } + if (obj instanceof RubyFloat) { return obj.toJava(Double.class); } + if (obj instanceof RubyInteger) { + if (obj instanceof RubyFixnum) { return RubyFixnum.fix2long(obj); } + return obj.toJava(java.math.BigInteger.class); + } + if (obj instanceof XmlNodeSet) { return obj; } + if (obj instanceof RubyArray) { + return XmlNodeSet.newNodeSet(runtime, ((RubyArray) obj).toJavaArray()); } + /*if (o instanceof XmlNode)*/ return ((XmlNode) obj).getNode(); + } + + private static boolean + builtinCssClass(List args) throws XPathFunctionException + { + if (args.size() != 2) { + throw new XPathFunctionException("builtin function nokogiri:css-class takes two arguments"); + } + + String hay = args.get(0).toString(); + String needle = args.get(1).toString(); - private static Object fromRubyToObject(final Ruby runtime, IRubyObject obj) { - if (obj instanceof RubyString) return obj.asJavaString(); - if (obj instanceof RubyBoolean) return obj.toJava(Boolean.class); - if (obj instanceof RubyFloat) return obj.toJava(Double.class); - if (obj instanceof RubyInteger) { - if ( obj instanceof RubyFixnum ) return RubyFixnum.fix2long(obj); - return 
obj.toJava(java.math.BigInteger.class); + if (needle.length() == 0) { + return true; + } + + int j = 0; + int j_lim = hay.length() - needle.length(); + while (j <= j_lim) { + int k; + for (k = 0; k < needle.length(); k++) { + if (needle.charAt(k) != hay.charAt(j + k)) { + break; } - if (obj instanceof XmlNodeSet) return obj; - if (obj instanceof RubyArray) { - return XmlNodeSet.newXmlNodeSet(runtime.getCurrentContext(), ((RubyArray) obj).toJavaArray()); + } + if (k == needle.length()) { + if ((hay.length() == (j + k)) || isWhitespace(hay.charAt(j + k))) { + return true ; } - /*if (o instanceof XmlNode)*/ return ((XmlNode) obj).getNode(); + } + + /* advance str to whitespace */ + while (j <= j_lim && !isWhitespace(hay.charAt(j))) { + j++; + } + + /* advance str to start of next word or end of string */ + while (j <= j_lim && isWhitespace(hay.charAt(j))) { + j++; + } } + + return false; + } + + private static boolean + isWhitespace(char subject) + { + // see libxml2's xmlIsBlank_ch() + return ((subject == 0x09) || (subject == 0x0A) || (subject == 0x0D) || (subject == 0x20)); + } } diff --git a/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java b/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java index be6153d4b1..afcadcc834 100644 --- a/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +++ b/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including 
- * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import javax.xml.namespace.QName; @@ -39,32 +7,44 @@ import org.jruby.runtime.builtin.IRubyObject; /** - * Xpath function resolver class, which is used in XmlXpathContext. - * + * Xpath function resolver class, which is used in XmlXpathContext. 
+ * * @author sergio * @author Yoko Harada */ -public final class NokogiriXPathFunctionResolver implements XPathFunctionResolver { - - private IRubyObject handler; - - public static NokogiriXPathFunctionResolver create(IRubyObject handler) { - NokogiriXPathFunctionResolver freshResolver = new NokogiriXPathFunctionResolver(); - freshResolver.setHandler(handler); - return freshResolver; - } - - private NokogiriXPathFunctionResolver() {} +public final class NokogiriXPathFunctionResolver implements XPathFunctionResolver +{ - public final IRubyObject getHandler() { - return handler; - } - - public void setHandler(IRubyObject handler) { - this.handler = handler; - } + private IRubyObject handler; - public XPathFunction resolveFunction(QName name, int arity) { - return NokogiriXPathFunction.create(handler, name.getLocalPart(), arity); + public static NokogiriXPathFunctionResolver + create(IRubyObject handler) + { + NokogiriXPathFunctionResolver freshResolver = new NokogiriXPathFunctionResolver(); + if (!handler.isNil()) { + freshResolver.setHandler(handler); } + return freshResolver; + } + + private + NokogiriXPathFunctionResolver() {} + + public final IRubyObject + getHandler() + { + return handler; + } + + public void + setHandler(IRubyObject handler) + { + this.handler = handler; + } + + public XPathFunction + resolveFunction(QName name, int arity) + { + return NokogiriXPathFunction.create(handler, name, arity); + } } diff --git a/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java b/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java index ca7bbe0575..0c8ca6d29b 100644 --- a/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +++ b/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java @@ -1,34 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio 
Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ package nokogiri.internals; import java.util.HashMap; @@ -37,24 +6,32 @@ /** * XPath variable support - * + * * @author Ken Bloom * @author Yoko Harada */ -public class NokogiriXPathVariableResolver implements XPathVariableResolver { +public class NokogiriXPathVariableResolver implements XPathVariableResolver +{ + + private final HashMap variables = new HashMap(); + + public static NokogiriXPathVariableResolver + create() + { + return new NokogiriXPathVariableResolver(); + } - private final HashMap variables = new HashMap(); + private + NokogiriXPathVariableResolver() {} - public static NokogiriXPathVariableResolver create() { - return new NokogiriXPathVariableResolver(); - } - - private NokogiriXPathVariableResolver() {} - - public Object resolveVariable(QName variableName){ - return variables.get(variableName); - } - public void registerVariable(String name,String value){ - variables.put(new QName(name),value); - } + public Object + resolveVariable(QName variableName) + { + return variables.get(variableName); + } + public void + registerVariable(String name, String value) + { + variables.put(new QName(name), value); + } } diff --git a/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java b/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java index 020f7c2cad..614f742827 100644 --- a/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +++ b/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the 
Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import javax.xml.transform.ErrorListener; @@ -37,51 +5,66 @@ /** * Error Listener for XSLT transformer - * + * * @author Yoko Harada */ -public class NokogiriXsltErrorListener implements ErrorListener { - public enum ErrorType { - SUCCESS, - WARNING, - ERROR, - FATAL - } +public class NokogiriXsltErrorListener implements ErrorListener +{ + public enum ErrorType { + SUCCESS, + WARNING, + ERROR, + FATAL + } + + private ErrorType type = ErrorType.SUCCESS; + private String errorMessage = null; + private Exception exception = null; + + public void + warning(TransformerException ex) + { + type = ErrorType.WARNING; + setError(ex); + } + + public void + error(TransformerException ex) + { + type = ErrorType.ERROR; + setError(ex); + } + + public void + fatalError(TransformerException ex) + { + type = ErrorType.FATAL; + setError(ex); + } - private ErrorType type = ErrorType.SUCCESS; - private String errorMessage = null; - private Exception exception = null; + private void + setError(TransformerException ex) + { + errorMessage = 
ex.getMessage(); + exception = ex; + } - public void warning(TransformerException ex) { - type = ErrorType.WARNING; - setError(ex); - } + public String + getErrorMessage() + { + return errorMessage; + } - public void error(TransformerException ex) { - type = ErrorType.ERROR; - setError(ex); - } + public ErrorType + getErrorType() + { + return type; + } - public void fatalError(TransformerException ex) { - type = ErrorType.FATAL; - setError(ex); - } - - private void setError(TransformerException ex) { - errorMessage = ex.getMessage(); - exception = ex; - } - - public String getErrorMessage() { - return errorMessage; - } - - public ErrorType getErrorType() { - return type; - } - - public Exception getException() { - return exception; - } + public Exception + getException() + { + return exception; + } } diff --git a/ext/java/nokogiri/internals/ParserContext.java b/ext/java/nokogiri/internals/ParserContext.java index eb97c65fb0..27fa835dc8 100644 --- a/ext/java/nokogiri/internals/ParserContext.java +++ b/ext/java/nokogiri/internals/ParserContext.java @@ -1,59 +1,22 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or 
substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.rubyStringToString; -import static org.jruby.runtime.Helpers.invoke; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.net.URI; -import java.nio.charset.Charset; -import java.nio.charset.UnsupportedCharsetException; import java.util.concurrent.Callable; import org.jruby.Ruby; import org.jruby.RubyClass; -import org.jruby.RubyIO; import org.jruby.RubyObject; import org.jruby.RubyString; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; -import org.jruby.util.TypeConverter; +import org.jruby.util.IOInputStream; import org.xml.sax.InputSource; /** @@ -63,243 +26,237 @@ * @author Patrick Mahoney * @author Yoko Harada */ -public abstract class ParserContext extends RubyObject { - protected InputSource source = null; - protected IRubyObject detected_encoding = null; - protected int stringDataSize = -1; - - public ParserContext(Ruby runtime) { - // default to class 'Object' because this class isn't exposed to Ruby - super(runtime, runtime.getObject()); - } - - public ParserContext(Ruby runtime, RubyClass klass) { - super(runtime, klass); +public abstract class ParserContext extends RubyObject +{ + private static final long serialVersionUID = 1L; + + protected InputSource source = null; + protected 
IRubyObject detected_encoding = null; + protected int stringDataSize = -1; + protected String java_encoding; + + public + ParserContext(Ruby runtime) + { + // default to class 'Object' because this class isn't exposed to Ruby + super(runtime, runtime.getObject()); + } + + public + ParserContext(Ruby runtime, RubyClass klass) + { + super(runtime, klass); + } + + protected InputSource + getInputSource() + { + return source; + } + + public void + setIOInputSource(ThreadContext context, IRubyObject data, IRubyObject url) + { + source = new InputSource(); + ParserContext.setUrl(context, source, url); + + Ruby ruby = context.getRuntime(); + + if (!(data.respondsTo("read"))) { + throw ruby.newTypeError("must respond to :read"); } - protected InputSource getInputSource() { - return source; + source.setByteStream(new IOInputStream(data)); + if (java_encoding != null) { + source.setEncoding(java_encoding); } + } - /** - * Set the InputSource from url or data, - * which may be an IO object, a String, or a StringIO. 
- */ - public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) { - source = new InputSource(); + public void + setStringInputSource(ThreadContext context, IRubyObject data, IRubyObject url) + { + source = new InputSource(); + ParserContext.setUrl(context, source, url); - Ruby ruby = context.getRuntime(); + Ruby ruby = context.getRuntime(); - ParserContext.setUrl(context, source, url); - - // if setEncoding returned true, then the stream is set - // to the EncodingReaderInputStream - if (setEncoding(context, data)) - return; - - RubyString stringData = null; - if (invoke(context, data, "respond_to?", ruby.newSymbol("to_io")).isTrue()) { - RubyIO io = - (RubyIO) TypeConverter.convertToType(data, - ruby.getIO(), - "to_io"); - // use unclosedable input stream to fix #495 - source.setByteStream(new UncloseableInputStream(io.getInStream())); - - } else if (invoke(context, data, "respond_to?", ruby.newSymbol("read")).isTrue()) { - stringData = invoke(context, data, "read").convertToString(); - - } else if (invoke(context, data, "respond_to?", ruby.newSymbol("string")).isTrue()) { - stringData = invoke(context, data, "string").convertToString(); - - } else if (data instanceof RubyString) { - stringData = (RubyString) data; + if (!(data instanceof RubyString)) { + throw ruby.newTypeError("must be kind_of String"); + } - } else { - throw ruby.newArgumentError("must be kind_of String or respond to :to_io, :read, or :string"); - } + RubyString stringData = (RubyString) data; - if (stringData != null) { - String encName = null; - if (stringData.encoding(context) != null) { - encName = stringData.encoding(context).toString(); - } - Charset charset = null; - if (encName != null) { - try { - charset = Charset.forName(encName); - } catch (UnsupportedCharsetException e) { - // do nothing; - } - } - ByteList bytes = stringData.getByteList(); - if (charset != null) { - StringReader reader = new StringReader(new String(bytes.unsafeBytes(), bytes.begin(), 
bytes.length(), charset)); - source.setCharacterStream(reader); - source.setEncoding(charset.name()); - } else { - stringDataSize = bytes.length() - bytes.begin(); - ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()); - source.setByteStream(stream); - } - } + if (stringData.encoding(context) != null) { + RubyString stringEncoding = stringData.encoding(context).asString(); + String encName = NokogiriHelpers.getValidEncodingOrNull(stringEncoding); + if (encName != null) { + java_encoding = encName; + } } - public static void setUrl(ThreadContext context, InputSource source, IRubyObject url) { - String path = rubyStringToString(url); - // Dir.chdir might be called at some point before this. - if (path != null) { + ByteList bytes = stringData.getByteList(); + + stringDataSize = bytes.length() - bytes.begin(); + ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + source.setByteStream(stream); + source.setEncoding(java_encoding); + } + + public static void + setUrl(ThreadContext context, InputSource source, IRubyObject url) + { + String path = rubyStringToString(url); + // Dir.chdir might be called at some point before this. 
+ if (path != null) { + try { + URI uri = URI.create(path); + source.setSystemId(uri.toURL().toString()); + } catch (Exception ex) { + // fallback to the old behavior + File file = new File(path); + if (file.isAbsolute()) { + source.setSystemId(path); + } else { + String pwd = context.getRuntime().getCurrentDirectory(); + String absolutePath; try { - URI uri = URI.create(path); - source.setSystemId(uri.toURL().toString()); - } catch (Exception ex) { - // fallback to the old behavior - File file = new File(path); - if (file.isAbsolute()) { - source.setSystemId(path); - } else { - String pwd = context.getRuntime().getCurrentDirectory(); - String absolutePath; - try { - absolutePath = new File(pwd, path).getCanonicalPath(); - } catch (IOException e) { - absolutePath = new File(pwd, path).getAbsolutePath(); - } - source.setSystemId(absolutePath); - } + absolutePath = new File(pwd, path).getCanonicalPath(); + } catch (IOException e) { + absolutePath = new File(pwd, path).getAbsolutePath(); } + source.setSystemId(absolutePath); } + } } - - private boolean setEncoding(ThreadContext context, IRubyObject data) { - if (data.getType().respondsTo("detect_encoding")) { - // in case of EncodingReader is used - // since EncodingReader won't respond to :to_io - NokogiriEncodingReaderWrapper reader = new NokogiriEncodingReaderWrapper(context, (RubyObject) data); - source.setByteStream(reader); - // data is EnocodingReader - if(reader.detectEncoding()) { - detected_encoding = reader.getEncoding(); - source.setEncoding(detected_encoding.asJavaString()); - } - return true; - } - return false; - } - - protected void setEncoding(String encoding) { - source.setEncoding(encoding); - } - - /** - * Set the InputSource to read from file, a String filename. 
- */ - public void setInputSourceFile(ThreadContext context, IRubyObject file) { - source = new InputSource(); - ParserContext.setUrl(context, source, file); + } + + protected void + setEncoding(String encoding) + { + source.setEncoding(encoding); + } + + /** + * Set the InputSource to read from file, a String filename. + */ + public void + setInputSourceFile(ThreadContext context, IRubyObject file) + { + source = new InputSource(); + ParserContext.setUrl(context, source, file); + } + + /** + * Set the InputSource from stream. + */ + public void + setInputSource(InputStream stream) + { + source = new InputSource(stream); + } + + /** + * Wrap Nokogiri parser options in a utility class. This is + * read-only. + */ + public static class Options + { + protected static final long STRICT = 0; + protected static final long RECOVER = 1; + protected static final long NOENT = 2; + protected static final long DTDLOAD = 4; + protected static final long DTDATTR = 8; + protected static final long DTDVALID = 16; + protected static final long NOERROR = 32; + protected static final long NOWARNING = 64; + protected static final long PEDANTIC = 128; + protected static final long NOBLANKS = 256; + protected static final long SAX1 = 512; + protected static final long XINCLUDE = 1024; + protected static final long NONET = 2048; + protected static final long NODICT = 4096; + protected static final long NSCLEAN = 8192; + protected static final long NOCDATA = 16384; + protected static final long NOXINCNODE = 32768; + + public final boolean strict; + public final boolean recover; + public final boolean noEnt; + public final boolean dtdLoad; + public final boolean dtdAttr; + public final boolean dtdValid; + public final boolean noError; + public final boolean noWarning; + public final boolean pedantic; + public final boolean noBlanks; + public final boolean sax1; + public final boolean xInclude; + public final boolean noNet; + public final boolean noDict; + public final boolean nsClean; + 
public final boolean noCdata; + public final boolean noXIncNode; + + protected static boolean + test(long options, long mask) + { + return ((options & mask) == mask); } - /** - * Set the InputSource from stream. - */ - public void setInputSource(InputStream stream) { - source = new InputSource(stream); + public + Options(long options) + { + strict = ((options & RECOVER) == STRICT); + recover = test(options, RECOVER); + noEnt = test(options, NOENT); + dtdLoad = test(options, DTDLOAD); + dtdAttr = test(options, DTDATTR); + dtdValid = test(options, DTDVALID); + noError = test(options, NOERROR); + noWarning = test(options, NOWARNING); + pedantic = test(options, PEDANTIC); + noBlanks = test(options, NOBLANKS); + sax1 = test(options, SAX1); + xInclude = test(options, XINCLUDE); + noNet = test(options, NONET); + noDict = test(options, NODICT); + nsClean = test(options, NSCLEAN); + noCdata = test(options, NOCDATA); + noXIncNode = test(options, NOXINCNODE); } - - /** - * Wrap Nokogiri parser options in a utility class. This is - * read-only. 
- */ - public static class Options { - protected static final long STRICT = 0; - protected static final long RECOVER = 1; - protected static final long NOENT = 2; - protected static final long DTDLOAD = 4; - protected static final long DTDATTR = 8; - protected static final long DTDVALID = 16; - protected static final long NOERROR = 32; - protected static final long NOWARNING = 64; - protected static final long PEDANTIC = 128; - protected static final long NOBLANKS = 256; - protected static final long SAX1 = 512; - protected static final long XINCLUDE = 1024; - protected static final long NONET = 2048; - protected static final long NODICT = 4096; - protected static final long NSCLEAN = 8192; - protected static final long NOCDATA = 16384; - protected static final long NOXINCNODE = 32768; - - public final boolean strict; - public final boolean recover; - public final boolean noEnt; - public final boolean dtdLoad; - public final boolean dtdAttr; - public final boolean dtdValid; - public final boolean noError; - public final boolean noWarning; - public final boolean pedantic; - public final boolean noBlanks; - public final boolean sax1; - public final boolean xInclude; - public final boolean noNet; - public final boolean noDict; - public final boolean nsClean; - public final boolean noCdata; - public final boolean noXIncNode; - - protected static boolean test(long options, long mask) { - return ((options & mask) == mask); - } - - public Options(long options) { - strict = ((options & RECOVER) == STRICT); - recover = test(options, RECOVER); - noEnt = test(options, NOENT); - dtdLoad = test(options, DTDLOAD); - dtdAttr = test(options, DTDATTR); - dtdValid = test(options, DTDVALID); - noError = test(options, NOERROR); - noWarning = test(options, NOWARNING); - pedantic = test(options, PEDANTIC); - noBlanks = test(options, NOBLANKS); - sax1 = test(options, SAX1); - xInclude = test(options, XINCLUDE); - noNet = test(options, NONET); - noDict = test(options, NODICT); - nsClean = 
test(options, NSCLEAN); - noCdata = test(options, NOCDATA); - noXIncNode = test(options, NOXINCNODE); - } + } + + /* + public static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver { + InputSource source; + public NokogiriXInlcudeEntityResolver(InputSource source) { + this.source = source; + } + + @Override + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + if (systemId != null) source.setSystemId(systemId); + if (publicId != null) source.setPublicId(publicId); + return source; + } + } */ + + public static abstract class ParserTask implements Callable + { + + protected final ThreadContext context; // TODO does not seem like a good idea!? + protected final IRubyObject handler; + protected final T parser; + + protected + ParserTask(ThreadContext context, IRubyObject handler, T parser) + { + this.context = context; + this.handler = handler; + this.parser = parser; } - /* - public static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver { - InputSource source; - public NokogiriXInlcudeEntityResolver(InputSource source) { - this.source = source; - } - - @Override - public InputSource resolveEntity(String publicId, String systemId) - throws SAXException, IOException { - if (systemId != null) source.setSystemId(systemId); - if (publicId != null) source.setPublicId(publicId); - return source; - } - } */ - - public static abstract class ParserTask implements Callable { - - protected final ThreadContext context; // TODO does not seem like a good idea!? 
- protected final IRubyObject handler; - protected final T parser; - - protected ParserTask(ThreadContext context, IRubyObject handler, T parser) { - this.context = context; - this.handler = handler; - this.parser = parser; - } - - } + } } diff --git a/ext/java/nokogiri/internals/ReaderNode.java b/ext/java/nokogiri/internals/ReaderNode.java index 8fb6b403db..aef01a9681 100644 --- a/ext/java/nokogiri/internals/ReaderNode.java +++ b/ext/java/nokogiri/internals/ReaderNode.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - package nokogiri.internals; import java.util.ArrayList; @@ -62,429 +30,535 @@ * @author Yoko Harada * */ -public abstract class ReaderNode { - - final Ruby ruby; - public ReaderAttributeList attributeList; - public Map namespaces; - public int depth, nodeType; - public String lang, localName, xmlBase, prefix, name, uri, value, xmlVersion = "1.0"; - public int startOffset, endOffset; - public boolean hasChildren = false; - private Document document = null; - - protected ReaderNode(final Ruby runtime) { - this.ruby = runtime; +public abstract class ReaderNode +{ + + final Ruby ruby; + public ReaderAttributeList attributeList; + public Map namespaces; + public int depth, nodeType; + public String lang, localName, xmlBase, prefix, name, uri, value, xmlVersion = "1.0"; + public int startOffset, endOffset; + public boolean hasChildren = false; + private Document document = null; + + protected + ReaderNode(final Ruby runtime) + { + this.ruby = runtime; + } + + public abstract String getString(); + + public IRubyObject + getAttributeByIndex(IRubyObject index) + { + if (index.isNil()) { return index; } + + long i = index.convertToInteger().getLongValue(); + if (i > Integer.MAX_VALUE) { + throw ruby.newArgumentError("value too long to be an array index"); } - public abstract String getString(); - - public IRubyObject getAttributeByIndex(IRubyObject index){ - if(index.isNil()) return index; - - long i = index.convertToInteger().getLongValue(); - if(i > Integer.MAX_VALUE) { - throw ruby.newArgumentError("value too long to be an array index"); + if (attributeList == null) { return ruby.getNil(); } + if (i < 0 || attributeList.length <= i) { return ruby.getNil(); } + return stringOrBlank(ruby, attributeList.values.get(((Long)i).intValue())); + } + + public IRubyObject + getAttributeByName(IRubyObject name) + { + if (attributeList == null) { return ruby.getNil(); } + String value = attributeList.getByName(rubyStringToString(name)); + return stringOrNil(ruby, value); + } + 
+ public IRubyObject + getAttributeByName(String name) + { + if (attributeList == null) { return ruby.getNil(); } + String value = attributeList.getByName(name); + return stringOrNil(ruby, value); + } + + public IRubyObject + getAttributeCount() + { + if (attributeList == null) { return ruby.newFixnum(0); } + return ruby.newFixnum(attributeList.length); + } + + public IRubyObject + getAttributesNodes() + { + RubyArray array = RubyArray.newArray(ruby); + if (attributeList != null && attributeList.length > 0) { + if (document == null) { + document = XmlDocument.createNewDocument(ruby); + } + for (int i = 0; i < attributeList.length; i++) { + if (!isNamespace(attributeList.names.get(i))) { + Attr attr = document.createAttributeNS(attributeList.namespaces.get(i), attributeList.names.get(i)); + attr.setValue(attributeList.values.get(i)); + XmlAttr xmlAttr = new XmlAttr(ruby, attr); + array.append(xmlAttr); } - - if (attributeList == null) return ruby.getNil(); - if (i<0 || attributeList.length <= i) return ruby.getNil(); - return stringOrBlank(ruby, attributeList.values.get(((Long)i).intValue())); + } } - - public IRubyObject getAttributeByName(IRubyObject name){ - if(attributeList == null) return ruby.getNil(); - String value = attributeList.getByName(rubyStringToString(name)); - return stringOrNil(ruby, value); + return array; + } + + public IRubyObject + getAttributes(ThreadContext context) + { + final Ruby runtime = context.runtime; + RubyHash hash = RubyHash.newHash(runtime); + if (attributeList == null) { return hash; } + for (int i = 0; i < attributeList.length; i++) { + if (isNamespace(attributeList.names.get(i))) { continue; } + IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i)); + IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i)); + hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) } - - public IRubyObject getAttributeByName(String name){ - if(attributeList == null) return ruby.getNil(); - String value = 
attributeList.getByName(name); - return stringOrNil(ruby, value); + return hash; + } + + public IRubyObject + getDepth() + { + return ruby.newFixnum(depth); + } + + public IRubyObject + getLang() + { + return stringOrNil(ruby, lang); + } + + public IRubyObject + getLocalName() + { + return stringOrNil(ruby, localName); + } + + public IRubyObject + getName() + { + return stringOrNil(ruby, name); + } + + public IRubyObject + getNamespaces(ThreadContext context) + { + final Ruby runtime = context.runtime; + RubyHash hash = RubyHash.newHash(runtime); + if (namespaces == null) { return hash; } + for (Map.Entry entry : namespaces.entrySet()) { + IRubyObject k = stringOrBlank(runtime, entry.getKey()); + IRubyObject v = stringOrBlank(runtime, entry.getValue()); + hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) } - - public IRubyObject getAttributeCount(){ - if(attributeList == null) return ruby.newFixnum(0); - return ruby.newFixnum(attributeList.length); + return hash; + } + + public IRubyObject + getXmlBase() + { + return stringOrNil(ruby, xmlBase); + } + + public IRubyObject + getPrefix() + { + return stringOrNil(ruby, prefix); + } + + public IRubyObject + getUri() + { + return stringOrNil(ruby, uri); + } + + public IRubyObject + getValue() + { + return stringOrNil(ruby, value); + } + + public IRubyObject + getXmlVersion() + { + return ruby.newString(xmlVersion); + } + + public RubyBoolean + hasAttributes() + { + if (attributeList == null || attributeList.length == 0) { return ruby.getFalse(); } + return ruby.getTrue(); + } + + public abstract RubyBoolean hasValue(); + + public RubyBoolean + isDefault() + { + // TODO Implement. 
+ return ruby.getFalse(); + } + + public boolean + isError() { return false; } + + protected void + parsePrefix(String qName) + { + int index = qName.indexOf(':'); + if (index != -1) { prefix = qName.substring(0, index); } + } + + public void + setLang(String lang) + { + this.lang = lang; + } + + public IRubyObject + toSyntaxError() { return ruby.getNil(); } + + public IRubyObject + getNodeType() { return ruby.newFixnum(nodeType); } + + public static enum ReaderNodeType { + NODE(0), + ELEMENT(1), + ATTRIBUTE(2), + TEXT(3), + CDATA(4), + ENTITY_REFERENCE(5), + ENTITY(6), + PROCESSING_INSTRUCTION(7), + COMMENT(8), + DOCUMENT(9), + DOCUMENT_TYPE(10), + DOCUMENTFRAGMENT(11), + NOTATION(12), + WHITESPACE(13), + SIGNIFICANT_WHITESPACE(14), + END_ELEMENT(15), + END_ENTITY(16), + XML_DECLARATION(17); + + private final int value; + ReaderNodeType(int value) + { + this.value = value; } - public IRubyObject getAttributesNodes() { - RubyArray array = RubyArray.newArray(ruby); - if (attributeList != null && attributeList.length > 0) { - if (document == null) { - XmlDocument doc = (XmlDocument) XmlDocument.rbNew(ruby.getCurrentContext(), getNokogiriClass(ruby, "Nokogiri::XML::Document"), new IRubyObject[0]); - document = doc.getDocument(); - } - for (int i=0; i langStack, + Stack xmlBaseStack) + { + return new ClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack); + } + + public static class ClosingNode extends ReaderNode + { + + // public ClosingNode() {} + + ClosingNode(Ruby runtime, String uri, String localName, String qName, int depth, Stack langStack, + Stack xmlBaseStack) + { + super(runtime); + nodeType = ReaderNodeType.END_ELEMENT.getValue(); + this.uri = "".equals(uri) ? null : uri; + this.localName = ! isBlank(localName) ? 
localName : qName; + this.name = qName; + parsePrefix(qName); + this.depth = depth; + if (!langStack.isEmpty()) { this.lang = langStack.peek(); } + if (!xmlBaseStack.isEmpty()) { this.xmlBase = xmlBaseStack.peek(); } } - public IRubyObject getDepth() { - return ruby.newFixnum(depth); + @Override + public IRubyObject + getAttributeCount() + { + return ruby.newFixnum(0); } - public IRubyObject getLang() { - return stringOrNil(ruby, lang); + @Override + public RubyBoolean + hasValue() + { + return ruby.getFalse(); } - public IRubyObject getLocalName() { - return stringOrNil(ruby, localName); + @Override + public String + getString() + { + return "'; + } + } + + public static ElementNode + createElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, + Stack langStack, Stack xmlBaseStack) + { + return new ElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack); + } + + public static class ElementNode extends ReaderNode + { + + // public ElementNode() {} + + ElementNode(Ruby runtime, String uri, String localName, String qName, XMLAttributes attrs, int depth, + Stack langStack, Stack xmlBaseStack) + { + super(runtime); + this.nodeType = ReaderNodeType.ELEMENT.getValue(); + this.uri = "".equals(uri) ? null : uri; + this.localName = ! isBlank(localName) ? 
localName : qName; + this.name = qName; + parsePrefix(qName); + this.depth = depth; + parseAttributes(attrs, langStack, xmlBaseStack); } - public IRubyObject getName() { - return stringOrNil(ruby, name); + @Override + public RubyBoolean + hasValue() + { + return ruby.getFalse(); } - public IRubyObject getNamespaces(ThreadContext context) { - final Ruby runtime = context.runtime; - if (namespaces == null) return runtime.getNil(); - RubyHash hash = RubyHash.newHash(runtime); - for (Map.Entry entry : namespaces.entrySet()) { - IRubyObject k = stringOrBlank(runtime, entry.getKey()); - IRubyObject v = stringOrBlank(runtime, entry.getValue()); - hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) + private void + parseAttributes(XMLAttributes attrs, Stack langStack, Stack xmlBaseStack) + { + if (attrs.getLength() > 0) { attributeList = new ReaderAttributeList(); } + String u, n, v; + for (int i = 0; i < attrs.getLength(); i++) { + u = attrs.getURI(i); + n = attrs.getQName(i); + v = attrs.getValue(i); + if (isNamespace(n)) { + if (namespaces == null) { namespaces = new HashMap(); } + namespaces.put(n, v); + } else { + if (lang == null) { lang = resolveLang(n, v, langStack); } + if (xmlBase == null) { xmlBase = resolveXmlBase(n, v, xmlBaseStack); } } - return hash; + attributeList.add(u, n, v); + } } - public IRubyObject getXmlBase() { - return stringOrNil(ruby, xmlBase); + private String + resolveLang(String n, String v, Stack langStack) + { + if ("xml:lang".equals(n)) { + return v; + } else if (!langStack.isEmpty()) { + return langStack.peek(); + } else { + return null; + } } - public IRubyObject getPrefix() { - return stringOrNil(ruby, prefix); + private String + resolveXmlBase(String n, String v, Stack xmlBaseStack) + { + if (isXmlBase(n)) { + return getXmlBaseUri(n, v, xmlBaseStack); + } else if (!xmlBaseStack.isEmpty()) { + return xmlBaseStack.peek(); + } else { + return null; + } } - public IRubyObject getUri() { - return stringOrNil(ruby, uri); 
+ private String + getXmlBaseUri(String n, String v, Stack xmlBaseStack) + { + if ("xml:base".equals(n)) { + if (v.startsWith("http://")) { + return v; + } else if (v.startsWith("/") && v.endsWith("/")) { + String sub = v.substring(1, v.length() - 2); + String base = xmlBaseStack.peek(); + if (base.endsWith("/")) { + base = base.substring(0, base.length() - 1); + } + int pos = base.lastIndexOf("/"); + return base.substring(0, pos).concat(sub); + } else { + String base = xmlBaseStack.peek(); + if (base.endsWith("/")) { return base.concat(v); } + else { return base.concat("/").concat(v); } + } + } else if ("xlink:href".equals(n)) { + if (v.startsWith("http://")) { + return v; + } else if (!xmlBaseStack.isEmpty()) { + String base = xmlBaseStack.peek(); + return base; + } + } + return null; } - public IRubyObject getValue() { - return stringOrNil(ruby, value); + @Override + public String + getString() + { + StringBuffer sb = new StringBuffer(24); + sb.append('<').append(name); + if (attributeList != null) { + for (int i = 0; i < attributeList.length; i++) { + String n = attributeList.names.get(i); + String v = attributeList.values.get(i); + sb.append(' ').append(n).append('=') + .append('"').append(v).append('"'); + } + } + if (hasChildren) { sb.append('>'); } + else { sb.append("/>"); } + return sb.toString(); } - - public IRubyObject getXmlVersion() { - return ruby.newString(xmlVersion); + } + + private static class ReaderAttributeList + { + final List namespaces = new ArrayList(); + final List names = new ArrayList(); + final List values = new ArrayList(); + int length = 0; + + void + add(String namespace, String name, String value) + { + namespaces.add(namespace != null ? namespace : ""); + names.add(name != null ? name : ""); + values.add(value != null ? 
value : ""); + length++; } - public RubyBoolean hasAttributes() { - if (attributeList == null || attributeList.length == 0) return ruby.getFalse(); - return ruby.getTrue(); + String + getByName(String name) + { + for (int i = 0; i < names.size(); i++) { + if (name.equals(names.get(i))) { + return values.get(i); + } + } + return null; } + } - public abstract RubyBoolean hasValue(); + public static class EmptyNode extends ReaderNode + { - public RubyBoolean isDefault(){ - // TODO Implement. - return ruby.getFalse(); + public + EmptyNode(Ruby runtime) + { + super(runtime); + this.nodeType = ReaderNodeType.NODE.getValue(); } - public boolean isError() { return false; } - - protected void parsePrefix(String qName) { - int index = qName.indexOf(':'); - if(index != -1) prefix = qName.substring(0, index); + @Override + public IRubyObject + getXmlVersion() + { + return this.ruby.getNil(); } - public void setLang(String lang) { - this.lang = lang; + @Override + public RubyBoolean + hasValue() + { + return ruby.getFalse(); } - public IRubyObject toSyntaxError() { return ruby.getNil(); } - - public IRubyObject getNodeType() { return ruby.newFixnum(nodeType); } - - public static enum ReaderNodeType { - NODE(0), - ELEMENT(1), - ATTRIBUTE(2), - TEXT(3), - CDATA(4), - ENTITY_REFERENCE(5), - ENTITY(6), - PROCESSING_INSTRUCTION(7), - COMMENT(8), - DOCUMENT(9), - DOCUMENT_TYPE(10), - DOCUMENTFRAGMENT(11), - NOTATION(12), - WHITESPACE(13), - SIGNIFICANT_WHITESPACE(14), - END_ELEMENT(15), - END_ENTITY(16), - XML_DECLARATION(17); - - private final int value; - ReaderNodeType(int value) { - this.value = value; - } - - public int getValue() { - return value; - } + @Override + public String + getString() + { + return null; } - - public static ClosingNode createClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack langStack, Stack xmlBaseStack) { - return new ClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack); + } + + public static class 
ExceptionNode extends EmptyNode + { + private final XmlSyntaxError exception; + + public + ExceptionNode(Ruby runtime, Exception ex) + { + super(runtime); + exception = XmlSyntaxError.createXMLSyntaxError(runtime); // Nokogiri::XML::SyntaxError + exception.setException(ex); } - public static class ClosingNode extends ReaderNode { - - // public ClosingNode() {} - - ClosingNode(Ruby runtime, String uri, String localName, String qName, int depth, Stack langStack, Stack xmlBaseStack) { - super(runtime); - nodeType = ReaderNodeType.END_ELEMENT.getValue(); - this.uri = "".equals(uri) ? null : uri; - this.localName = ! isBlank(localName) ? localName : qName; - this.name = qName; - parsePrefix(qName); - this.depth = depth; - if (!langStack.isEmpty()) this.lang = langStack.peek(); - if (!xmlBaseStack.isEmpty()) this.xmlBase = xmlBaseStack.peek(); - } - - @Override - public IRubyObject getAttributeCount() { - return ruby.newFixnum(0); - } - - @Override - public RubyBoolean hasValue() { - return ruby.getFalse(); - } - - @Override - public String getString() { - return "'; - } + @Override + public boolean + isError() + { + return true; } - public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack langStack, Stack xmlBaseStack) { - return new ElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack); + @Override + public IRubyObject + toSyntaxError() + { + return this.exception; } - - public static class ElementNode extends ReaderNode { - - // public ElementNode() {} - - ElementNode(Ruby runtime, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack langStack, Stack xmlBaseStack) { - super(runtime); - this.nodeType = ReaderNodeType.ELEMENT.getValue(); - this.uri = "".equals(uri) ? null : uri; - this.localName = ! isBlank(localName) ? 
localName : qName; - this.name = qName; - parsePrefix(qName); - this.depth = depth; - parseAttributes(attrs, langStack, xmlBaseStack); - } - - @Override - public RubyBoolean hasValue() { - return ruby.getFalse(); - } - - private void parseAttributes(XMLAttributes attrs, Stack langStack, Stack xmlBaseStack) { - if (attrs.getLength() > 0) attributeList = new ReaderAttributeList(); - String u, n, v; - for (int i = 0; i < attrs.getLength(); i++) { - u = attrs.getURI(i); - n = attrs.getQName(i); - v = attrs.getValue(i); - if (isNamespace(n)) { - if (namespaces == null) namespaces = new HashMap(); - namespaces.put(n, v); - } else { - if (lang == null) lang = resolveLang(n, v, langStack); - if (xmlBase == null) xmlBase = resolveXmlBase(n, v, xmlBaseStack); - } - attributeList.add(u, n, v); - } - } - - private String resolveLang(String n, String v, Stack langStack) { - if ("xml:lang".equals(n)) { - return v; - } else if (!langStack.isEmpty()) { - return langStack.peek(); - } else { - return null; - } - } - - private String resolveXmlBase(String n, String v, Stack xmlBaseStack) { - if (isXmlBase(n)) { - return getXmlBaseUri(n, v, xmlBaseStack); - } else if (!xmlBaseStack.isEmpty()) { - return xmlBaseStack.peek(); - } else { - return null; - } - } - - private String getXmlBaseUri(String n, String v, Stack xmlBaseStack) { - if ("xml:base".equals(n)) { - if (v.startsWith("http://")) { - return v; - } else if (v.startsWith("/") && v.endsWith("/")) { - String sub = v.substring(1, v.length() - 2); - String base = xmlBaseStack.peek(); - if (base.endsWith("/")) { - base = base.substring(0, base.length() - 1); - } - int pos = base.lastIndexOf("/"); - return base.substring(0, pos).concat(sub); - } else { - String base = xmlBaseStack.peek(); - if (base.endsWith("/")) return base.concat(v); - else return base.concat("/").concat(v); - } - } else if ("xlink:href".equals(n)) { - if (v.startsWith("http://")) { - return v; - } else if (!xmlBaseStack.isEmpty()) { - String base = 
xmlBaseStack.peek(); - return base; - } - } - return null; - } - - @Override - public String getString() { - StringBuffer sb = new StringBuffer(24); - sb.append('<').append(name); - if (attributeList != null) { - for (int i=0; i'); - else sb.append("/>"); - return sb.toString(); - } + } + + public static TextNode + createTextNode(Ruby ruby, String content, int depth, Stack langStack, Stack xmlBaseStack) + { + return new TextNode(ruby, content, depth, langStack, xmlBaseStack); + } + + public static class TextNode extends ReaderNode + { + + // public TextNode() {} + + TextNode(Ruby runtime, String content, int depth, Stack langStack, Stack xmlBaseStack) + { + super(runtime); + this.value = content; + this.localName = "#text"; + this.name = "#text"; + this.depth = depth; + if (!isBlank(content)) { nodeType = ReaderNodeType.TEXT.getValue(); } + else { nodeType = ReaderNodeType.SIGNIFICANT_WHITESPACE.getValue(); } + if (!langStack.isEmpty()) { this.lang = langStack.peek(); } + if (!xmlBaseStack.isEmpty()) { this.xmlBase = xmlBaseStack.peek(); } } - private static class ReaderAttributeList { - final List namespaces = new ArrayList(); - final List names = new ArrayList(); - final List values = new ArrayList(); - int length = 0; - - void add(String namespace, String name, String value) { - namespaces.add(namespace != null ? namespace : ""); - names.add(name != null ? name : ""); - values.add(value != null ? 
value : ""); - length++; - } - - String getByName(String name) { - for (int i=0; i langStack, Stack xmlBaseStack) { - return new TextNode(ruby, content, depth, langStack, xmlBaseStack); - } - - public static class TextNode extends ReaderNode { - - // public TextNode() {} - - TextNode(Ruby runtime, String content, int depth, Stack langStack, Stack xmlBaseStack) { - super(runtime); - this.value = content; - this.localName = "#text"; - this.name = "#text"; - this.depth = depth; - if (!isBlank(content)) nodeType = ReaderNodeType.TEXT.getValue(); - else nodeType = ReaderNodeType.SIGNIFICANT_WHITESPACE.getValue(); - if (!langStack.isEmpty()) this.lang = langStack.peek(); - if (!xmlBaseStack.isEmpty()) this.xmlBase = xmlBaseStack.peek(); - } - - @Override - public RubyBoolean hasValue() { - return ruby.getTrue(); - } - - @Override - public String getString() { - return value; - } + @Override + public String + getString() + { + return value; } + } } diff --git a/ext/java/nokogiri/internals/SaveContextVisitor.java b/ext/java/nokogiri/internals/SaveContextVisitor.java index ba7be98ceb..40708dbbba 100644 --- a/ext/java/nokogiri/internals/SaveContextVisitor.java +++ b/ext/java/nokogiri/internals/SaveContextVisitor.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software 
is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespace; @@ -44,7 +12,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.cyberneko.html.HTMLElements; +import net.sourceforge.htmlunit.cyberneko.HTMLElements; import org.w3c.dom.Attr; import org.w3c.dom.CDATASection; import org.w3c.dom.Comment; @@ -66,713 +34,832 @@ * @author Patrick Mahoney * @author Yoko Harada */ -public class SaveContextVisitor { - - private final StringBuilder buffer; - private final Stack indentation; - private String encoding; - private final CharSequence indentString; - private boolean format; - private final boolean noDecl; - private final boolean noEmpty; - private final boolean noXhtml; - private final boolean asXhtml; - private boolean asXml; - private final boolean asHtml; - private final boolean asBuilder; - private boolean htmlDoc; - private final boolean fragment; - private final boolean canonical, incl_ns, with_comments; - private boolean subsets; - private boolean exclusive; - private final List c14nNodeList; - private final Deque c14nNamespaceStack; - private final Deque c14nAttrStack; - //private List c14nExclusiveInclusivePrefixes = null; - - /* - * U can't touch this. 
- * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q - * - * Taken from libxml save options. - */ - - public static final int FORMAT = 1; - public static final int NO_DECL = 2; - public static final int NO_EMPTY = 4; - public static final int NO_XHTML = 8; - public static final int AS_XHTML = 16; - public static final int AS_XML = 32; - public static final int AS_HTML = 64; - public static final int AS_BUILDER = 128; - - public static final int CANONICAL = 1; - public static final int INCL_NS = 2; - public static final int WITH_COMMENTS = 4; - public static final int SUBSETS = 8; - public static final int EXCLUSIVE = 16; - - public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) { - buffer = new StringBuilder(); - this.encoding = encoding; - indentation = new Stack(); indentation.push(""); - this.htmlDoc = htmlDoc; - this.fragment = fragment; - c14nNodeList = new ArrayList(); - c14nNamespaceStack = new ArrayDeque(); - c14nAttrStack = new ArrayDeque(); - format = (options & FORMAT) == FORMAT; - - noDecl = (options & NO_DECL) == NO_DECL; - noEmpty = (options & NO_EMPTY) == NO_EMPTY; - noXhtml = (options & NO_XHTML) == NO_XHTML; - asXhtml = (options & AS_XHTML) == AS_XHTML; - asXml = (options & AS_XML) == AS_XML; - asHtml = (options & AS_HTML) == AS_HTML; - asBuilder = (options & AS_BUILDER) == AS_BUILDER; - - canonical = (canonicalOpts & CANONICAL) == CANONICAL; - incl_ns = (canonicalOpts & INCL_NS) == INCL_NS; - with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS; - subsets = (canonicalOpts & SUBSETS) == SUBSETS; - - if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces - if ((!format && indent != null) && indent.length() > 0) format = true; - if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces - indentString = indent; - if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = 
true; - } - - @Override - public String toString() { - return buffer.toString(); - } - - public StringBuilder getInternalBuffer() { return buffer; } - - public void setHtmlDoc(boolean htmlDoc) { - this.htmlDoc = htmlDoc; - } - - public void setEncoding(String encoding) { - this.encoding = encoding; - } - - public boolean enter(Node node) { - if (node instanceof Document) { - return enter((Document)node); - } - if (node instanceof Element) { - return enter((Element)node); - } - if (node instanceof Attr) { - return enter((Attr)node); - } - if (node instanceof Text) { - return enter((Text)node); - } - if (node instanceof CDATASection) { - return enter((CDATASection)node); - } - if (node instanceof Comment) { - return enter((Comment)node); - } - if (node instanceof DocumentType) { - return enter((DocumentType)node); - } - if (node instanceof Entity) { - return enter((Entity)node); - } - if (node instanceof EntityReference) { - return enter((EntityReference) node); - } - if (node instanceof Notation) { - return enter((Notation)node); - } - if (node instanceof ProcessingInstruction) { - return enter((ProcessingInstruction)node); - } - return false; - } - - public void leave(Node node) { - if (node instanceof Document) { - leave((Document)node); - return; - } - if (node instanceof Element) { - leave((Element)node); - return; - } - if (node instanceof Attr) { - leave((Attr)node); - return; - } - if (node instanceof Text) { - return; - } - if (node instanceof CDATASection) { - leave((CDATASection)node); - return; - } - if (node instanceof Comment) { - leave((Comment)node); - return; - } - if (node instanceof DocumentType) { - leave((DocumentType)node); - return; - } - if (node instanceof Entity) { - leave((Entity)node); - return; - } - if (node instanceof EntityReference) { - leave((EntityReference) node); - return; - } - if (node instanceof Notation) { - leave((Notation)node); - return; - } - if (node instanceof ProcessingInstruction) { - 
leave((ProcessingInstruction)node); - return; - } - } - - public boolean enter(String string) { - buffer.append(string); - return true; - } - - public void leave(String string) { - // no-op - } - - public boolean enter(Attr attr) { - String name = attr.getName(); - buffer.append(name); - if (!asHtml || !isHtmlBooleanAttr(name)) { - buffer.append('='); - buffer.append('"'); - String value = replaceCharsetIfNecessary(attr); - buffer.append(serializeAttrTextContent(value, htmlDoc)); - buffer.append('"'); - } - return true; - } - - private static final Pattern CHARSET = - Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE); - - private String replaceCharsetIfNecessary(Attr attr) { - String value = attr.getValue(); - if (encoding == null) return value; // unable to replace in any case - if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr - if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value; - Matcher m = CHARSET.matcher(value); - if (!m.find()) return value; - if (value.contains(encoding)) return value; // no need to replace - return value.replace(m.group(), "charset=" + encoding); - } - - static final Set HTML_BOOLEAN_ATTRS; - static { - final String[] _HTML_BOOLEAN_ATTRS = { - "checked", "compact", "declare", "defer", "disabled", "ismap", - "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", - "selected" - }; - HTML_BOOLEAN_ATTRS = new HashSet(Arrays.asList(_HTML_BOOLEAN_ATTRS)); - } - - private static boolean isHtmlBooleanAttr(String name) { - return HTML_BOOLEAN_ATTRS.contains(name); - } - - private static CharSequence serializeAttrTextContent(String str, boolean htmlDoc) { - if (str == null || str.length() == 0) return ""; - - StringBuilder buffer = new StringBuilder(str.length() + 16); - - for (int i = 0; i < str.length(); i++) { - char c; switch (c = str.charAt(i)) { - case '\n': buffer.append(" "); break; - case '\r': buffer.append(" "); 
break; - case '\t': buffer.append(" "); break; - case '"': if (htmlDoc) buffer.append("%22"); - else buffer.append("""); - break; - case '<': buffer.append("<"); break; - case '>': buffer.append(">"); break; - case '&': buffer.append("&"); break; - default: buffer.append(c); - } - } - - return buffer; - } - - public void leave(Attr attr) { - // no-op - } - - public boolean enter(CDATASection cdata) { - buffer.append(""); - return true; - } - - public void leave(CDATASection cdata) { - // no-op - } - - public boolean enter(Comment comment) { - if (canonical) { - c14nNodeList.add(comment); - if (!with_comments) return true; - } - buffer.append(""); - return true; - } - - public void leave(Comment comment) { - // no-op - } - - public boolean enter(Document document) { - if (!noDecl) { - buffer.append("\n"); - } - return true; - } - - public void leave(Document document) { - // no-op - } - - public boolean enter(DocumentType docType) { - if (canonical) { - c14nNodeList.add(docType); - return true; - } - String name = docType.getName(); - String pubId = docType.getPublicId(); - String sysId = docType.getSystemId(); - String internalSubset = docType.getInternalSubset(); - if (docType.getPreviousSibling() != null) { - buffer.append('\n'); - } - buffer.append("\n"); - return true; - } - - public void leave(DocumentType docType) { - // no-op - } - - public boolean enter(Element element) { - if (canonical) { - c14nNodeList.add(element); - if (element == element.getOwnerDocument().getDocumentElement()) { - c14nNodeList.add(element.getOwnerDocument()); - } - } - String current = indentation.peek(); - buffer.append(current); - if (needIndent(element)) { - indentation.push(current + indentString); - } - String name = element.getTagName(); - buffer.append('<').append(name); - Attr[] attrs = getAttrsAndNamespaces(element); - for (Attr attr : attrs) { - if (attr.getSpecified()) { - buffer.append(' '); - enter(attr); - leave(attr); - } - } - if (element.hasChildNodes()) { - 
buffer.append('>'); - if (needBreakInOpening(element)) buffer.append('\n'); - return true; - } - // no child - if (asHtml) { - buffer.append('>'); - } else if (asXml && noEmpty) { - buffer.append('>'); - } else if (asXhtml) { - if (isEmpty(name)) { - buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2 - } else { - buffer.append('>'); - } - } else { - buffer.append("/>"); - } - if (needBreakInOpening(element)) { - buffer.append('\n'); - } - return true; - } - - private boolean needIndent(Element element) { - if (containsText(element)) return false; - if (fragment) return false; // a given option might be fragment and format. fragment matters - if (format || asBuilder) return true; - return false; - } - - private boolean needBreakInOpening(Element element) { - if (containsText(element)) return false; - if (fragment) return false; - if (format) return true; - if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true; - if (format && element.getNextSibling() == null && element.hasChildNodes()) return true; - return false; - } - - private boolean isEmpty(String name) { - HTMLElements.Element element = HTMLElements.getElement(name); - return element.isEmpty(); - } - - private Attr[] getAttrsAndNamespaces(Element element) { - NamedNodeMap attrs = element.getAttributes(); - if (!canonical) { - if (attrs == null || attrs.getLength() == 0) return new Attr[0]; - Attr[] attrsAndNamespaces = new Attr[attrs.getLength()]; - for (int i=0; i indentation; + private String encoding; + private final CharSequence indentString; + private boolean format; + private final boolean noDecl; + private final boolean noEmpty; + private final boolean noXhtml; + private final boolean asXhtml; + private boolean asXml; + private final boolean asHtml; + private final boolean asBuilder; + private boolean htmlDoc; + private final boolean fragment; + private final boolean canonical, incl_ns, with_comments; + private boolean 
subsets; + private boolean exclusive; + private final List c14nNodeList; + private final Deque c14nNamespaceStack; + private final Deque c14nAttrStack; + //private List c14nExclusiveInclusivePrefixes = null; + + /* + * U can't touch this. + * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q + * + * Taken from libxml save options. + */ + + public static final int FORMAT = 1; + public static final int NO_DECL = 2; + public static final int NO_EMPTY = 4; + public static final int NO_XHTML = 8; + public static final int AS_XHTML = 16; + public static final int AS_XML = 32; + public static final int AS_HTML = 64; + public static final int AS_BUILDER = 128; + + public static final int CANONICAL = 1; + public static final int INCL_NS = 2; + public static final int WITH_COMMENTS = 4; + public static final int SUBSETS = 8; + public static final int EXCLUSIVE = 16; + + private static final HTMLElements htmlElements_ = new HTMLElements(); + + public + SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, + int canonicalOpts) + { + buffer = new StringBuilder(); + this.encoding = encoding; + indentation = new Stack(); + indentation.push(""); + this.htmlDoc = htmlDoc; + this.fragment = fragment; + c14nNodeList = new ArrayList(); + c14nNamespaceStack = new ArrayDeque(); + c14nAttrStack = new ArrayDeque(); + format = (options & FORMAT) == FORMAT; + + noDecl = (options & NO_DECL) == NO_DECL; + noEmpty = (options & NO_EMPTY) == NO_EMPTY; + noXhtml = (options & NO_XHTML) == NO_XHTML; + asXhtml = (options & AS_XHTML) == AS_XHTML; + asXml = (options & AS_XML) == AS_XML; + asHtml = (options & AS_HTML) == AS_HTML; + asBuilder = (options & AS_BUILDER) == AS_BUILDER; + + canonical = (canonicalOpts & CANONICAL) == CANONICAL; + incl_ns = (canonicalOpts & INCL_NS) == INCL_NS; + with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS; + subsets = (canonicalOpts & SUBSETS) == SUBSETS; + + if ((format && indent == null) || (format && 
indent.length() == 0)) { indent = " "; } // default, two spaces + if ((!format && indent != null) && indent.length() > 0) { format = true; } + if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) { indent = " "; } // default, two spaces + indentString = indent; + if (!asXml && !asHtml && !asXhtml && !asBuilder) { asXml = true; } + } + + @Override + public String + toString() + { + return buffer.toString(); + } + + public StringBuilder + getInternalBuffer() { return buffer; } + + public void + setHtmlDoc(boolean htmlDoc) + { + this.htmlDoc = htmlDoc; + } + + public void + setEncoding(String encoding) + { + this.encoding = encoding; + } + + public boolean + enter(Node node) + { + if (node instanceof Document) { + return enter((Document)node); + } + if (node instanceof Element) { + return enter((Element)node); + } + if (node instanceof Attr) { + return enter((Attr)node); + } + if (node instanceof Text) { + return enter((Text)node); + } + if (node instanceof CDATASection) { + return enter((CDATASection)node); + } + if (node instanceof Comment) { + return enter((Comment)node); + } + if (node instanceof DocumentType) { + return enter((DocumentType)node); + } + if (node instanceof Entity) { + return enter((Entity)node); + } + if (node instanceof EntityReference) { + return enter((EntityReference) node); + } + if (node instanceof Notation) { + return enter((Notation)node); + } + if (node instanceof ProcessingInstruction) { + return enter((ProcessingInstruction)node); + } + return false; + } + + public void + leave(Node node) + { + if (node instanceof Document) { + leave((Document)node); + return; + } + if (node instanceof Element) { + leave((Element)node); + return; + } + if (node instanceof Attr) { + leave((Attr)node); + return; + } + if (node instanceof Text) { + return; + } + if (node instanceof CDATASection) { + leave((CDATASection)node); + return; + } + if (node instanceof Comment) { + leave((Comment)node); + return; + } + if (node instanceof 
DocumentType) { + leave((DocumentType)node); + return; + } + if (node instanceof Entity) { + leave((Entity)node); + return; + } + if (node instanceof EntityReference) { + leave((EntityReference) node); + return; + } + if (node instanceof Notation) { + leave((Notation)node); + return; + } + if (node instanceof ProcessingInstruction) { + leave((ProcessingInstruction)node); + return; + } + } + + public boolean + enter(String string) + { + buffer.append(string); + return true; + } + + public void + leave(String string) + { + // no-op + } + + public boolean + enter(Attr attr) + { + String name = attr.getName(); + buffer.append(name); + if (!asHtml || !isHtmlBooleanAttr(name)) { + buffer.append('='); + buffer.append('"'); + String value = replaceCharsetIfNecessary(attr); + buffer.append(serializeAttrTextContent(value, htmlDoc)); + buffer.append('"'); + } + return true; + } + + private static final Pattern CHARSET = + Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE); + + private String + replaceCharsetIfNecessary(Attr attr) + { + String value = attr.getValue(); + if (encoding == null) { return value; } // unable to replace in any case + if (!"content".equals(attr.getName().toLowerCase())) { return value; } // must be content attr + if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) { return value; } + Matcher m = CHARSET.matcher(value); + if (!m.find()) { return value; } + if (value.contains(encoding)) { return value; } // no need to replace + return value.replace(m.group(), "charset=" + encoding); + } + + static final Set HTML_BOOLEAN_ATTRS; + static + { + final String[] _HTML_BOOLEAN_ATTRS = { + "checked", "compact", "declare", "defer", "disabled", "ismap", + "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", + "selected" + }; + HTML_BOOLEAN_ATTRS = new HashSet(Arrays.asList(_HTML_BOOLEAN_ATTRS)); + } + + private static boolean + isHtmlBooleanAttr(String name) + { + return 
HTML_BOOLEAN_ATTRS.contains(name); + } + + private static CharSequence + serializeAttrTextContent(String str, boolean htmlDoc) + { + if (str == null || str.length() == 0) { return ""; } + + StringBuilder buffer = new StringBuilder(str.length() + 16); + + for (int i = 0; i < str.length(); i++) { + char c; + switch (c = str.charAt(i)) { + case '\n': + buffer.append(" "); + break; + case '\r': + buffer.append(" "); + break; + case '\t': + buffer.append(" "); + break; + case '"': + if (htmlDoc) { buffer.append("%22"); } + else { buffer.append("""); } + break; + case '<': + buffer.append("<"); + break; + case '>': + buffer.append(">"); + break; + case '&': + buffer.append("&"); + break; + default: + buffer.append(c); + } + } + + return buffer; + } + + public void + leave(Attr attr) + { + // no-op + } + + public boolean + enter(CDATASection cdata) + { + buffer.append(""); + return true; + } + + public void + leave(CDATASection cdata) + { + // no-op + } + + public boolean + enter(Comment comment) + { + if (canonical) { + c14nNodeList.add(comment); + if (!with_comments) { return true; } + } + buffer.append(""); + return true; + } + + public void + leave(Comment comment) + { + // no-op + } + + public boolean + enter(Document document) + { + if (!noDecl) { + buffer.append("\n"); + } + return true; + } + + public void + leave(Document document) + { + // no-op + } + + public boolean + enter(DocumentType docType) + { + if (canonical) { + c14nNodeList.add(docType); + return true; + } + String name = docType.getName(); + String pubId = docType.getPublicId(); + String sysId = docType.getSystemId(); + String internalSubset = docType.getInternalSubset(); + if (docType.getPreviousSibling() != null) { + buffer.append('\n'); + } + buffer.append("\n"); + return true; + } + + public void + leave(DocumentType docType) + { + // no-op + } + + public boolean + enter(Element element) + { + if (canonical) { + c14nNodeList.add(element); + if (element == 
element.getOwnerDocument().getDocumentElement()) { + c14nNodeList.add(element.getOwnerDocument()); + } + } + String current = indentation.peek(); + buffer.append(current); + if (needIndent(element)) { + indentation.push(current + indentString); + } + String name = element.getTagName(); + buffer.append('<').append(name); + Attr[] attrs = getAttrsAndNamespaces(element); + for (Attr attr : attrs) { + if (attr.getSpecified()) { + buffer.append(' '); + enter(attr); + leave(attr); + } + } + if (element.hasChildNodes()) { + buffer.append('>'); + if (needBreakInOpening(element)) { buffer.append('\n'); } + return true; + } + // no child + if (asHtml) { + buffer.append('>'); + } else if (asXml && noEmpty) { + buffer.append('>'); + } else if (asXhtml) { + if (isEmpty(name)) { + buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2 + } else { + buffer.append('>'); + } + } else { + buffer.append("/>"); + } + if (needBreakInOpening(element)) { + buffer.append('\n'); + } + return true; + } + + private boolean + needIndent(Element element) + { + if (containsText(element)) { return false; } + if (fragment) { return false; } // a given option might be fragment and format. 
fragment matters + if (format || asBuilder) { return true; } + return false; + } + + private boolean + needBreakInOpening(Element element) + { + if (containsText(element)) { return false; } + if (fragment) { return false; } + if (format) { return true; } + if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; } + if (format && element.getNextSibling() == null && element.hasChildNodes()) { return true; } + return false; + } + + private boolean + isEmpty(String name) + { + HTMLElements.Element element = htmlElements_.getElement(name); + return element.isEmpty(); + } + + private Attr[] + getAttrsAndNamespaces(Element element) + { + NamedNodeMap attrs = element.getAttributes(); + if (!canonical) { + if (attrs == null || attrs.getLength() == 0) { return new Attr[0]; } + Attr[] attrsAndNamespaces = new Attr[attrs.getLength()]; + for (int i = 0; i < attrs.getLength(); i++) { + attrsAndNamespaces[i] = (Attr) attrs.item(i); + } + return attrsAndNamespaces; + } else { + List namespaces = new ArrayList(); + List attributes = new ArrayList(); + if (subsets) { + getAttrsOfAncestors(element.getParentNode(), namespaces, attributes); + Attr[] namespaceOfAncestors = getSortedArray(namespaces); + Attr[] attributeOfAncestors = getSortedArray(attributes); + c14nNamespaceStack.push(namespaceOfAncestors); + c14nAttrStack.push(attributeOfAncestors); + subsets = false; // namespace propagation should be done only once on top level node. 
+ } + + getNamespacesAndAttrs(element, namespaces, attributes); + + Attr[] namespaceArray = getSortedArray(namespaces); + Attr[] attributeArray = getSortedArray(attributes); + Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length]; + for (int i = 0; i < allAttrs.length; i++) { + if (i < namespaceArray.length) { + allAttrs[i] = namespaceArray[i]; } else { - List namespaces = new ArrayList(); - List attributes = new ArrayList(); - if (subsets) { - getAttrsOfAncestors(element.getParentNode(), namespaces, attributes); - Attr[] namespaceOfAncestors = getSortedArray(namespaces); - Attr[] attributeOfAncestors = getSortedArray(attributes); - c14nNamespaceStack.push(namespaceOfAncestors); - c14nAttrStack.push(attributeOfAncestors); - subsets = false; // namespace propagation should be done only once on top level node. - } - - getNamespacesAndAttrs(element, namespaces, attributes); - - Attr[] namespaceArray = getSortedArray(namespaces); - Attr[] attributeArray = getSortedArray(attributes); - Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length]; - for (int i=0; i namespaces, List attributes) { - if (parent == null) return; - NamedNodeMap attrs = parent.getAttributes(); - if (attrs == null || attrs.getLength() == 0) return; - for (int i=0; i < attrs.getLength(); i++) { - Attr attr = (Attr)attrs.item(i); - if (isNamespace(attr.getNodeName())) namespaces.add(attr); - else attributes.add(attr); - } - getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes); - } - - private void getNamespacesAndAttrs(Node current, List namespaces, List attributes) { - NamedNodeMap attrs = current.getAttributes(); - for (int i=0; i namespaces, Attr attr) { - boolean newNamespace = true; - Iterator iter = c14nNamespaceStack.iterator(); - while (iter.hasNext()) { - Attr[] parentNamespaces = iter.next(); - for (int n=0; n < parentNamespaces.length; n++) { - if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) { - if 
(parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) { - // exactly the same namespace should not be added - newNamespace = false; - } else { - // in case of namespace url change, propagated namespace will be override - namespaces.remove(parentNamespaces[n]); - } - } - } - if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr); - } - } - - private void getAttributesWithPropagated(List attributes, Attr attr) { - boolean newAttribute = true; - Iterator iter = c14nAttrStack.iterator(); - while (iter.hasNext()) { - Attr[] parentAttr = iter.next(); - for (int n=0; n < parentAttr.length; n++) { - if (!parentAttr[n].getNodeName().startsWith("xml:")) continue; - if (parentAttr[n].getNodeName().equals(attr.getNodeName())) { - if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) { - // exactly the same attribute should not be added - newAttribute = false; - } else { - // in case of attribute value change, propagated attribute will be override - attributes.remove(parentAttr[n]); - } - } - } - if (newAttribute) attributes.add(attr); - } - } - - private void verifyXmlSpace(List attributes, NamedNodeMap attrs) { - Attr attr = (Attr) attrs.getNamedItem("xml:space"); - if (attr == null) { - for (int i=0; i < attributes.size(); i++) { - if (attributes.get(i).getNodeName().equals("xml:space")) { - attributes.remove(i); - break; - } - } - } - } - - private Attr[] getSortedArray(List attrList) { - Attr[] attrArray = attrList.toArray(new Attr[0]); - Arrays.sort(attrArray, new Comparator() { - @Override - public int compare(Attr attr0, Attr attr1) { - return attr0.getNodeName().compareTo(attr1.getNodeName()); - } - }); - return attrArray; - } - - public void leave(Element element) { - if (canonical) { - c14nNamespaceStack.poll(); - c14nAttrStack.poll(); - } - String name = element.getTagName(); - if (element.hasChildNodes()) { - if (needIndentInClosing(element)) { - indentation.pop(); - buffer.append(indentation.peek()); - } else if (asBuilder) { - if 
(!containsText(element)) indentation.pop(); - } - buffer.append("'); - if (needBreakInClosing(element)) { - buffer.append('\n'); - } - return; - } - // no child, but HTML might need a closing tag. - if (asHtml || noEmpty) { - if (!isEmpty(name) && noEmpty) { - buffer.append("'); - } - } - if (needBreakInClosing(element)) { - if (!containsText(element)) indentation.pop(); - buffer.append('\n'); - } - } - - private boolean needIndentInClosing(Element element) { - if (containsText(element)) return false; - - if (fragment) return false; // a given option might be fragment and format. fragment matters - if (format) return true; - if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true; - return false; - } - - private boolean needBreakInClosing(Element element) { - if (fragment) return false; - if (format || asBuilder) return true; - return false; - } - - private boolean containsText(Element element) { - return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE); - } - - public boolean enter(Entity entity) { - String name = entity.getNodeName(); - String pubId = entity.getPublicId(); - String sysId = entity.getSystemId(); - String notation = entity.getNotationName(); - buffer.append(""); - return true; - } - - public void leave(Entity entity) { - // no-op - } - - public boolean enter(EntityReference entityRef) { - buffer.append('&').append(entityRef.getNodeName()).append(';'); - return true; - } - public void leave(EntityReference entityRef) { - // no-op - } - - public boolean enter(Notation notation) { - String name = notation.getNodeName(); - String pubId = notation.getPublicId(); - String sysId = notation.getSystemId(); - buffer.append(""); + allAttrs[i] = attributeArray[i - namespaceArray.length]; + } + } + c14nNamespaceStack.push(namespaceArray); + c14nAttrStack.push(attributeArray); + return allAttrs; + } + + } + + private void + getAttrsOfAncestors(Node 
parent, List namespaces, List attributes) + { + if (parent == null) { return; } + NamedNodeMap attrs = parent.getAttributes(); + if (attrs == null || attrs.getLength() == 0) { return; } + for (int i = 0; i < attrs.getLength(); i++) { + Attr attr = (Attr)attrs.item(i); + if (isNamespace(attr.getNodeName())) { namespaces.add(attr); } + else { attributes.add(attr); } + } + getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes); + } + + private void + getNamespacesAndAttrs(Node current, List namespaces, List attributes) + { + NamedNodeMap attrs = current.getAttributes(); + for (int i = 0; i < attrs.getLength(); i++) { + Attr attr = (Attr)attrs.item(i); + if (isNamespace(attr.getNodeName())) { + getNamespacesWithPropagated(namespaces, attr); + } else { + getAttributesWithPropagated(attributes, attr); + } + if (exclusive) { + verifyXmlSpace(attributes, attrs); + } + } + } + + private void + getNamespacesWithPropagated(List namespaces, Attr attr) + { + boolean newNamespace = true; + Iterator iter = c14nNamespaceStack.iterator(); + while (iter.hasNext()) { + Attr[] parentNamespaces = iter.next(); + for (int n = 0; n < parentNamespaces.length; n++) { + if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) { + if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) { + // exactly the same namespace should not be added + newNamespace = false; + } else { + // in case of namespace url change, propagated namespace will be override + namespaces.remove(parentNamespaces[n]); + } + } + } + if (newNamespace && !namespaces.contains(attr)) { namespaces.add(attr); } + } + } + + private void + getAttributesWithPropagated(List attributes, Attr attr) + { + boolean newAttribute = true; + Iterator iter = c14nAttrStack.iterator(); + while (iter.hasNext()) { + Attr[] parentAttr = iter.next(); + for (int n = 0; n < parentAttr.length; n++) { + if (!parentAttr[n].getNodeName().startsWith("xml:")) { continue; } + if 
(parentAttr[n].getNodeName().equals(attr.getNodeName())) { + if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) { + // exactly the same attribute should not be added + newAttribute = false; + } else { + // in case of attribute value change, propagated attribute will be override + attributes.remove(parentAttr[n]); + } + } + } + if (newAttribute) { attributes.add(attr); } + } + } + + private void + verifyXmlSpace(List attributes, NamedNodeMap attrs) + { + Attr attr = (Attr) attrs.getNamedItem("xml:space"); + if (attr == null) { + for (int i = 0; i < attributes.size(); i++) { + if (attributes.get(i).getNodeName().equals("xml:space")) { + attributes.remove(i); + break; + } + } + } + } + + private Attr[] + getSortedArray(List attrList) + { + Attr[] attrArray = attrList.toArray(new Attr[0]); + Arrays.sort(attrArray, new Comparator() { + @Override + public int compare(Attr attr0, Attr attr1) { + return attr0.getNodeName().compareTo(attr1.getNodeName()); + } + }); + return attrArray; + } + + public void + leave(Element element) + { + if (canonical) { + c14nNamespaceStack.poll(); + c14nAttrStack.poll(); + } + String name = element.getTagName(); + if (element.hasChildNodes()) { + if (needIndentInClosing(element)) { + indentation.pop(); + buffer.append(indentation.peek()); + } else if (asBuilder) { + if (!containsText(element)) { indentation.pop(); } + } + buffer.append("'); + if (needBreakInClosing(element)) { + buffer.append('\n'); + } + return; + } + // no child, but HTML might need a closing tag. + if (asHtml || noEmpty) { + if (!isEmpty(name) && noEmpty) { + buffer.append("'); + } + } else if (asXhtml && !isEmpty(name)) { + buffer.append("'); + } + if (needBreakInClosing(element)) { + if (!containsText(element)) { indentation.pop(); } + buffer.append('\n'); + } + } + + private boolean + needIndentInClosing(Element element) + { + if (containsText(element)) { return false; } + + if (fragment) { return false; } // a given option might be fragment and format. 
fragment matters + if (format) { return true; } + if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; } + return false; + } + + private boolean + needBreakInClosing(Element element) + { + if (fragment) { return false; } + if (format || asBuilder) { return true; } + return false; + } + + private boolean + containsText(Element element) + { + return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE); + } + + public boolean + enter(Entity entity) + { + String name = entity.getNodeName(); + String pubId = entity.getPublicId(); + String sysId = entity.getSystemId(); + String notation = entity.getNotationName(); + buffer.append(""); + return true; + } + + public void + leave(Entity entity) + { + // no-op + } + + public boolean + enter(EntityReference entityRef) + { + buffer.append('&').append(entityRef.getNodeName()).append(';'); + return true; + } + public void + leave(EntityReference entityRef) + { + // no-op + } + + public boolean + enter(Notation notation) + { + String name = notation.getNodeName(); + String pubId = notation.getPublicId(); + String sysId = notation.getSystemId(); + buffer.append(""); + return true; + } + + public void + leave(Notation notation) + { + // no-op + } + + public boolean + enter(ProcessingInstruction pi) + { + buffer.append(""); } + else { buffer.append("?>"); } + buffer.append("\n"); + if (canonical) { c14nNodeList.add(pi); } + return true; + } + + public void + leave(ProcessingInstruction pi) + { + // no-op + } + + private boolean + isHtmlScript(Text text) + { + return htmlDoc && text.getParentNode().getNodeName().equals("script"); + } + + private boolean + isHtmlStyle(Text text) + { + return htmlDoc && text.getParentNode().getNodeName().equals("style"); + } + + public boolean + enter(Text text) + { + CharSequence textContent = text.getNodeValue(); + if (canonical) { + c14nNodeList.add(text); + if (isBlank(textContent)) { + 
buffer.append(canonicalizeWhitespace(textContent)); return true; - } - - public void leave(Notation notation) { - // no-op - } - - public boolean enter(ProcessingInstruction pi) { - buffer.append(""); - else buffer.append("?>"); - buffer.append("\n"); - if (canonical) c14nNodeList.add(pi); - return true; - } - - public void leave(ProcessingInstruction pi) { - // no-op - } - - private boolean isHtmlScript(Text text) { - return htmlDoc && text.getParentNode().getNodeName().equals("script"); - } - - private boolean isHtmlStyle(Text text) { - return htmlDoc && text.getParentNode().getNodeName().equals("style"); - } - - public boolean enter(Text text) { - CharSequence textContent = text.getNodeValue(); - if (canonical) { - c14nNodeList.add(text); - if (isBlank(textContent)) { - buffer.append(canonicalizeWhitespace(textContent)); - return true; - } - } - - if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) { - textContent = encodeJavaString(textContent); - } - - textContent = encodeStringToHtmlEntity(textContent); - buffer.append(textContent); - return true; - } - - private CharSequence encodeStringToHtmlEntity(CharSequence text) { - if (encoding == null) return text; - - CharsetEncoder encoder = Charset.forName(encoding).newEncoder(); - StringBuilder sb = new StringBuilder(text.length() + 16); - // make sure we can handle code points that are higher than 2 bytes - for ( int i = 0; i < text.length(); ) { - int code = Character.codePointAt(text, i); - // TODO not sure about bigger offset then 2 ?! - int offset = code > 65535 ? 
2 : 1; - CharSequence substr = text.subSequence(i, i + offset); - boolean canEncode = encoder.canEncode(substr); - if (canEncode) { - sb.append(substr); - } - else { - sb.append("&#x").append(Integer.toHexString(code)).append(';'); - } - i += offset; - } - return sb; - } + } + } + + if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) { + textContent = encodeJavaString(textContent); + } + + textContent = encodeStringToHtmlEntity(textContent); + buffer.append(textContent); + return true; + } + + private CharSequence + encodeStringToHtmlEntity(CharSequence text) + { + if (encoding == null) { return text; } + + CharsetEncoder encoder = Charset.forName(encoding).newEncoder(); + StringBuilder sb = new StringBuilder(text.length() + 16); + // make sure we can handle code points that are higher than 2 bytes + for (int i = 0; i < text.length();) { + int code = Character.codePointAt(text, i); + // TODO not sure about bigger offset then 2 ?! + int offset = code > 65535 ? 2 : 1; + CharSequence substr = text.subSequence(i, i + offset); + boolean canEncode = encoder.canEncode(substr); + if (canEncode) { + sb.append(substr); + } else { + sb.append("&#x").append(Integer.toHexString(code)).append(';'); + } + i += offset; + } + return sb; + } } diff --git a/ext/java/nokogiri/internals/SchemaErrorHandler.java b/ext/java/nokogiri/internals/SchemaErrorHandler.java index 098f671459..a6a8bf669a 100644 --- a/ext/java/nokogiri/internals/SchemaErrorHandler.java +++ b/ext/java/nokogiri/internals/SchemaErrorHandler.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of 
this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; @@ -44,30 +12,39 @@ /** * Error handler for Relax and W3C XML Schema. 
- * + * * @author sergio * @author Yoko Harada */ -public class SchemaErrorHandler implements ErrorHandler { - - private final Ruby runtime; - final RubyArray errors; - - public SchemaErrorHandler(Ruby ruby, RubyArray array) { - this.runtime = ruby; - this.errors = array; - } - - public void warning(SAXParseException ex) throws SAXException { - errors.append( XmlSyntaxError.createWarning(runtime, ex) ); - } - - public void error(SAXParseException ex) throws SAXException { - errors.append( XmlSyntaxError.createError(runtime, ex) ); - } - - public void fatalError(SAXParseException ex) throws SAXException { - throw ex; - } +public class SchemaErrorHandler implements ErrorHandler +{ + + private final Ruby runtime; + final RubyArray errors; + + public + SchemaErrorHandler(Ruby ruby, RubyArray array) + { + this.runtime = ruby; + this.errors = array; + } + + public void + warning(SAXParseException ex) throws SAXException + { + errors.append(XmlSyntaxError.createWarning(runtime, ex)); + } + + public void + error(SAXParseException ex) throws SAXException + { + errors.append(XmlSyntaxError.createError(runtime, ex)); + } + + public void + fatalError(SAXParseException ex) throws SAXException + { + throw ex; + } } diff --git a/ext/java/nokogiri/internals/UncloseableInputStream.java b/ext/java/nokogiri/internals/UncloseableInputStream.java deleted file mode 100644 index e285e595df..0000000000 --- a/ext/java/nokogiri/internals/UncloseableInputStream.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 
'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package nokogiri.internals; - -import java.io.IOException; -import java.io.InputStream; - -/** - * Delegates all the methods to another InputStream except the - * close() method, which is ignored. This is used to fix #495. - * - * @author John Shahid - */ -public class UncloseableInputStream extends InputStream { - private final InputStream delegate; - - /** - * Create a new uncloseable stream. - * - * @param delegate The InputStream to which all methods (except close) - * will be delegated. 
- */ - public UncloseableInputStream(InputStream delegate) { - this.delegate = delegate; - } - - @Override - public int read() throws IOException { - return delegate.read(); - } - - @Override - public int read(byte []b) throws IOException { - return delegate.read(b); - } - - @Override - public int read(byte []b, int offset, int len) throws IOException { - return delegate.read(b, offset, len); - } - - @Override - public long skip(long n) throws IOException { - return delegate.skip(n); - } - - @Override - public int available() throws IOException { - return delegate.available(); - } - - @Override - public void close() { - // don't forward this to the InputStream we're delegating from - // we don't want the InputStream of the RubyIO to be closed - } - - @Override - public void mark(int readlimit) { - delegate.mark(readlimit); - } - - @Override - public void reset() throws IOException { - delegate.reset(); - } - - @Override - public boolean markSupported() { - return delegate.markSupported(); - } -} diff --git a/ext/java/nokogiri/internals/XalanDTMManagerPatch.java b/ext/java/nokogiri/internals/XalanDTMManagerPatch.java index f85675b0ee..da01de1111 100644 --- a/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +++ b/ext/java/nokogiri/internals/XalanDTMManagerPatch.java @@ -32,137 +32,143 @@ /** * @author kares */ -public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault { - - /** - * Given a W3C DOM node, try and return a DTM handle. - * Note: calling this may be non-optimal, and there is no guarantee that - * the node will be found in any particular DTM. - * - * @param node Non-null reference to a DOM node. - * - * @return a valid DTM handle. 
- */ - @Override - public /* synchronized */ int getDTMHandleFromNode(org.w3c.dom.Node node) { - //if (node == null) // "node must be non-null for getDTMHandleFromNode!"); - // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null)); - assert node != null; - - if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) { - return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber(); - } +public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault +{ + + /** + * Given a W3C DOM node, try and return a DTM handle. + * Note: calling this may be non-optimal, and there is no guarantee that + * the node will be found in any particular DTM. + * + * @param node Non-null reference to a DOM node. + * + * @return a valid DTM handle. + */ + @Override + public /* synchronized */ int + getDTMHandleFromNode(org.w3c.dom.Node node) + { + //if (node == null) // "node must be non-null for getDTMHandleFromNode!"); + // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null)); + assert node != null; + + if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) { + return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber(); + } - // Find the DOM2DTMs wrapped around this Document (if any) - // and check whether they contain the Node in question. - // - // NOTE that since a DOM2DTM may represent a subtree rather - // than a full document, we have to be prepared to check more - // than one -- and there is no guarantee that we will find - // one that contains ancestors or siblings of the node we're - // seeking. - // - // %REVIEW% We could search for the one which contains this - // node at the deepest level, and thus covers the widest - // subtree, but that's going to entail additional work - // checking more DTMs... and getHandleOfNode is not a - // cheap operation in most implementations. 
- // - // TODO: %REVIEW% If overflow addressing, we may recheck a DTM - // already examined. Ouch. But with the increased number of DTMs, - // scanning back to check this is painful. - // POSSIBLE SOLUTIONS: - // Generate a list of _unique_ DTM objects? - // Have each DTM cache last DOM node search? - for(int i = 0; i < m_dtms.length; i++) { - DTM thisDTM = m_dtms[i]; - if (thisDTM instanceof DOM2DTM) { - int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node); - if (handle != DTM.NULL) { - return handle; - } - } + // Find the DOM2DTMs wrapped around this Document (if any) + // and check whether they contain the Node in question. + // + // NOTE that since a DOM2DTM may represent a subtree rather + // than a full document, we have to be prepared to check more + // than one -- and there is no guarantee that we will find + // one that contains ancestors or siblings of the node we're + // seeking. + // + // %REVIEW% We could search for the one which contains this + // node at the deepest level, and thus covers the widest + // subtree, but that's going to entail additional work + // checking more DTMs... and getHandleOfNode is not a + // cheap operation in most implementations. + // + // TODO: %REVIEW% If overflow addressing, we may recheck a DTM + // already examined. Ouch. But with the increased number of DTMs, + // scanning back to check this is painful. + // POSSIBLE SOLUTIONS: + // Generate a list of _unique_ DTM objects? + // Have each DTM cache last DOM node search? + for (int i = 0; i < m_dtms.length; i++) { + DTM thisDTM = m_dtms[i]; + if (thisDTM instanceof DOM2DTM) { + int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node); + if (handle != DTM.NULL) { + return handle; } + } + } - // Not found; generate a new DTM. - // - // %REVIEW% Is this really desirable, or should we return null - // and make folks explicitly instantiate from a DOMSource? The - // latter is more work but gives the caller the opportunity to - // explicitly add the DTM to a DTMManager... 
and thus to know when - // it can be discarded again, which is something we need to pay much - // more attention to. (Especially since only DTMs which are assigned - // to a manager can use the overflow addressing scheme.) + // Not found; generate a new DTM. + // + // %REVIEW% Is this really desirable, or should we return null + // and make folks explicitly instantiate from a DOMSource? The + // latter is more work but gives the caller the opportunity to + // explicitly add the DTM to a DTMManager... and thus to know when + // it can be discarded again, which is something we need to pay much + // more attention to. (Especially since only DTMs which are assigned + // to a manager can use the overflow addressing scheme.) + // + // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode + // and the DTM wasn't registered with this DTMManager, we will create + // a new DTM and _still_ not be able to find the node (since it will + // be resynthesized). Another reason to push hard on making all DTMs + // be managed DTMs. + + // Since the real root of our tree may be a DocumentFragment, we need to + // use getParent to find the root, instead of getOwnerDocument. Otherwise + // DOM2DTM#getHandleOfNode will be very unhappy. + Node root = node; + int rootType = root.getNodeType(); + Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode(); + for (; p != null; p = p.getParentNode()) { root = p; } + + // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null); + DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/); + + int handle; + + if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode + || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) { + // Can't return the same node since it's unique to a specific DTM, + // but can return the equivalent node -- find the corresponding + // Document Element, then ask it for the xml: namespace decl. 
+ handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement()); + handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName()); + } else { + handle = dtm.getHandleOfNode(node); + + rootType = root.getNodeType(); + // Is Node actually within the same document? If not, don't search! + // This would be easier if m_root was always the Document node, but + // we decided to allow wrapping a DTM around a subtree. + if ((root == node) || + (rootType == Node.DOCUMENT_NODE && root == node.getOwnerDocument()) || + (rootType != Node.DOCUMENT_NODE && root.getOwnerDocument() == node.getOwnerDocument()) + ) { + // If node _is_ in m_root's tree, find its handle // - // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode - // and the DTM wasn't registered with this DTMManager, we will create - // a new DTM and _still_ not be able to find the node (since it will - // be resynthesized). Another reason to push hard on making all DTMs - // be managed DTMs. - - // Since the real root of our tree may be a DocumentFragment, we need to - // use getParent to find the root, instead of getOwnerDocument. Otherwise - // DOM2DTM#getHandleOfNode will be very unhappy. - Node root = node; int rootType = root.getNodeType(); - Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode(); - for (; p != null; p = p.getParentNode()) root = p; - - // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null); - DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/); - - int handle; - - if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode - || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) { - // Can't return the same node since it's unique to a specific DTM, - // but can return the equivalent node -- find the corresponding - // Document Element, then ask it for the xml: namespace decl. 
- handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement()); - handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName()); - } - else { - handle = dtm.getHandleOfNode(node); - - rootType = root.getNodeType(); - // Is Node actually within the same document? If not, don't search! - // This would be easier if m_root was always the Document node, but - // we decided to allow wrapping a DTM around a subtree. - if((root==node) || - (rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) || - (rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument()) - ) - { - // If node _is_ in m_root's tree, find its handle - // - // %OPT% This check may be improved significantly when DOM - // Level 3 nodeKey and relative-order tests become - // available! - for (Node cursor = node; cursor != null; - cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE) - ? cursor.getParentNode() - : ((org.w3c.dom.Attr)cursor).getOwnerElement()) { - if (cursor==root) { - // We know this node; find its handle. - return (dtm).getHandleFromNode(node); - } - } // for ancestors of node - } // if node and m_root in same Document - } - - if (DTM.NULL == handle) - throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!"); + // %OPT% This check may be improved significantly when DOM + // Level 3 nodeKey and relative-order tests become + // available! + for (Node cursor = node; cursor != null; + cursor = (cursor.getNodeType() != Node.ATTRIBUTE_NODE) + ? cursor.getParentNode() + : ((org.w3c.dom.Attr)cursor).getOwnerElement()) { + if (cursor == root) { + // We know this node; find its handle. 
+ return (dtm).getHandleFromNode(node); + } + } // for ancestors of node + } // if node and m_root in same Document + } - return handle; + if (DTM.NULL == handle) { + throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, + null)); //"Could not resolve the node to a handle!"); } - private DOM2DTM getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) { - int dtmPos = getFirstFreeDTMID(); - int documentID = dtmPos << IDENT_DTM_NODE_BITS; + return handle; + } - DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true); + private DOM2DTM + getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) + { + int dtmPos = getFirstFreeDTMID(); + int documentID = dtmPos << IDENT_DTM_NODE_BITS; - addDTM(dtm, dtmPos, 0); - return dtm; - } + DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true); + + addDTM(dtm, dtmPos, 0); + return dtm; + } } diff --git a/ext/java/nokogiri/internals/XmlDeclHandler.java b/ext/java/nokogiri/internals/XmlDeclHandler.java index e59720613c..a306c25419 100644 --- a/ext/java/nokogiri/internals/XmlDeclHandler.java +++ b/ext/java/nokogiri/internals/XmlDeclHandler.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, 
sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; /** @@ -37,6 +5,7 @@ * * @author Patrick Mahoney */ -public interface XmlDeclHandler { - public void xmlDecl(String version, String encoding, String standalone); +public interface XmlDeclHandler +{ + public void xmlDecl(String version, String encoding, String standalone); } diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index 89af2bc162..557c2f18e3 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -1,55 +1,12 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2012: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights 
to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static nokogiri.internals.NokogiriHelpers.isBlank; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import nokogiri.NokogiriService; import nokogiri.XmlDocument; import nokogiri.XmlDtd; import nokogiri.XmlSyntaxError; - import org.apache.xerces.parsers.DOMParser; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.RubyFixnum; +import org.jruby.*; import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; @@ -57,233 +14,252 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static nokogiri.internals.NokogiriHelpers.isBlank; + /** * Parser class for XML DOM processing. This class actually parses XML document * and creates DOM tree in Java side. 
However, DOM tree in Ruby side is not since * we delay creating objects for performance. - * + * * @author sergio * @author Yoko Harada */ -public class XmlDomParserContext extends ParserContext { +public class XmlDomParserContext extends ParserContext +{ + private static final long serialVersionUID = 1L; - protected static final String FEATURE_LOAD_EXTERNAL_DTD = - "http://apache.org/xml/features/nonvalidating/load-external-dtd"; - protected static final String FEATURE_LOAD_DTD_GRAMMAR = - "http://apache.org/xml/features/nonvalidating/load-dtd-grammar"; - protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE = - "http://apache.org/xml/features/dom/include-ignorable-whitespace"; - protected static final String CONTINUE_AFTER_FATAL_ERROR = - "http://apache.org/xml/features/continue-after-fatal-error"; - protected static final String FEATURE_NOT_EXPAND_ENTITY = - "http://apache.org/xml/features/dom/create-entity-ref-nodes"; - protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation"; - private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude"; - private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager"; + protected static final String FEATURE_LOAD_EXTERNAL_DTD = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; + protected static final String FEATURE_LOAD_DTD_GRAMMAR = + "http://apache.org/xml/features/nonvalidating/load-dtd-grammar"; + protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE = + "http://apache.org/xml/features/dom/include-ignorable-whitespace"; + protected static final String CONTINUE_AFTER_FATAL_ERROR = + "http://apache.org/xml/features/continue-after-fatal-error"; + protected static final String FEATURE_NOT_EXPAND_ENTITY = + "http://apache.org/xml/features/dom/create-entity-ref-nodes"; + protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation"; + private static 
final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager"; - protected ParserContext.Options options; - protected DOMParser parser; - protected NokogiriErrorHandler errorHandler; - protected String java_encoding; - protected IRubyObject ruby_encoding; + protected ParserContext.Options options; + protected DOMParser parser; + protected NokogiriErrorHandler errorHandler; + protected IRubyObject ruby_encoding; - public XmlDomParserContext(Ruby runtime, IRubyObject options) { - this(runtime, runtime.getNil(), options); - } - - public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) { - super(runtime); - this.options = new ParserContext.Options(RubyFixnum.fix2long(options)); - java_encoding = NokogiriHelpers.getValidEncoding(runtime, encoding); - ruby_encoding = encoding; - initErrorHandler(); - initParser(runtime); - } + public + XmlDomParserContext(Ruby runtime, IRubyObject options) + { + this(runtime, runtime.getNil(), options); + } - protected void initErrorHandler() { - if (options.recover) { - errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning); - } else { - errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning); - } - } + public + XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) + { + super(runtime); + this.options = new ParserContext.Options(RubyFixnum.fix2long(options)); + java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding); + ruby_encoding = encoding; + initErrorHandler(runtime); + initParser(runtime); + } - protected void initParser(Ruby runtime) { - if (options.xInclude) { - System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration", - "org.apache.xerces.parsers.XIncludeParserConfiguration"); - } + protected void + initErrorHandler(Ruby runtime) + { + if (options.recover) { + errorHandler = new NokogiriNonStrictErrorHandler(runtime, options.noError, options.noWarning); + } else { + 
errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); + } + } - parser = new NokogiriDomParser(options); - parser.setErrorHandler(errorHandler); + protected void + initParser(Ruby runtime) + { + if (options.xInclude) { + System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration", + "org.apache.xerces.parsers.XIncludeParserConfiguration"); + } - // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000. - setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager()); + parser = new NokogiriDomParser(options); + parser.setErrorHandler(errorHandler); - if (options.noBlanks) { - setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false); - } + // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000. + setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager()); - if (options.recover) { - setFeature(CONTINUE_AFTER_FATAL_ERROR, true); - } + if (options.noBlanks) { + setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false); + } - if (options.dtdValid) { - setFeature(FEATURE_VALIDATION, true); - } + if (options.recover) { + setFeature(CONTINUE_AFTER_FATAL_ERROR, true); + } - if (!options.noEnt) { - setFeature(FEATURE_NOT_EXPAND_ENTITY, true); - } - // If we turn off loading of external DTDs complete, we don't - // getthe publicID. Instead of turning off completely, we use - // an entity resolver that returns empty documents. - if (options.dtdLoad) { - setFeature(FEATURE_LOAD_EXTERNAL_DTD, true); - setFeature(FEATURE_LOAD_DTD_GRAMMAR, true); - } - parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options)); + if (options.dtdValid) { + setFeature(FEATURE_VALIDATION, true); } - /** - * Convenience method that catches and ignores SAXException - * (unrecognized and unsupported exceptions). 
- */ - protected void setFeature(String feature, boolean value) { - try { - parser.setFeature(feature, value); - } catch (SAXException e) { - // ignore - } + if (!options.noEnt) { + setFeature(FEATURE_NOT_EXPAND_ENTITY, true); + } + // If we turn off loading of external DTDs complete, we don't + // getthe publicID. Instead of turning off completely, we use + // an entity resolver that returns empty documents. + if (options.dtdLoad) { + setFeature(FEATURE_LOAD_EXTERNAL_DTD, true); + setFeature(FEATURE_LOAD_DTD_GRAMMAR, true); } + parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options)); + } - /** - * Convenience method that catches and ignores SAXException - * (unrecognized and unsupported exceptions). - */ - protected void setProperty(String property, Object value) { - try { - parser.setProperty(property, value); - } catch (SAXException e) { - // ignore - } + /** + * Convenience method that catches and ignores SAXException + * (unrecognized and unsupported exceptions). + */ + protected void + setFeature(String feature, boolean value) + { + try { + parser.setFeature(feature, value); + } catch (SAXException e) { + // ignore } + } - public void addErrorsIfNecessary(ThreadContext context, XmlDocument doc) { - doc.setInstanceVariable("@errors", mapErrors(context, errorHandler)); + /** + * Convenience method that catches and ignores SAXException + * (unrecognized and unsupported exceptions). 
+ */ + protected void + setProperty(String property, Object value) + { + try { + parser.setProperty(property, value); + } catch (SAXException e) { + // ignore } + } + public void + addErrorsIfNecessary(ThreadContext context, XmlDocument doc) + { + doc.setInstanceVariable("@errors", mapErrors(context, errorHandler)); + } - public static RubyArray mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) { - final Ruby runtime = context.runtime; - final List errors = errorHandler.getErrors(); - final IRubyObject[] errorsAry = new IRubyObject[errors.size()]; - for (int i = 0; i < errors.size(); i++) { - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime); - xmlSyntaxError.setException(errors.get(i)); - errorsAry[i] = xmlSyntaxError; - } - return runtime.newArrayNoCopy(errorsAry); - } - public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) { - if (options.recover) { - XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz); - this.addErrorsIfNecessary(context, xmlDocument); - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); - xmlSyntaxError.setException(ex); - ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError); - return xmlDocument; - } else { - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); - xmlSyntaxError.setException(ex); - throw new RaiseException(xmlSyntaxError); - } - } - - private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klazz) { - Document document = parser.getDocument(); - XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz); - if (document != null) { - xmlDocument.setDocumentNode(context, document); - } - xmlDocument.setEncoding(ruby_encoding); - return xmlDocument; + public static RubyArray + mapErrors(ThreadContext context, NokogiriErrorHandler 
errorHandler) + { + final Ruby runtime = context.runtime; + final List errors = errorHandler.getErrors(); + final IRubyObject[] errorsAry = new IRubyObject[errors.size()]; + for (int i = 0; i < errors.size(); i++) { + errorsAry[i] = errors.get(i); } + return runtime.newArrayNoCopy(errorsAry); + } - protected XmlDocument getNewEmptyDocument(ThreadContext context) { - IRubyObject[] args = new IRubyObject[0]; - return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Document"), args); + public XmlDocument + getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) + { + if (options.recover) { + XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz); + this.addErrorsIfNecessary(context, xmlDocument); + XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); + xmlSyntaxError.setException(ex); + ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError); + return xmlDocument; + } else { + XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime); + xmlSyntaxError.setException(ex); + throw xmlSyntaxError.toThrowable(); } + } - /** - * This method is broken out so that HtmlDomParserContext can - * override it. 
- */ - protected XmlDocument wrapDocument(ThreadContext context, - RubyClass klazz, - Document doc) { - XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz); - xmlDocument.setDocumentNode(context, doc); - xmlDocument.setEncoding(ruby_encoding); + private XmlDocument + getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass) + { + Document document = parser.getDocument(); + XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document); + xmlDocument.setEncoding(ruby_encoding); + return xmlDocument; + } - if (options.dtdLoad) { - IRubyObject xmlDtdOrNil = XmlDtd.newFromExternalSubset(context.getRuntime(), doc); - if (!xmlDtdOrNil.isNil()) { - XmlDtd xmlDtd = (XmlDtd) xmlDtdOrNil; - doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, xmlDtd, null); - } - } - return xmlDocument; + /** + * This method is broken out so that HtmlDomParserContext can + * override it. + */ + protected XmlDocument + wrapDocument(ThreadContext context, RubyClass klass, Document doc) + { + XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc); + Helpers.invoke(context, xmlDocument, "initialize"); + xmlDocument.setEncoding(ruby_encoding); + + if (options.dtdLoad) { + IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc); + if (!dtd.isNil()) { + doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null); + } } + return xmlDocument; + } - /** - * Must call setInputSource() before this method. 
- */ - public XmlDocument parse(ThreadContext context, - IRubyObject klazz, - IRubyObject url) { - XmlDocument xmlDoc; - try { - Document doc = do_parse(); - xmlDoc = wrapDocument(context, (RubyClass)klazz, doc); - xmlDoc.setUrl(url); - addErrorsIfNecessary(context, xmlDoc); - return xmlDoc; - } catch (SAXException e) { - return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e); - } catch (IOException e) { - return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e); - } + /** + * Must call setInputSource() before this method. + */ + public XmlDocument + parse(ThreadContext context, RubyClass klass, IRubyObject url) + { + XmlDocument xmlDoc; + try { + Document doc = do_parse(); + xmlDoc = wrapDocument(context, klass, doc); + xmlDoc.setUrl(url); + addErrorsIfNecessary(context, xmlDoc); + return xmlDoc; + } catch (SAXException e) { + return getDocumentWithErrorsOrRaiseException(context, klass, e); + } catch (IOException e) { + return getDocumentWithErrorsOrRaiseException(context, klass, e); } + } - protected Document do_parse() throws SAXException, IOException { - try { - parser.parse(getInputSource()); - } catch (NullPointerException ex) { - // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException - // if we tried to parse ''. We should submit a patch to Xerces. - } - if (options.noBlanks) { - List emptyNodes = new ArrayList(); - findEmptyTexts(parser.getDocument(), emptyNodes); - if (emptyNodes.size() > 0) { - for (Node node : emptyNodes) { - node.getParentNode().removeChild(node); - } - } - } - return parser.getDocument(); + protected Document + do_parse() throws SAXException, IOException + { + try { + parser.parse(getInputSource()); + } catch (NullPointerException ex) { + // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException + // if we tried to parse ''. We should submit a patch to Xerces. 
} - - private static void findEmptyTexts(Node node, List emptyNodes) { - if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) { - emptyNodes.add(node); - } else { - NodeList children = node.getChildNodes(); - for (int i=0; i < children.getLength(); i++) { - findEmptyTexts(children.item(i), emptyNodes); - } + if (options.noBlanks) { + List emptyNodes = new ArrayList(); + findEmptyTexts(parser.getDocument(), emptyNodes); + if (emptyNodes.size() > 0) { + for (Node node : emptyNodes) { + node.getParentNode().removeChild(node); } + } + } + return parser.getDocument(); + } + + private static void + findEmptyTexts(Node node, List emptyNodes) + { + if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) { + emptyNodes.add(node); + } else { + NodeList children = node.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + findEmptyTexts(children.item(i), emptyNodes); + } } + } } diff --git a/ext/java/nokogiri/internals/XmlSaxParser.java b/ext/java/nokogiri/internals/XmlSaxParser.java index fb3e9c4406..478fe8fd1a 100644 --- a/ext/java/nokogiri/internals/XmlSaxParser.java +++ b/ext/java/nokogiri/internals/XmlSaxParser.java @@ -1,35 +1,3 @@ -/** - * (The MIT License) - * - * Copyright (c) 2008 - 2011: - * - * * {Aaron Patterson}[http://tenderlovemaking.com] - * * {Mike Dalessio}[http://mike.daless.io] - * * {Charles Nutter}[http://blog.headius.com] - * * {Sergio Arbeo}[http://www.serabe.com] - * * {Patrick Mahoney}[http://polycrystal.org] - * * {Yoko Harada}[http://yokolet.blogspot.com] - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * 'Software'), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the 
following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - package nokogiri.internals; import org.apache.xerces.parsers.SAXParser; @@ -42,24 +10,31 @@ * * @author Patrick Mahoney */ -public class XmlSaxParser extends SAXParser { +public class XmlSaxParser extends SAXParser +{ - protected XmlDeclHandler xmlDeclHandler = null; + protected XmlDeclHandler xmlDeclHandler = null; - public XmlSaxParser() { - super(); - } + public + XmlSaxParser() + { + super(); + } - public void setXmlDeclHandler(XmlDeclHandler xmlDeclHandler) { - this.xmlDeclHandler = xmlDeclHandler; - } + public void + setXmlDeclHandler(XmlDeclHandler xmlDeclHandler) + { + this.xmlDeclHandler = xmlDeclHandler; + } - @Override - public void xmlDecl(String version, String encoding, String standalone, - Augmentations augs) throws XNIException { - super.xmlDecl(version, encoding, standalone, augs); - if (xmlDeclHandler != null) { - xmlDeclHandler.xmlDecl(version, encoding, standalone); - } + @Override + public void + xmlDecl(String version, String encoding, String standalone, + Augmentations augs) throws XNIException + { + super.xmlDecl(version, encoding, standalone, augs); + if (xmlDeclHandler != null) { + xmlDeclHandler.xmlDecl(version, encoding, standalone); } + } } diff --git a/ext/java/nokogiri/internals/c14n/AttrCompare.java b/ext/java/nokogiri/internals/c14n/AttrCompare.java index 044be4757e..ed34b536dd 100644 
--- a/ext/java/nokogiri/internals/c14n/AttrCompare.java +++ b/ext/java/nokogiri/internals/c14n/AttrCompare.java @@ -28,7 +28,7 @@ * Compares two attributes based on the C14n specification. * *
    - *
  • Namespace nodes have a lesser document order position than attribute + *
  • Namespace nodes have a lesser document order position than attribute * nodes. *
  • An element's namespace nodes are sorted lexicographically by * local name (the default namespace node, if one exists, has no @@ -40,80 +40,83 @@ * * @author Christian Geuer-Pollmann */ -public class AttrCompare implements Comparator, Serializable { +public class AttrCompare implements Comparator, Serializable +{ - private static final long serialVersionUID = -7113259629930576230L; - private static final int ATTR0_BEFORE_ATTR1 = -1; - private static final int ATTR1_BEFORE_ATTR0 = 1; - private static final String XMLNS = Constants.NamespaceSpecNS; + private static final long serialVersionUID = -7113259629930576230L; + private static final int ATTR0_BEFORE_ATTR1 = -1; + private static final int ATTR1_BEFORE_ATTR0 = 1; + private static final String XMLNS = Constants.NamespaceSpecNS; - /** - * Compares two attributes based on the C14n specification. - * - *
      - *
    • Namespace nodes have a lesser document order position than - * attribute nodes. - *
    • An element's namespace nodes are sorted lexicographically by - * local name (the default namespace node, if one exists, has no - * local name and is therefore lexicographically least). - *
    • An element's attribute nodes are sorted lexicographically with - * namespace URI as the primary key and local name as the secondary - * key (an empty namespace URI is lexicographically least). - *
    - * - * @param attr0 - * @param attr1 - * @return returns a negative integer, zero, or a positive integer as - * obj0 is less than, equal to, or greater than obj1 - * - */ - public int compare(Attr attr0, Attr attr1) { - String namespaceURI0 = attr0.getNamespaceURI(); - String namespaceURI1 = attr1.getNamespaceURI(); - - boolean isNamespaceAttr0 = XMLNS.equals(namespaceURI0); - boolean isNamespaceAttr1 = XMLNS.equals(namespaceURI1); + /** + * Compares two attributes based on the C14n specification. + * + *
      + *
    • Namespace nodes have a lesser document order position than + * attribute nodes. + *
    • An element's namespace nodes are sorted lexicographically by + * local name (the default namespace node, if one exists, has no + * local name and is therefore lexicographically least). + *
    • An element's attribute nodes are sorted lexicographically with + * namespace URI as the primary key and local name as the secondary + * key (an empty namespace URI is lexicographically least). + *
    + * + * @param attr0 + * @param attr1 + * @return returns a negative integer, zero, or a positive integer as + * obj0 is less than, equal to, or greater than obj1 + * + */ + public int + compare(Attr attr0, Attr attr1) + { + String namespaceURI0 = attr0.getNamespaceURI(); + String namespaceURI1 = attr1.getNamespaceURI(); - if (isNamespaceAttr0) { - if (isNamespaceAttr1) { - // both are namespaces - String localname0 = attr0.getLocalName(); - String localname1 = attr1.getLocalName(); + boolean isNamespaceAttr0 = XMLNS.equals(namespaceURI0); + boolean isNamespaceAttr1 = XMLNS.equals(namespaceURI1); - if ("xmlns".equals(localname0)) { - localname0 = ""; - } + if (isNamespaceAttr0) { + if (isNamespaceAttr1) { + // both are namespaces + String localname0 = attr0.getLocalName(); + String localname1 = attr1.getLocalName(); - if ("xmlns".equals(localname1)) { - localname1 = ""; - } + if ("xmlns".equals(localname0)) { + localname0 = ""; + } - return localname0.compareTo(localname1); - } - // attr0 is a namespace, attr1 is not - return ATTR0_BEFORE_ATTR1; - } else if (isNamespaceAttr1) { - // attr1 is a namespace, attr0 is not - return ATTR1_BEFORE_ATTR0; - } + if ("xmlns".equals(localname1)) { + localname1 = ""; + } - // none is a namespace - if (namespaceURI0 == null) { - if (namespaceURI1 == null) { - String name0 = attr0.getName(); - String name1 = attr1.getName(); - return name0.compareTo(name1); - } - return ATTR0_BEFORE_ATTR1; - } else if (namespaceURI1 == null) { - return ATTR1_BEFORE_ATTR0; - } + return localname0.compareTo(localname1); + } + // attr0 is a namespace, attr1 is not + return ATTR0_BEFORE_ATTR1; + } else if (isNamespaceAttr1) { + // attr1 is a namespace, attr0 is not + return ATTR1_BEFORE_ATTR0; + } - int a = namespaceURI0.compareTo(namespaceURI1); - if (a != 0) { - return a; - } - - return (attr0.getLocalName()).compareTo(attr1.getLocalName()); + // none is a namespace + if (namespaceURI0 == null) { + if (namespaceURI1 == null) { + String name0 = 
attr0.getName(); + String name1 = attr1.getName(); + return name0.compareTo(name1); + } + return ATTR0_BEFORE_ATTR1; + } else if (namespaceURI1 == null) { + return ATTR1_BEFORE_ATTR0; } + + int a = namespaceURI0.compareTo(namespaceURI1); + if (a != 0) { + return a; + } + + return (attr0.getLocalName()).compareTo(attr1.getLocalName()); + } } diff --git a/ext/java/nokogiri/internals/c14n/C14nHelper.java b/ext/java/nokogiri/internals/c14n/C14nHelper.java index 6019fd3893..4a3d842dd9 100644 --- a/ext/java/nokogiri/internals/c14n/C14nHelper.java +++ b/ext/java/nokogiri/internals/c14n/C14nHelper.java @@ -29,131 +29,150 @@ * * @author Christian Geuer-Pollmann */ -public class C14nHelper { - - /** - * Constructor C14nHelper - * - */ - private C14nHelper() { - // don't allow instantiation +public class C14nHelper +{ + + /** + * Constructor C14nHelper + * + */ + private + C14nHelper() + { + // don't allow instantiation + } + + /** + * Method namespaceIsRelative + * + * @param namespace + * @return true if the given namespace is relative. + */ + public static boolean + namespaceIsRelative(Attr namespace) + { + return !namespaceIsAbsolute(namespace); + } + + /** + * Method namespaceIsRelative + * + * @param namespaceValue + * @return true if the given namespace is relative. + */ + public static boolean + namespaceIsRelative(String namespaceValue) + { + return !namespaceIsAbsolute(namespaceValue); + } + + /** + * Method namespaceIsAbsolute + * + * @param namespace + * @return true if the given namespace is absolute. + */ + public static boolean + namespaceIsAbsolute(Attr namespace) + { + return namespaceIsAbsolute(namespace.getValue()); + } + + /** + * Method namespaceIsAbsolute + * + * @param namespaceValue + * @return true if the given namespace is absolute. 
+ */ + public static boolean + namespaceIsAbsolute(String namespaceValue) + { + // assume empty namespaces are absolute + if (namespaceValue.length() == 0) { + return true; } - - /** - * Method namespaceIsRelative - * - * @param namespace - * @return true if the given namespace is relative. - */ - public static boolean namespaceIsRelative(Attr namespace) { - return !namespaceIsAbsolute(namespace); + return namespaceValue.indexOf(':') > 0; + } + + /** + * This method throws an exception if the Attribute value contains + * a relative URI. + * + * @param attr + * @throws CanonicalizationException + */ + public static void + assertNotRelativeNS(Attr attr) throws CanonicalizationException + { + if (attr == null) { + return; } - /** - * Method namespaceIsRelative - * - * @param namespaceValue - * @return true if the given namespace is relative. - */ - public static boolean namespaceIsRelative(String namespaceValue) { - return !namespaceIsAbsolute(namespaceValue); - } + String nodeAttrName = attr.getNodeName(); + boolean definesDefaultNS = nodeAttrName.equals("xmlns"); + boolean definesNonDefaultNS = nodeAttrName.startsWith("xmlns:"); - /** - * Method namespaceIsAbsolute - * - * @param namespace - * @return true if the given namespace is absolute. - */ - public static boolean namespaceIsAbsolute(Attr namespace) { - return namespaceIsAbsolute(namespace.getValue()); - } + if ((definesDefaultNS || definesNonDefaultNS) && namespaceIsRelative(attr)) { + String parentName = attr.getOwnerElement().getTagName(); + String attrValue = attr.getValue(); + Object exArgs[] = { parentName, nodeAttrName, attrValue }; - /** - * Method namespaceIsAbsolute - * - * @param namespaceValue - * @return true if the given namespace is absolute. 
- */ - public static boolean namespaceIsAbsolute(String namespaceValue) { - // assume empty namespaces are absolute - if (namespaceValue.length() == 0) { - return true; - } - return namespaceValue.indexOf(':') > 0; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - - /** - * This method throws an exception if the Attribute value contains - * a relative URI. - * - * @param attr - * @throws CanonicalizationException - */ - public static void assertNotRelativeNS(Attr attr) throws CanonicalizationException { - if (attr == null) { - return; - } - - String nodeAttrName = attr.getNodeName(); - boolean definesDefaultNS = nodeAttrName.equals("xmlns"); - boolean definesNonDefaultNS = nodeAttrName.startsWith("xmlns:"); - - if ((definesDefaultNS || definesNonDefaultNS) && namespaceIsRelative(attr)) { - String parentName = attr.getOwnerElement().getTagName(); - String attrValue = attr.getValue(); - Object exArgs[] = { parentName, nodeAttrName, attrValue }; - - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } + } + + /** + * This method throws a CanonicalizationException if the supplied Document + * is not able to be traversed using a TreeWalker. + * + * @param document + * @throws CanonicalizationException + */ + public static void + checkTraversability(Document document) + throws CanonicalizationException + { + if (!document.isSupported("Traversal", "2.0")) { + Object exArgs[] = {document.getImplementation().getClass().getName() }; + + throw new CanonicalizationException( + "c14n.Canonicalizer.TraversalNotSupported", exArgs + ); } - - /** - * This method throws a CanonicalizationException if the supplied Document - * is not able to be traversed using a TreeWalker. 
- * - * @param document - * @throws CanonicalizationException - */ - public static void checkTraversability(Document document) - throws CanonicalizationException { - if (!document.isSupported("Traversal", "2.0")) { - Object exArgs[] = {document.getImplementation().getClass().getName() }; - - throw new CanonicalizationException( - "c14n.Canonicalizer.TraversalNotSupported", exArgs - ); - } - } - - /** - * This method throws a CanonicalizationException if the supplied Element - * contains any relative namespaces. - * - * @param ctxNode - * @throws CanonicalizationException - * @see C14nHelper#assertNotRelativeNS(Attr) - */ - public static void checkForRelativeNamespace(Element ctxNode) - throws CanonicalizationException { - if (ctxNode != null) { - NamedNodeMap attributes = ctxNode.getAttributes(); - - for (int i = 0; i < attributes.getLength(); i++) { - C14nHelper.assertNotRelativeNS((Attr) attributes.item(i)); - } - } else { - throw new CanonicalizationException("Called checkForRelativeNamespace() on null"); - } + } + + /** + * This method throws a CanonicalizationException if the supplied Element + * contains any relative namespaces. + * + * @param ctxNode + * @throws CanonicalizationException + * @see C14nHelper#assertNotRelativeNS(Attr) + */ + public static void + checkForRelativeNamespace(Element ctxNode) + throws CanonicalizationException + { + if (ctxNode != null) { + NamedNodeMap attributes = ctxNode.getAttributes(); + + for (int i = 0; i < attributes.getLength(); i++) { + C14nHelper.assertNotRelativeNS((Attr) attributes.item(i)); + } + } else { + throw new CanonicalizationException("Called checkForRelativeNamespace() on null"); } - - public static String getErrorMessage(String message, Object... exArgs) { - StringBuffer sb = new StringBuffer(message); - for (Object arg : exArgs) { - sb.append(", ").append(arg.toString()); - } - return new String(sb); + } + + public static String + getErrorMessage(String message, Object... 
exArgs) + { + StringBuffer sb = new StringBuffer(message); + for (Object arg : exArgs) { + sb.append(", ").append(arg.toString()); } + return new String(sb); + } } diff --git a/ext/java/nokogiri/internals/c14n/CanonicalFilter.java b/ext/java/nokogiri/internals/c14n/CanonicalFilter.java index 812699a184..5717f646f6 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalFilter.java @@ -8,30 +8,36 @@ import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Node; -public class CanonicalFilter { - private final Block block; - private final ThreadContext context; - - public CanonicalFilter(ThreadContext context, Block block) { - this.context = context; - this.block = block; +public class CanonicalFilter +{ + private final Block block; + private final ThreadContext context; + + public + CanonicalFilter(ThreadContext context, Block block) + { + this.context = context; + this.block = block; + } + + public boolean + includeNodes(Node currentNode, Node parentNode) + { + if (block == null || !block.isGiven()) { + return true; } - public boolean includeNodes(Node currentNode, Node parentNode) { - if (block == null || !block.isGiven()) - return true; - - IRubyObject current = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), currentNode); - IRubyObject parent = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), parentNode); + IRubyObject current = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), currentNode); + IRubyObject parent = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), parentNode); - if (parent.isNil()) { - IRubyObject doc = ((XmlNode) current).document(context); - boolean returnValue = block.call(context, current, doc).isTrue(); - block.call(context, doc, context.nil); - return returnValue; - } - - return block.call(context, current, parent).isTrue(); + if (parent.isNil()) { + IRubyObject doc = ((XmlNode) current).document(context); + boolean returnValue = 
block.call(context, current, doc).isTrue(); + block.call(context, doc, context.nil); + return returnValue; } + return block.call(context, current, parent).isTrue(); + } + } diff --git a/ext/java/nokogiri/internals/c14n/CanonicalizationException.java b/ext/java/nokogiri/internals/c14n/CanonicalizationException.java index 2474d97b5b..bd0329cd95 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalizationException.java @@ -24,70 +24,83 @@ * * @author Christian Geuer-Pollmann */ -public class CanonicalizationException extends Exception { +public class CanonicalizationException extends Exception +{ - /** - * - */ - private static final long serialVersionUID = 1L; + /** + * + */ + private static final long serialVersionUID = 1L; - /** - * Constructor CanonicalizationException - * - */ - public CanonicalizationException() { - super(); - } + /** + * Constructor CanonicalizationException + * + */ + public + CanonicalizationException() + { + super(); + } - /** - * Constructor CanonicalizationException - * - * @param message - */ - public CanonicalizationException(String message) { - super(message); - } - - /** - * Constructor CanonicalizationException - * - * @param message - * @param rootCause - */ - public CanonicalizationException(Exception rootCause) { - super(rootCause); - } + /** + * Constructor CanonicalizationException + * + * @param message + */ + public + CanonicalizationException(String message) + { + super(message); + } + + /** + * Constructor CanonicalizationException + * + * @param message + * @param rootCause + */ + public + CanonicalizationException(Exception rootCause) + { + super(rootCause); + } + + /** + * Constructor CanonicalizationException + * + * @param msgID + * @param exArgs + */ + public + CanonicalizationException(String message, Object... 
exArgs) + { + super(C14nHelper.getErrorMessage(message, exArgs)); + } + + /** + * Constructor CanonicalizationException + * + * @param message + * @param rootCause + */ + public + CanonicalizationException(String message, Exception rootCause) + { + super(message, rootCause); + } + + /** + * Constructor CanonicalizationException + * + * @param msgID + * @param exArgs + * @param originalException + */ + public + CanonicalizationException(String message, Exception rootCause, Object... exArgs) + { + super(C14nHelper.getErrorMessage(message, exArgs), rootCause); + } - /** - * Constructor CanonicalizationException - * - * @param msgID - * @param exArgs - */ - public CanonicalizationException(String message, Object... exArgs) { - super(C14nHelper.getErrorMessage(message, exArgs)); - } - - /** - * Constructor CanonicalizationException - * - * @param message - * @param rootCause - */ - public CanonicalizationException(String message, Exception rootCause) { - super(message, rootCause); - } - /** - * Constructor CanonicalizationException - * - * @param msgID - * @param exArgs - * @param originalException - */ - public CanonicalizationException(String message, Exception rootCause, Object... exArgs) { - super(C14nHelper.getErrorMessage(message, exArgs), rootCause); - } - - } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer.java b/ext/java/nokogiri/internals/c14n/Canonicalizer.java index 7086bee9c0..b3e301cd8b 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer.java @@ -28,225 +28,251 @@ * * @author Christian Geuer-Pollmann */ -public class Canonicalizer { - - /** The output encoding of canonicalized data */ - public static final String ENCODING = "UTF8"; - - /** - * XPath Expression for selecting every node and continuous comments joined - * in only one node - */ - public static final String XPATH_C14N_WITH_COMMENTS_SINGLE_NODE = - "(.//. 
| .//@* | .//namespace::*)"; - - /** - * The URL defined in XML-SEC Rec for inclusive c14n without comments. - */ - public static final String ALGO_ID_C14N_OMIT_COMMENTS = - "http://www.w3.org/TR/2001/REC-xml-c14n-20010315"; - /** - * The URL defined in XML-SEC Rec for inclusive c14n with comments. - */ - public static final String ALGO_ID_C14N_WITH_COMMENTS = - ALGO_ID_C14N_OMIT_COMMENTS + "#WithComments"; - /** - * The URL defined in XML-SEC Rec for exclusive c14n without comments. - */ - public static final String ALGO_ID_C14N_EXCL_OMIT_COMMENTS = - "http://www.w3.org/2001/10/xml-exc-c14n#"; - /** - * The URL defined in XML-SEC Rec for exclusive c14n with comments. - */ - public static final String ALGO_ID_C14N_EXCL_WITH_COMMENTS = - ALGO_ID_C14N_EXCL_OMIT_COMMENTS + "WithComments"; - /** - * The URI for inclusive c14n 1.1 without comments. - */ - public static final String ALGO_ID_C14N11_OMIT_COMMENTS = - "http://www.w3.org/2006/12/xml-c14n11"; - /** - * The URI for inclusive c14n 1.1 with comments. 
- */ - public static final String ALGO_ID_C14N11_WITH_COMMENTS = - ALGO_ID_C14N11_OMIT_COMMENTS + "#WithComments"; - /** - * Non-standard algorithm to serialize the physical representation for XML Encryption - */ - public static final String ALGO_ID_C14N_PHYSICAL = - "http://santuario.apache.org/c14n/physical"; - - private static Map> canonicalizerHash = null; - - private final CanonicalizerSpi canonicalizerSpi; - - /** - * Constructor Canonicalizer - * - * @param algorithmURI - * @throws InvalidCanonicalizerException - */ - private Canonicalizer(String algorithmURI) throws CanonicalizationException { - try { - Class implementingClass = - canonicalizerHash.get(algorithmURI); - - canonicalizerSpi = implementingClass.newInstance(); - canonicalizerSpi.reset = true; - } catch (Exception e) { - Object exArgs[] = { algorithmURI }; - throw new CanonicalizationException( - "signature.Canonicalizer.UnknownCanonicalizer", exArgs, e - ); - } - } +public class Canonicalizer +{ - /** - * Method getInstance - * - * @param algorithmURI - * @return a Canonicalizer instance ready for the job - * @throws InvalidCanonicalizerException - */ - public static final Canonicalizer getInstance(String algorithmURI) - throws CanonicalizationException { - if (canonicalizerHash == null) { - canonicalizerHash = new ConcurrentHashMap>(); - Canonicalizer.registerDefaultAlgorithms(); - } - return new Canonicalizer(algorithmURI); - } + /** The output encoding of canonicalized data */ + public static final String ENCODING = "UTF8"; - /** - * Method register - * - * @param algorithmURI - * @param implementingClass - * @throws CanonicalizationException - */ - @SuppressWarnings("unchecked") - public static void register(String algorithmURI, String implementingClass) - throws CanonicalizationException, ClassNotFoundException { - // check whether URI is already registered - Class registeredClass = - canonicalizerHash.get(algorithmURI); - - if (registeredClass != null) { - Object exArgs[] = { algorithmURI, 
registeredClass }; - throw new CanonicalizationException("algorithm.alreadyRegistered", exArgs); - } - - canonicalizerHash.put( - algorithmURI, (Class)Class.forName(implementingClass) - ); - } + /** + * XPath Expression for selecting every node and continuous comments joined + * in only one node + */ + public static final String XPATH_C14N_WITH_COMMENTS_SINGLE_NODE = + "(.//. | .//@* | .//namespace::*)"; - /** - * Method register - * - * @param algorithmURI - * @param implementingClass - * @throws CanonicalizationException - */ - public static void register(String algorithmURI, Class implementingClass) - throws CanonicalizationException { - // check whether URI is already registered - Class registeredClass = canonicalizerHash.get(algorithmURI); - - if (registeredClass != null) { - Object exArgs[] = { algorithmURI, registeredClass }; - throw new CanonicalizationException("algorithm.alreadyRegistered", exArgs); - } - - canonicalizerHash.put(algorithmURI, implementingClass); - } + /** + * The URL defined in XML-SEC Rec for inclusive c14n without comments. + */ + public static final String ALGO_ID_C14N_OMIT_COMMENTS = + "http://www.w3.org/TR/2001/REC-xml-c14n-20010315"; + /** + * The URL defined in XML-SEC Rec for inclusive c14n with comments. + */ + public static final String ALGO_ID_C14N_WITH_COMMENTS = + ALGO_ID_C14N_OMIT_COMMENTS + "#WithComments"; + /** + * The URL defined in XML-SEC Rec for exclusive c14n without comments. + */ + public static final String ALGO_ID_C14N_EXCL_OMIT_COMMENTS = + "http://www.w3.org/2001/10/xml-exc-c14n#"; + /** + * The URL defined in XML-SEC Rec for exclusive c14n with comments. + */ + public static final String ALGO_ID_C14N_EXCL_WITH_COMMENTS = + ALGO_ID_C14N_EXCL_OMIT_COMMENTS + "WithComments"; + /** + * The URI for inclusive c14n 1.1 without comments. + */ + public static final String ALGO_ID_C14N11_OMIT_COMMENTS = + "http://www.w3.org/2006/12/xml-c14n11"; + /** + * The URI for inclusive c14n 1.1 with comments. 
+ */ + public static final String ALGO_ID_C14N11_WITH_COMMENTS = + ALGO_ID_C14N11_OMIT_COMMENTS + "#WithComments"; + /** + * Non-standard algorithm to serialize the physical representation for XML Encryption + */ + public static final String ALGO_ID_C14N_PHYSICAL = + "http://santuario.apache.org/c14n/physical"; - /** - * This method registers the default algorithms. - */ - private static void registerDefaultAlgorithms() { - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS, - Canonicalizer20010315OmitComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS, - Canonicalizer20010315WithComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS, - Canonicalizer20010315ExclOmitComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS, - Canonicalizer20010315ExclWithComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS, - Canonicalizer11_OmitComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS, - Canonicalizer11_WithComments.class); - canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_PHYSICAL, - CanonicalizerPhysical.class); - } + private static Map> canonicalizerHash = null; - /** - * Method getURI - * - * @return the URI defined for this c14n instance. - */ - public final String getURI() { - return canonicalizerSpi.engineGetURI(); - } + private final CanonicalizerSpi canonicalizerSpi; - /** - * Method getIncludeComments - * - * @return true if the c14n respect the comments. 
- */ - public boolean getIncludeComments() { - return canonicalizerSpi.engineGetIncludeComments(); - } + /** + * Constructor Canonicalizer + * + * @param algorithmURI + * @throws InvalidCanonicalizerException + */ + @SuppressWarnings("deprecation") + private + Canonicalizer(String algorithmURI) throws CanonicalizationException + { + try { + Class implementingClass = + canonicalizerHash.get(algorithmURI); - /** - * Canonicalizes the subtree rooted by node. - * - * @param node The node to canonicalize - * @return the result of the c14n. - * - * @throws CanonicalizationException - */ - public byte[] canonicalizeSubtree(Node node, CanonicalFilter filter) throws CanonicalizationException { - return canonicalizerSpi.engineCanonicalizeSubTree(node, filter); + canonicalizerSpi = implementingClass.newInstance(); + canonicalizerSpi.reset = true; + } catch (Exception e) { + Object exArgs[] = { algorithmURI }; + throw new CanonicalizationException( + "signature.Canonicalizer.UnknownCanonicalizer", exArgs, e + ); } + } - /** - * Canonicalizes the subtree rooted by node. - * - * @param node - * @param inclusiveNamespaces - * @return the result of the c14n. - * @throws CanonicalizationException - */ - public byte[] canonicalizeSubtree(Node node, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException { - return canonicalizerSpi.engineCanonicalizeSubTree(node, inclusiveNamespaces, filter); + /** + * Method getInstance + * + * @param algorithmURI + * @return a Canonicalizer instance ready for the job + * @throws InvalidCanonicalizerException + */ + public static final Canonicalizer + getInstance(String algorithmURI) + throws CanonicalizationException + { + if (canonicalizerHash == null) { + canonicalizerHash = new ConcurrentHashMap>(); + Canonicalizer.registerDefaultAlgorithms(); } + return new Canonicalizer(algorithmURI); + } - /** - * Sets the writer where the canonicalization ends. ByteArrayOutputStream - * if none is set. 
- * @param os - */ - public void setWriter(OutputStream os) { - canonicalizerSpi.setWriter(os); - } + /** + * Method register + * + * @param algorithmURI + * @param implementingClass + * @throws CanonicalizationException + */ + @SuppressWarnings("unchecked") + public static void + register(String algorithmURI, String implementingClass) + throws CanonicalizationException, ClassNotFoundException + { + // check whether URI is already registered + Class registeredClass = + canonicalizerHash.get(algorithmURI); - /** - * Returns the name of the implementing {@link CanonicalizerSpi} class - * - * @return the name of the implementing {@link CanonicalizerSpi} class - */ - public String getImplementingCanonicalizerClass() { - return canonicalizerSpi.getClass().getName(); + if (registeredClass != null) { + Object exArgs[] = { algorithmURI, registeredClass }; + throw new CanonicalizationException("algorithm.alreadyRegistered", exArgs); } - /** - * Set the canonicalizer behaviour to not reset. - */ - public void notReset() { - canonicalizerSpi.reset = false; + canonicalizerHash.put( + algorithmURI, (Class)Class.forName(implementingClass) + ); + } + + /** + * Method register + * + * @param algorithmURI + * @param implementingClass + * @throws CanonicalizationException + */ + public static void + register(String algorithmURI, Class implementingClass) + throws CanonicalizationException + { + // check whether URI is already registered + Class registeredClass = canonicalizerHash.get(algorithmURI); + + if (registeredClass != null) { + Object exArgs[] = { algorithmURI, registeredClass }; + throw new CanonicalizationException("algorithm.alreadyRegistered", exArgs); } + canonicalizerHash.put(algorithmURI, implementingClass); + } + + /** + * This method registers the default algorithms. 
+ */ + private static void + registerDefaultAlgorithms() + { + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS, + Canonicalizer20010315OmitComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS, + Canonicalizer20010315WithComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS, + Canonicalizer20010315ExclOmitComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS, + Canonicalizer20010315ExclWithComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS, + Canonicalizer11_OmitComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS, + Canonicalizer11_WithComments.class); + canonicalizerHash.put(Canonicalizer.ALGO_ID_C14N_PHYSICAL, + CanonicalizerPhysical.class); + } + + /** + * Method getURI + * + * @return the URI defined for this c14n instance. + */ + public final String + getURI() + { + return canonicalizerSpi.engineGetURI(); + } + + /** + * Method getIncludeComments + * + * @return true if the c14n respect the comments. + */ + public boolean + getIncludeComments() + { + return canonicalizerSpi.engineGetIncludeComments(); + } + + /** + * Canonicalizes the subtree rooted by node. + * + * @param node The node to canonicalize + * @return the result of the c14n. + * + * @throws CanonicalizationException + */ + public byte[] + canonicalizeSubtree(Node node, CanonicalFilter filter) throws CanonicalizationException + { + return canonicalizerSpi.engineCanonicalizeSubTree(node, filter); + } + + /** + * Canonicalizes the subtree rooted by node. + * + * @param node + * @param inclusiveNamespaces + * @return the result of the c14n. 
+ * @throws CanonicalizationException + */ + public byte[] + canonicalizeSubtree(Node node, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException + { + return canonicalizerSpi.engineCanonicalizeSubTree(node, inclusiveNamespaces, filter); + } + + /** + * Sets the writer where the canonicalization ends. ByteArrayOutputStream + * if none is set. + * @param os + */ + public void + setWriter(OutputStream os) + { + canonicalizerSpi.setWriter(os); + } + + /** + * Returns the name of the implementing {@link CanonicalizerSpi} class + * + * @return the name of the implementing {@link CanonicalizerSpi} class + */ + public String + getImplementingCanonicalizerClass() + { + return canonicalizerSpi.getClass().getName(); + } + + /** + * Set the canonicalizer behaviour to not reset. + */ + public void + notReset() + { + canonicalizerSpi.reset = false; + } + } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer11.java b/ext/java/nokogiri/internals/c14n/Canonicalizer11.java index 47ed34ed2d..324770c785 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer11.java @@ -43,597 +43,622 @@ * @author Sean Mullan * @author Raul Benito */ -public abstract class Canonicalizer11 extends CanonicalizerBase { - - private static final String XMLNS_URI = Constants.NamespaceSpecNS; - private static final String XML_LANG_URI = Constants.XML_LANG_SPACE_SpecNS; - private final SortedSet result = new TreeSet(COMPARE); - - private boolean firstCall = true; +public abstract class Canonicalizer11 extends CanonicalizerBase +{ + + private static final String XMLNS_URI = Constants.NamespaceSpecNS; + private static final String XML_LANG_URI = Constants.XML_LANG_SPACE_SpecNS; + private final SortedSet result = new TreeSet(COMPARE); + + private boolean firstCall = true; + + private static class XmlAttrStack + { + static class XmlsStackElement + { + int level; + boolean rendered = false; + List nodes = new 
ArrayList(); + } - private static class XmlAttrStack { - static class XmlsStackElement { - int level; - boolean rendered = false; - List nodes = new ArrayList(); + int currentLevel = 0; + int lastlevel = 0; + XmlsStackElement cur; + List levels = new ArrayList(); + + void + push(int level) + { + currentLevel = level; + if (currentLevel == -1) { + return; + } + cur = null; + while (lastlevel >= currentLevel) { + levels.remove(levels.size() - 1); + int newSize = levels.size(); + if (newSize == 0) { + lastlevel = 0; + return; } + lastlevel = (levels.get(newSize - 1)).level; + } + } - int currentLevel = 0; - int lastlevel = 0; - XmlsStackElement cur; - List levels = new ArrayList(); + void + addXmlnsAttr(Attr n) + { + if (cur == null) { + cur = new XmlsStackElement(); + cur.level = currentLevel; + levels.add(cur); + lastlevel = currentLevel; + } + cur.nodes.add(n); + } - void push(int level) { - currentLevel = level; - if (currentLevel == -1) { - return; - } - cur = null; - while (lastlevel >= currentLevel) { - levels.remove(levels.size() - 1); - int newSize = levels.size(); - if (newSize == 0) { - lastlevel = 0; - return; - } - lastlevel = (levels.get(newSize - 1)).level; - } + void + getXmlnsAttr(Collection col) throws CanonicalizationException + { + int size = levels.size() - 1; + if (cur == null) { + cur = new XmlsStackElement(); + cur.level = currentLevel; + lastlevel = currentLevel; + levels.add(cur); + } + boolean parentRendered = false; + if (size == -1) { + parentRendered = true; + } else { + XmlsStackElement e = levels.get(size); + if (e.rendered && e.level + 1 == currentLevel) { + parentRendered = true; } - - void addXmlnsAttr(Attr n) { - if (cur == null) { - cur = new XmlsStackElement(); - cur.level = currentLevel; - levels.add(cur); - lastlevel = currentLevel; - } - cur.nodes.add(n); + } + if (parentRendered) { + col.addAll(cur.nodes); + cur.rendered = true; + return; + } + + Map loa = new HashMap(); + List baseAttrs = new ArrayList(); + boolean 
successiveOmitted = true; + for (; size >= 0; size--) { + XmlsStackElement e = levels.get(size); + if (e.rendered) { + successiveOmitted = false; } - - void getXmlnsAttr(Collection col) throws CanonicalizationException { - int size = levels.size() - 1; - if (cur == null) { - cur = new XmlsStackElement(); - cur.level = currentLevel; - lastlevel = currentLevel; - levels.add(cur); - } - boolean parentRendered = false; - if (size == -1) { - parentRendered = true; - } else { - XmlsStackElement e = levels.get(size); - if (e.rendered && e.level + 1 == currentLevel) { - parentRendered = true; - } - } - if (parentRendered) { - col.addAll(cur.nodes); - cur.rendered = true; - return; - } - - Map loa = new HashMap(); - List baseAttrs = new ArrayList(); - boolean successiveOmitted = true; - for (; size >= 0; size--) { - XmlsStackElement e = levels.get(size); - if (e.rendered) { - successiveOmitted = false; - } - Iterator it = e.nodes.iterator(); - while (it.hasNext() && successiveOmitted) { - Attr n = it.next(); - if (n.getLocalName().equals("base") && !e.rendered) { - baseAttrs.add(n); - } else if (!loa.containsKey(n.getName())) { - loa.put(n.getName(), n); - } - } - } - if (!baseAttrs.isEmpty()) { - Iterator it = col.iterator(); - String base = null; - Attr baseAttr = null; - while (it.hasNext()) { - Attr n = it.next(); - if (n.getLocalName().equals("base")) { - base = n.getValue(); - baseAttr = n; - break; - } - } - it = baseAttrs.iterator(); - while (it.hasNext()) { - Attr n = it.next(); - if (base == null) { - base = n.getValue(); - baseAttr = n; - } else { - try { - base = joinURI(n.getValue(), base); - } catch (URISyntaxException e1) { - throw new CanonicalizationException(e1); - } - } - } - if (base != null && base.length() != 0) { - baseAttr.setValue(base); - col.add(baseAttr); - } - } - - cur.rendered = true; - col.addAll(loa.values()); + Iterator it = e.nodes.iterator(); + while (it.hasNext() && successiveOmitted) { + Attr n = it.next(); + if 
(n.getLocalName().equals("base") && !e.rendered) { + baseAttrs.add(n); + } else if (!loa.containsKey(n.getName())) { + loa.put(n.getName(), n); + } } - } - - private final XmlAttrStack xmlattrStack = new XmlAttrStack(); - - /** - * Constructor Canonicalizer11 - * - * @param includeComments - */ - public Canonicalizer11(boolean includeComments) { - super(includeComments); - } - - /** - * Always throws a CanonicalizationException because this is inclusive c14n. - * - * @param rootNode - * @param inclusiveNamespaces - * @return none it always fails - * @throws CanonicalizationException - */ - public byte[] engineCanonicalizeSubTree( - Node rootNode, String inclusiveNamespaces, CanonicalFilter filter - ) throws CanonicalizationException { - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); - } - - /** - * Returns the Attr[]s to be output for the given element. - *
    - * The code of this method is a copy of {@link #handleAttributes(Element, - * NameSpaceSymbTable)}, - * whereas it takes into account that subtree-c14n is -- well -- - * subtree-based. - * So if the element in question isRoot of c14n, it's parent is not in the - * node set, as well as all other ancestors. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - * @throws URISyntaxException - */ - @Override - protected Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - if (!element.hasAttributes() && !firstCall) { - return null; + } + if (!baseAttrs.isEmpty()) { + Iterator it = col.iterator(); + String base = null; + Attr baseAttr = null; + while (it.hasNext()) { + Attr n = it.next(); + if (n.getLocalName().equals("base")) { + base = n.getValue(); + baseAttr = n; + break; + } } - // result will contain the attrs which have to be output - final SortedSet result = this.result; - result.clear(); - - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NUri = attribute.getNamespaceURI(); - String NName = attribute.getLocalName(); - String NValue = attribute.getValue(); - - if (!XMLNS_URI.equals(NUri)) { - // It's not a namespace attr node. Add to the result and continue. - result.add(attribute); - } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NValue))) { - // The default mapping for xml must not be output. 
- Node n = ns.addMappingAndRender(NName, NValue, attribute); - - if (n != null) { - // Render the ns definition - result.add((Attr)n); - if (C14nHelper.namespaceIsRelative(attribute)) { - Object exArgs[] = {element.getTagName(), NName, attribute.getNodeValue()}; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } + it = baseAttrs.iterator(); + while (it.hasNext()) { + Attr n = it.next(); + if (base == null) { + base = n.getValue(); + baseAttr = n; + } else { + try { + base = joinURI(n.getValue(), base); + } catch (URISyntaxException e1) { + throw new CanonicalizationException(e1); } + } } - - if (firstCall) { - // It is the first node of the subtree - // Obtain all the namespaces defined in the parents, and added to the output. - ns.getUnrenderedNodes(result); - // output the attributes in the xml namespace. - xmlattrStack.getXmlnsAttr(result); - firstCall = false; + if (base != null && base.length() != 0) { + baseAttr.setValue(base); + col.add(baseAttr); } + } - return result.iterator(); + cur.rendered = true; + col.addAll(loa.values()); } - - /** - * Returns the Attr[]s to be output for the given element. - *
    - * IMPORTANT: This method expects to work on a modified DOM tree, i.e. a - * DOM which has been prepared using - * {@link nokogiri.internals.c14n.security.utils.XMLUtils#circumventBug2650( - * org.w3c.dom.Document)}. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - */ - @Override - protected Iterator handleAttributes(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - // result will contain the attrs which have to be output - xmlattrStack.push(ns.getLevel()); - boolean isRealVisible = isVisibleDO(element, ns.getLevel()) == 1; - final SortedSet result = this.result; - result.clear(); - - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NUri = attribute.getNamespaceURI(); - String NName = attribute.getLocalName(); - String NValue = attribute.getValue(); - - if (!XMLNS_URI.equals(NUri)) { - //A non namespace definition node. - if (XML_LANG_URI.equals(NUri)) { - if (NName.equals("id")) { - if (isRealVisible) { - // treat xml:id like any other attribute - // (emit it, but don't inherit it) - result.add(attribute); - } - } else { - xmlattrStack.addXmlnsAttr(attribute); - } - } else if (isRealVisible) { - //The node is visible add the attribute to the list of output attributes. - result.add(attribute); - } - } else if (!XML.equals(NName) || !XML_LANG_URI.equals(NValue)) { - /* except omit namespace node with local name xml, which defines - * the xml prefix, if its string value is - * http://www.w3.org/XML/1998/namespace. - */ - // add the prefix binding to the ns symb table. - if (isVisible(attribute)) { - if (isRealVisible || !ns.removeMappingIfRender(NName)) { - // The xpath select this node output it if needed. 
- Node n = ns.addMappingAndRender(NName, NValue, attribute); - if (n != null) { - result.add((Attr)n); - if (C14nHelper.namespaceIsRelative(attribute)) { - Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } - } else { - if (isRealVisible && !XMLNS.equals(NName)) { - ns.removeMapping(NName); - } else { - ns.addMapping(NName, NValue, attribute); - } - } - } - } - } - - if (isRealVisible) { - //The element is visible, handle the xmlns definition - Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); - Node n = null; - if (xmlns == null) { - //No xmlns def just get the already defined. - n = ns.getMapping(XMLNS); - } else if (!isVisible(xmlns)) { - //There is a definition but the xmlns is not selected by the xpath. - //then xmlns="" - n = ns.addMappingAndRender(XMLNS, "", nullNode); - } - //output the xmlns def if needed. - if (n != null) { - result.add((Attr)n); + } + + private final XmlAttrStack xmlattrStack = new XmlAttrStack(); + + /** + * Constructor Canonicalizer11 + * + * @param includeComments + */ + public + Canonicalizer11(boolean includeComments) + { + super(includeComments); + } + + /** + * Always throws a CanonicalizationException because this is inclusive c14n. + * + * @param rootNode + * @param inclusiveNamespaces + * @return none it always fails + * @throws CanonicalizationException + */ + public byte[] + engineCanonicalizeSubTree( + Node rootNode, String inclusiveNamespaces, CanonicalFilter filter + ) throws CanonicalizationException + { + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + /** + * Returns the Attr[]s to be output for the given element. + *
    + * The code of this method is a copy of {@link #handleAttributes(Element, + * NameSpaceSymbTable)}, + * whereas it takes into account that subtree-c14n is -- well -- + * subtree-based. + * So if the element in question isRoot of c14n, it's parent is not in the + * node set, as well as all other ancestors. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + * @throws URISyntaxException + */ + @Override + protected Iterator + handleAttributesSubtree(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + if (!element.hasAttributes() && !firstCall) { + return null; + } + // result will contain the attrs which have to be output + final SortedSet result = this.result; + result.clear(); + + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NUri = attribute.getNamespaceURI(); + String NName = attribute.getLocalName(); + String NValue = attribute.getValue(); + + if (!XMLNS_URI.equals(NUri)) { + // It's not a namespace attr node. Add to the result and continue. + result.add(attribute); + } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NValue))) { + // The default mapping for xml must not be output. + Node n = ns.addMappingAndRender(NName, NValue, attribute); + + if (n != null) { + // Render the ns definition + result.add((Attr)n); + if (C14nHelper.namespaceIsRelative(attribute)) { + Object exArgs[] = {element.getTagName(), NName, attribute.getNodeValue()}; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - //Float all xml:* attributes of the unselected parent elements to this one. 
- xmlattrStack.getXmlnsAttr(result); - ns.getUnrenderedNodes(result); + } } + } + } - return result.iterator(); + if (firstCall) { + // It is the first node of the subtree + // Obtain all the namespaces defined in the parents, and added to the output. + ns.getUnrenderedNodes(result); + // output the attributes in the xml namespace. + xmlattrStack.getXmlnsAttr(result); + firstCall = false; } - @Override - protected void handleParent(Element e, NameSpaceSymbTable ns) { - if (!e.hasAttributes() && e.getNamespaceURI() == null) { - return; - } - xmlattrStack.push(-1); - NamedNodeMap attrs = e.getAttributes(); - int attrsLength = attrs.getLength(); - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NName = attribute.getLocalName(); - String NValue = attribute.getNodeValue(); - - if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI())) { - if (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue)) { - ns.addMapping(NName, NValue, attribute); + return result.iterator(); + } + + /** + * Returns the Attr[]s to be output for the given element. + *
    + * IMPORTANT: This method expects to work on a modified DOM tree, i.e. a + * DOM which has been prepared using + * {@link nokogiri.internals.c14n.security.utils.XMLUtils#circumventBug2650( + * org.w3c.dom.Document)}. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + */ + @Override + protected Iterator + handleAttributes(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + // result will contain the attrs which have to be output + xmlattrStack.push(ns.getLevel()); + boolean isRealVisible = isVisibleDO(element, ns.getLevel()) == 1; + final SortedSet result = this.result; + result.clear(); + + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NUri = attribute.getNamespaceURI(); + String NName = attribute.getLocalName(); + String NValue = attribute.getValue(); + + if (!XMLNS_URI.equals(NUri)) { + //A non namespace definition node. + if (XML_LANG_URI.equals(NUri)) { + if (NName.equals("id")) { + if (isRealVisible) { + // treat xml:id like any other attribute + // (emit it, but don't inherit it) + result.add(attribute); + } + } else { + xmlattrStack.addXmlnsAttr(attribute); + } + } else if (isRealVisible) { + //The node is visible add the attribute to the list of output attributes. + result.add(attribute); + } + } else if (!XML.equals(NName) || !XML_LANG_URI.equals(NValue)) { + /* except omit namespace node with local name xml, which defines + * the xml prefix, if its string value is + * http://www.w3.org/XML/1998/namespace. + */ + // add the prefix binding to the ns symb table. + if (isVisible(attribute)) { + if (isRealVisible || !ns.removeMappingIfRender(NName)) { + // The xpath select this node output it if needed. 
+ Node n = ns.addMappingAndRender(NName, NValue, attribute); + if (n != null) { + result.add((Attr)n); + if (C14nHelper.namespaceIsRelative(attribute)) { + Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - } else if (!"id".equals(NName) && XML_LANG_URI.equals(attribute.getNamespaceURI())) { - xmlattrStack.addXmlnsAttr(attribute); + } } - } - if (e.getNamespaceURI() != null) { - String NName = e.getPrefix(); - String NValue = e.getNamespaceURI(); - String Name; - if (NName == null || NName.equals("")) { - NName = "xmlns"; - Name = "xmlns"; + } else { + if (isRealVisible && !XMLNS.equals(NName)) { + ns.removeMapping(NName); } else { - Name = "xmlns:" + NName; + ns.addMapping(NName, NValue, attribute); } - Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); - n.setValue(NValue); - ns.addMapping(NName, NValue, n); + } } + } } - private static String joinURI(String baseURI, String relativeURI) throws URISyntaxException { - String bscheme = null; - String bauthority = null; - String bpath = ""; - String bquery = null; + if (isRealVisible) { + //The element is visible, handle the xmlns definition + Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); + Node n = null; + if (xmlns == null) { + //No xmlns def just get the already defined. + n = ns.getMapping(XMLNS); + } else if (!isVisible(xmlns)) { + //There is a definition but the xmlns is not selected by the xpath. + //then xmlns="" + n = ns.addMappingAndRender(XMLNS, "", nullNode); + } + //output the xmlns def if needed. + if (n != null) { + result.add((Attr)n); + } + //Float all xml:* attributes of the unselected parent elements to this one. 
+ xmlattrStack.getXmlnsAttr(result); + ns.getUnrenderedNodes(result); + } - // pre-parse the baseURI - if (baseURI != null) { - if (baseURI.endsWith("..")) { - baseURI = baseURI + "/"; - } - URI base = new URI(baseURI); - bscheme = base.getScheme(); - bauthority = base.getAuthority(); - bpath = base.getPath(); - bquery = base.getQuery(); + return result.iterator(); + } + + @Override + protected void + handleParent(Element e, NameSpaceSymbTable ns) + { + if (!e.hasAttributes() && e.getNamespaceURI() == null) { + return; + } + xmlattrStack.push(-1); + NamedNodeMap attrs = e.getAttributes(); + int attrsLength = attrs.getLength(); + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NName = attribute.getLocalName(); + String NValue = attribute.getNodeValue(); + + if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI())) { + if (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue)) { + ns.addMapping(NName, NValue, attribute); } + } else if (!"id".equals(NName) && XML_LANG_URI.equals(attribute.getNamespaceURI())) { + xmlattrStack.addXmlnsAttr(attribute); + } + } + if (e.getNamespaceURI() != null) { + String NName = e.getPrefix(); + String NValue = e.getNamespaceURI(); + String Name; + if (NName == null || NName.equals("")) { + NName = "xmlns"; + Name = "xmlns"; + } else { + Name = "xmlns:" + NName; + } + Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); + n.setValue(NValue); + ns.addMapping(NName, NValue, n); + } + } + + private static String + joinURI(String baseURI, String relativeURI) throws URISyntaxException + { + String bscheme = null; + String bauthority = null; + String bpath = ""; + String bquery = null; + + // pre-parse the baseURI + if (baseURI != null) { + if (baseURI.endsWith("..")) { + baseURI = baseURI + "/"; + } + URI base = new URI(baseURI); + bscheme = base.getScheme(); + bauthority = base.getAuthority(); + bpath = base.getPath(); + bquery = 
base.getQuery(); + } - URI r = new URI(relativeURI); - String rscheme = r.getScheme(); - String rauthority = r.getAuthority(); - String rpath = r.getPath(); - String rquery = r.getQuery(); + URI r = new URI(relativeURI); + String rscheme = r.getScheme(); + String rauthority = r.getAuthority(); + String rpath = r.getPath(); + String rquery = r.getQuery(); - String tscheme, tauthority, tpath, tquery; - if (rscheme != null && rscheme.equals(bscheme)) { - rscheme = null; - } - if (rscheme != null) { - tscheme = rscheme; - tauthority = rauthority; - tpath = removeDotSegments(rpath); + String tscheme, tauthority, tpath, tquery; + if (rscheme != null && rscheme.equals(bscheme)) { + rscheme = null; + } + if (rscheme != null) { + tscheme = rscheme; + tauthority = rauthority; + tpath = removeDotSegments(rpath); + tquery = rquery; + } else { + if (rauthority != null) { + tauthority = rauthority; + tpath = removeDotSegments(rpath); + tquery = rquery; + } else { + if (rpath.length() == 0) { + tpath = bpath; + if (rquery != null) { tquery = rquery; + } else { + tquery = bquery; + } } else { - if (rauthority != null) { - tauthority = rauthority; - tpath = removeDotSegments(rpath); - tquery = rquery; + if (rpath.startsWith("/")) { + tpath = removeDotSegments(rpath); + } else { + if (bauthority != null && bpath.length() == 0) { + tpath = "/" + rpath; } else { - if (rpath.length() == 0) { - tpath = bpath; - if (rquery != null) { - tquery = rquery; - } else { - tquery = bquery; - } - } else { - if (rpath.startsWith("/")) { - tpath = removeDotSegments(rpath); - } else { - if (bauthority != null && bpath.length() == 0) { - tpath = "/" + rpath; - } else { - int last = bpath.lastIndexOf('/'); - if (last == -1) { - tpath = rpath; - } else { - tpath = bpath.substring(0, last+1) + rpath; - } - } - tpath = removeDotSegments(tpath); - } - tquery = rquery; - } - tauthority = bauthority; + int last = bpath.lastIndexOf('/'); + if (last == -1) { + tpath = rpath; + } else { + tpath = 
bpath.substring(0, last + 1) + rpath; + } } - tscheme = bscheme; + tpath = removeDotSegments(tpath); + } + tquery = rquery; } - return new URI(tscheme, tauthority, tpath, tquery, null).toString(); + tauthority = bauthority; + } + tscheme = bscheme; + } + return new URI(tscheme, tauthority, tpath, tquery, null).toString(); + } + + private static String + removeDotSegments(String path) + { + + // 1. The input buffer is initialized with the now-appended path + // components then replace occurrences of "//" in the input buffer + // with "/" until no more occurrences of "//" are in the input buffer. + String input = path; + while (input.indexOf("//") > -1) { + input = input.replaceAll("//", "/"); } - private static String removeDotSegments(String path) { - - // 1. The input buffer is initialized with the now-appended path - // components then replace occurrences of "//" in the input buffer - // with "/" until no more occurrences of "//" are in the input buffer. - String input = path; - while (input.indexOf("//") > -1) { - input = input.replaceAll("//", "/"); - } + // Initialize the output buffer with the empty string. + StringBuilder output = new StringBuilder(); - // Initialize the output buffer with the empty string. - StringBuilder output = new StringBuilder(); + // If the input buffer starts with a root slash "/" then move this + // character to the output buffer. + if (input.charAt(0) == '/') { + output.append('/'); + input = input.substring(1); + } - // If the input buffer starts with a root slash "/" then move this - // character to the output buffer. - if (input.charAt(0) == '/') { - output.append('/'); - input = input.substring(1); + printStep("1 ", output, input); + + // While the input buffer is not empty, loop as follows + while (input.length() != 0) { + // 2A. 
If the input buffer begins with a prefix of "./", + // then remove that prefix from the input buffer + // else if the input buffer begins with a prefix of "../", then + // if also the output does not contain the root slash "/" only, + // then move this prefix to the end of the output buffer else + // remove that prefix + if (input.startsWith("./")) { + input = input.substring(2); + printStep("2A", output, input); + } else if (input.startsWith("../")) { + input = input.substring(3); + if (!output.toString().equals("/")) { + output.append("../"); } - - printStep("1 ", output, input); - - // While the input buffer is not empty, loop as follows - while (input.length() != 0) { - // 2A. If the input buffer begins with a prefix of "./", - // then remove that prefix from the input buffer - // else if the input buffer begins with a prefix of "../", then - // if also the output does not contain the root slash "/" only, - // then move this prefix to the end of the output buffer else - // remove that prefix - if (input.startsWith("./")) { - input = input.substring(2); - printStep("2A", output, input); - } else if (input.startsWith("../")) { - input = input.substring(3); - if (!output.toString().equals("/")) { - output.append("../"); - } - printStep("2A", output, input); - // 2B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that prefix - // with "/" in the input buffer; otherwise, - } else if (input.startsWith("/./")) { - input = input.substring(2); - printStep("2B", output, input); - } else if (input.equals("/.")) { - // FIXME: what is complete path segment? - input = input.replaceFirst("/.", "/"); - printStep("2B", output, input); - // 2C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." 
is a complete path segment, then replace that prefix - // with "/" in the input buffer and if also the output buffer is - // empty, last segment in the output buffer equals "../" or "..", - // where ".." is a complete path segment, then append ".." or "/.." - // for the latter case respectively to the output buffer else - // remove the last segment and its preceding "/" (if any) from the - // output buffer and if hereby the first character in the output - // buffer was removed and it was not the root slash then delete a - // leading slash from the input buffer; otherwise, - } else if (input.startsWith("/../")) { - input = input.substring(3); - if (output.length() == 0) { - output.append("/"); - } else if (output.toString().endsWith("../")) { - output.append(".."); - } else if (output.toString().endsWith("..")) { - output.append("/.."); - } else { - int index = output.lastIndexOf("/"); - if (index == -1) { - output = new StringBuilder(); - if (input.charAt(0) == '/') { - input = input.substring(1); - } - } else { - output = output.delete(index, output.length()); - } - } - printStep("2C", output, input); - } else if (input.equals("/..")) { - // FIXME: what is complete path segment? - input = input.replaceFirst("/..", "/"); - if (output.length() == 0) { - output.append("/"); - } else if (output.toString().endsWith("../")) { - output.append(".."); - } else if (output.toString().endsWith("..")) { - output.append("/.."); - } else { - int index = output.lastIndexOf("/"); - if (index == -1) { - output = new StringBuilder(); - if (input.charAt(0) == '/') { - input = input.substring(1); - } - } else { - output = output.delete(index, output.length()); - } - } - printStep("2C", output, input); - // 2D. if the input buffer consists only of ".", then remove - // that from the input buffer else if the input buffer consists - // only of ".." and if the output buffer does not contain only - // the root slash "/", then move the ".." 
to the output buffer - // else delte it.; otherwise, - } else if (input.equals(".")) { - input = ""; - printStep("2D", output, input); - } else if (input.equals("..")) { - if (!output.toString().equals("/")) { - output.append(".."); - } - input = ""; - printStep("2D", output, input); - // 2E. move the first path segment (if any) in the input buffer - // to the end of the output buffer, including the initial "/" - // character (if any) and any subsequent characters up to, but not - // including, the next "/" character or the end of the input buffer. - } else { - int end; - int begin = input.indexOf('/'); - if (begin == 0) { - end = input.indexOf('/', 1); - } else { - end = begin; - begin = 0; - } - String segment; - if (end == -1) { - segment = input.substring(begin); - input = ""; - } else { - segment = input.substring(begin, end); - input = input.substring(end); - } - output.append(segment); - printStep("2E", output, input); + printStep("2A", output, input); + // 2B. if the input buffer begins with a prefix of "/./" or "/.", + // where "." is a complete path segment, then replace that prefix + // with "/" in the input buffer; otherwise, + } else if (input.startsWith("/./")) { + input = input.substring(2); + printStep("2B", output, input); + } else if (input.equals("/.")) { + // FIXME: what is complete path segment? + input = input.replaceFirst("/.", "/"); + printStep("2B", output, input); + // 2C. if the input buffer begins with a prefix of "/../" or "/..", + // where ".." is a complete path segment, then replace that prefix + // with "/" in the input buffer and if also the output buffer is + // empty, last segment in the output buffer equals "../" or "..", + // where ".." is a complete path segment, then append ".." or "/.." 
+ // for the latter case respectively to the output buffer else + // remove the last segment and its preceding "/" (if any) from the + // output buffer and if hereby the first character in the output + // buffer was removed and it was not the root slash then delete a + // leading slash from the input buffer; otherwise, + } else if (input.startsWith("/../")) { + input = input.substring(3); + if (output.length() == 0) { + output.append("/"); + } else if (output.toString().endsWith("../")) { + output.append(".."); + } else if (output.toString().endsWith("..")) { + output.append("/.."); + } else { + int index = output.lastIndexOf("/"); + if (index == -1) { + output = new StringBuilder(); + if (input.charAt(0) == '/') { + input = input.substring(1); } + } else { + output = output.delete(index, output.length()); + } } - - // 3. Finally, if the only or last segment of the output buffer is - // "..", where ".." is a complete path segment not followed by a slash - // then append a slash "/". The output buffer is returned as the result - // of remove_dot_segments - if (output.toString().endsWith("..")) { - output.append('/'); - printStep("3 ", output, input); + printStep("2C", output, input); + } else if (input.equals("/..")) { + // FIXME: what is complete path segment? + input = input.replaceFirst("/..", "/"); + if (output.length() == 0) { + output.append("/"); + } else if (output.toString().endsWith("../")) { + output.append(".."); + } else if (output.toString().endsWith("..")) { + output.append("/.."); + } else { + int index = output.lastIndexOf("/"); + if (index == -1) { + output = new StringBuilder(); + if (input.charAt(0) == '/') { + input = input.substring(1); + } + } else { + output = output.delete(index, output.length()); + } } - - return output.toString(); + printStep("2C", output, input); + // 2D. if the input buffer consists only of ".", then remove + // that from the input buffer else if the input buffer consists + // only of ".." 
and if the output buffer does not contain only + // the root slash "/", then move the ".." to the output buffer + // else delte it.; otherwise, + } else if (input.equals(".")) { + input = ""; + printStep("2D", output, input); + } else if (input.equals("..")) { + if (!output.toString().equals("/")) { + output.append(".."); + } + input = ""; + printStep("2D", output, input); + // 2E. move the first path segment (if any) in the input buffer + // to the end of the output buffer, including the initial "/" + // character (if any) and any subsequent characters up to, but not + // including, the next "/" character or the end of the input buffer. + } else { + int end; + int begin = input.indexOf('/'); + if (begin == 0) { + end = input.indexOf('/', 1); + } else { + end = begin; + begin = 0; + } + String segment; + if (end == -1) { + segment = input.substring(begin); + input = ""; + } else { + segment = input.substring(begin, end); + input = input.substring(end); + } + output.append(segment); + printStep("2E", output, input); + } } - private static void printStep(String step, StringBuilder output, String input) { - //if (System.getProperty("nokogiri.c14.debug") == "on") { // - // System.out.println(" " + step + ": " + output); - // if (output.length() == 0) { - // System.out.println("\t\t\t\t" + input); - // } else { - // System.out.println("\t\t\t" + input); - // } - //} + // 3. Finally, if the only or last segment of the output buffer is + // "..", where ".." is a complete path segment not followed by a slash + // then append a slash "/". 
The output buffer is returned as the result + // of remove_dot_segments + if (output.toString().endsWith("..")) { + output.append('/'); + printStep("3 ", output, input); } + return output.toString(); + } + + private static void + printStep(String step, StringBuilder output, String input) + { + //if (System.getProperty("nokogiri.c14.debug") == "on") { // + // System.out.println(" " + step + ": " + output); + // if (output.length() == 0) { + // System.out.println("\t\t\t\t" + input); + // } else { + // System.out.println("\t\t\t" + input); + // } + //} + } + } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java index 852e179d72..ce9f754775 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java @@ -22,17 +22,24 @@ /** * @author Sean Mullan */ -public class Canonicalizer11_OmitComments extends Canonicalizer11 { +public class Canonicalizer11_OmitComments extends Canonicalizer11 +{ - public Canonicalizer11_OmitComments() { - super(false); - } + public + Canonicalizer11_OmitComments() + { + super(false); + } - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; - } + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; + } - public final boolean engineGetIncludeComments() { - return false; - } + public final boolean + engineGetIncludeComments() + { + return false; + } } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java index 594247009b..ca80d6f554 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java @@ -22,17 +22,24 @@ /** * @author Sean Mullan */ -public class Canonicalizer11_WithComments extends 
Canonicalizer11 { +public class Canonicalizer11_WithComments extends Canonicalizer11 +{ - public Canonicalizer11_WithComments() { - super(true); - } + public + Canonicalizer11_WithComments() + { + super(true); + } - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; - } + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; + } - public final boolean engineGetIncludeComments() { - return true; - } + public final boolean + engineGetIncludeComments() + { + return true; + } } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java index 66b344a38a..bf095e8747 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java @@ -40,328 +40,349 @@ * * @author Christian Geuer-Pollmann */ -public abstract class Canonicalizer20010315 extends CanonicalizerBase { - private static final String XMLNS_URI = Constants.NamespaceSpecNS; - private static final String XML_LANG_URI = Constants.XML_LANG_SPACE_SpecNS; - - private boolean firstCall = true; - private final SortedSet result = new TreeSet(COMPARE); - - private static class XmlAttrStack { - static class XmlsStackElement { - int level; - boolean rendered = false; - List nodes = new ArrayList(); - } - - int currentLevel = 0; - int lastlevel = 0; - XmlsStackElement cur; - List levels = new ArrayList(); - - void push(int level) { - currentLevel = level; - if (currentLevel == -1) { - return; - } - cur = null; - while (lastlevel >= currentLevel) { - levels.remove(levels.size() - 1); - int newSize = levels.size(); - if (newSize == 0) { - lastlevel = 0; - return; - } - lastlevel = (levels.get(newSize - 1)).level; - } - } - - void addXmlnsAttr(Attr n) { - if (cur == null) { - cur = new XmlsStackElement(); - cur.level = currentLevel; - levels.add(cur); - lastlevel = currentLevel; - } - cur.nodes.add(n); - 
} - - void getXmlnsAttr(Collection col) { - int size = levels.size() - 1; - if (cur == null) { - cur = new XmlsStackElement(); - cur.level = currentLevel; - lastlevel = currentLevel; - levels.add(cur); - } - boolean parentRendered = false; - if (size == -1) { - parentRendered = true; - } else { - XmlsStackElement e = levels.get(size); - if (e.rendered && e.level + 1 == currentLevel) { - parentRendered = true; - } - } - if (parentRendered) { - col.addAll(cur.nodes); - cur.rendered = true; - return; - } - - Map loa = new HashMap(); - for (; size >= 0; size--) { - XmlsStackElement e = levels.get(size); - Iterator it = e.nodes.iterator(); - while (it.hasNext()) { - Attr n = it.next(); - if (!loa.containsKey(n.getName())) { - loa.put(n.getName(), n); - } - } - } - - cur.rendered = true; - col.addAll(loa.values()); - } - +public abstract class Canonicalizer20010315 extends CanonicalizerBase +{ + private static final String XMLNS_URI = Constants.NamespaceSpecNS; + private static final String XML_LANG_URI = Constants.XML_LANG_SPACE_SpecNS; + + private boolean firstCall = true; + private final SortedSet result = new TreeSet(COMPARE); + + private static class XmlAttrStack + { + static class XmlsStackElement + { + int level; + boolean rendered = false; + List nodes = new ArrayList(); } - private final XmlAttrStack xmlattrStack = new XmlAttrStack(); - - /** - * Constructor Canonicalizer20010315 - * - * @param includeComments - */ - public Canonicalizer20010315(boolean includeComments) { - super(includeComments); - } - - /** - * Always throws a CanonicalizationException because this is inclusive c14n. - * - * @param xpathNodeSet - * @param inclusiveNamespaces - * @return none it always fails - * @throws CanonicalizationException always - */ - public byte[] engineCanonicalizeXPathNodeSet(Set xpathNodeSet, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException { - - /** $todo$ well, should we throw UnsupportedOperationException ? 
*/ - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + int currentLevel = 0; + int lastlevel = 0; + XmlsStackElement cur; + List levels = new ArrayList(); + + void + push(int level) + { + currentLevel = level; + if (currentLevel == -1) { + return; + } + cur = null; + while (lastlevel >= currentLevel) { + levels.remove(levels.size() - 1); + int newSize = levels.size(); + if (newSize == 0) { + lastlevel = 0; + return; + } + lastlevel = (levels.get(newSize - 1)).level; + } } - /** - * Always throws a CanonicalizationException because this is inclusive c14n. - * - * @param rootNode - * @param inclusiveNamespaces - * @return none it always fails - * @throws CanonicalizationException - */ - @Override - public byte[] engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException { - - /** $todo$ well, should we throw UnsupportedOperationException ? */ - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + void + addXmlnsAttr(Attr n) + { + if (cur == null) { + cur = new XmlsStackElement(); + cur.level = currentLevel; + levels.add(cur); + lastlevel = currentLevel; + } + cur.nodes.add(n); } - /** - * Returns the Attr[]s to be output for the given element. - *
    - * The code of this method is a copy of {@link #handleAttributes(Element, - * NameSpaceSymbTable)}, - * whereas it takes into account that subtree-c14n is -- well -- subtree-based. - * So if the element in question isRoot of c14n, it's parent is not in the - * node set, as well as all other ancestors. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - */ - @Override - protected Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - if (!element.hasAttributes() && !firstCall) { - return null; - } - // result will contain the attrs which have to be output - final SortedSet result = this.result; - result.clear(); - - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NUri = attribute.getNamespaceURI(); - String NName = attribute.getLocalName(); - String NValue = attribute.getValue(); - - if (!XMLNS_URI.equals(NUri)) { - //It's not a namespace attr node. Add to the result and continue. - result.add(attribute); - } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NValue))) { - //The default mapping for xml must not be output. 
- Node n = ns.addMappingAndRender(NName, NValue, attribute); - - if (n != null) { - //Render the ns definition - result.add((Attr)n); - if (C14nHelper.namespaceIsRelative(attribute)) { - Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } - } + void + getXmlnsAttr(Collection col) + { + int size = levels.size() - 1; + if (cur == null) { + cur = new XmlsStackElement(); + cur.level = currentLevel; + lastlevel = currentLevel; + levels.add(cur); + } + boolean parentRendered = false; + if (size == -1) { + parentRendered = true; + } else { + XmlsStackElement e = levels.get(size); + if (e.rendered && e.level + 1 == currentLevel) { + parentRendered = true; } - - if (firstCall) { - //It is the first node of the subtree - //Obtain all the namespaces defined in the parents, and added to the output. - ns.getUnrenderedNodes(result); - //output the attributes in the xml namespace. - xmlattrStack.getXmlnsAttr(result); - firstCall = false; + } + if (parentRendered) { + col.addAll(cur.nodes); + cur.rendered = true; + return; + } + + Map loa = new HashMap(); + for (; size >= 0; size--) { + XmlsStackElement e = levels.get(size); + Iterator it = e.nodes.iterator(); + while (it.hasNext()) { + Attr n = it.next(); + if (!loa.containsKey(n.getName())) { + loa.put(n.getName(), n); + } } + } - return result.iterator(); + cur.rendered = true; + col.addAll(loa.values()); } - /** - * Returns the Attr[]s to be output for the given element. - *
    - * IMPORTANT: This method expects to work on a modified DOM tree, i.e. a DOM which has - * been prepared using {@link nokogiri.internals.c14n.security.utils.XMLUtils#circumventBug2650( - * org.w3c.dom.Document)}. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - */ - @Override - protected Iterator handleAttributes(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - // result will contain the attrs which have to be output - xmlattrStack.push(ns.getLevel()); - boolean isRealVisible = isVisibleDO(element, ns.getLevel()) == 1; - final SortedSet result = this.result; - result.clear(); - - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NUri = attribute.getNamespaceURI(); - String NName = attribute.getLocalName(); - String NValue = attribute.getValue(); - - if (!XMLNS_URI.equals(NUri)) { - //A non namespace definition node. - if (XML_LANG_URI.equals(NUri)) { - xmlattrStack.addXmlnsAttr(attribute); - } else if (isRealVisible) { - //The node is visible add the attribute to the list of output attributes. - result.add(attribute); - } - } else if (!XML.equals(NName) || !XML_LANG_URI.equals(NValue)) { - /* except omit namespace node with local name xml, which defines - * the xml prefix, if its string value is http://www.w3.org/XML/1998/namespace. - */ - //add the prefix binding to the ns symb table. - if (isVisible(attribute)) { - if (isRealVisible || !ns.removeMappingIfRender(NName)) { - //The xpath select this node output it if needed. 
- Node n = ns.addMappingAndRender(NName, NValue, attribute); - if (n != null) { - result.add((Attr)n); - if (C14nHelper.namespaceIsRelative(attribute)) { - Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } - } else { - if (isRealVisible && !XMLNS.equals(NName)) { - ns.removeMapping(NName); - } else { - ns.addMapping(NName, NValue, attribute); - } - } - } - } - } - if (isRealVisible) { - //The element is visible, handle the xmlns definition - Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); - Node n = null; - if (xmlns == null) { - //No xmlns def just get the already defined. - n = ns.getMapping(XMLNS); - } else if (!isVisible(xmlns)) { - //There is a definition but the xmlns is not selected by the xpath. - //then xmlns="" - n = ns.addMappingAndRender(XMLNS, "", nullNode); - } - //output the xmlns def if needed. - if (n != null) { - result.add((Attr)n); + } + + private final XmlAttrStack xmlattrStack = new XmlAttrStack(); + + /** + * Constructor Canonicalizer20010315 + * + * @param includeComments + */ + public + Canonicalizer20010315(boolean includeComments) + { + super(includeComments); + } + + /** + * Always throws a CanonicalizationException because this is inclusive c14n. + * + * @param xpathNodeSet + * @param inclusiveNamespaces + * @return none it always fails + * @throws CanonicalizationException always + */ + public byte[] + engineCanonicalizeXPathNodeSet(Set xpathNodeSet, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException + { + + /** $todo$ well, should we throw UnsupportedOperationException ? */ + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + /** + * Always throws a CanonicalizationException because this is inclusive c14n. 
+ * + * @param rootNode + * @param inclusiveNamespaces + * @return none it always fails + * @throws CanonicalizationException + */ + @Override + public byte[] + engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException + { + + /** $todo$ well, should we throw UnsupportedOperationException ? */ + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + /** + * Returns the Attr[]s to be output for the given element. + *
    + * The code of this method is a copy of {@link #handleAttributes(Element, + * NameSpaceSymbTable)}, + * whereas it takes into account that subtree-c14n is -- well -- subtree-based. + * So if the element in question isRoot of c14n, it's parent is not in the + * node set, as well as all other ancestors. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + */ + @Override + protected Iterator + handleAttributesSubtree(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + if (!element.hasAttributes() && !firstCall) { + return null; + } + // result will contain the attrs which have to be output + final SortedSet result = this.result; + result.clear(); + + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NUri = attribute.getNamespaceURI(); + String NName = attribute.getLocalName(); + String NValue = attribute.getValue(); + + if (!XMLNS_URI.equals(NUri)) { + //It's not a namespace attr node. Add to the result and continue. + result.add(attribute); + } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NValue))) { + //The default mapping for xml must not be output. + Node n = ns.addMappingAndRender(NName, NValue, attribute); + + if (n != null) { + //Render the ns definition + result.add((Attr)n); + if (C14nHelper.namespaceIsRelative(attribute)) { + Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - //Float all xml:* attributes of the unselected parent elements to this one. 
- xmlattrStack.getXmlnsAttr(result); - ns.getUnrenderedNodes(result); + } } + } + } - return result.iterator(); + if (firstCall) { + //It is the first node of the subtree + //Obtain all the namespaces defined in the parents, and added to the output. + ns.getUnrenderedNodes(result); + //output the attributes in the xml namespace. + xmlattrStack.getXmlnsAttr(result); + firstCall = false; } - @Override - protected void handleParent(Element e, NameSpaceSymbTable ns) { - if (!e.hasAttributes() && e.getNamespaceURI() == null) { - return; - } - xmlattrStack.push(-1); - NamedNodeMap attrs = e.getAttributes(); - int attrsLength = attrs.getLength(); - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NName = attribute.getLocalName(); - String NValue = attribute.getNodeValue(); - - if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI())) { - if (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue)) { - ns.addMapping(NName, NValue, attribute); + return result.iterator(); + } + + /** + * Returns the Attr[]s to be output for the given element. + *
    + * IMPORTANT: This method expects to work on a modified DOM tree, i.e. a DOM which has + * been prepared using {@link nokogiri.internals.c14n.security.utils.XMLUtils#circumventBug2650( + * org.w3c.dom.Document)}. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + */ + @Override + protected Iterator + handleAttributes(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + // result will contain the attrs which have to be output + xmlattrStack.push(ns.getLevel()); + boolean isRealVisible = isVisibleDO(element, ns.getLevel()) == 1; + final SortedSet result = this.result; + result.clear(); + + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NUri = attribute.getNamespaceURI(); + String NName = attribute.getLocalName(); + String NValue = attribute.getValue(); + + if (!XMLNS_URI.equals(NUri)) { + //A non namespace definition node. + if (XML_LANG_URI.equals(NUri)) { + xmlattrStack.addXmlnsAttr(attribute); + } else if (isRealVisible) { + //The node is visible add the attribute to the list of output attributes. + result.add(attribute); + } + } else if (!XML.equals(NName) || !XML_LANG_URI.equals(NValue)) { + /* except omit namespace node with local name xml, which defines + * the xml prefix, if its string value is http://www.w3.org/XML/1998/namespace. + */ + //add the prefix binding to the ns symb table. + if (isVisible(attribute)) { + if (isRealVisible || !ns.removeMappingIfRender(NName)) { + //The xpath select this node output it if needed. 
+ Node n = ns.addMappingAndRender(NName, NValue, attribute); + if (n != null) { + result.add((Attr)n); + if (C14nHelper.namespaceIsRelative(attribute)) { + Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - } else if (XML_LANG_URI.equals(attribute.getNamespaceURI())) { - xmlattrStack.addXmlnsAttr(attribute); + } } - } - if (e.getNamespaceURI() != null) { - String NName = e.getPrefix(); - String NValue = e.getNamespaceURI(); - String Name; - if (NName == null || NName.equals("")) { - NName = "xmlns"; - Name = "xmlns"; + } else { + if (isRealVisible && !XMLNS.equals(NName)) { + ns.removeMapping(NName); } else { - Name = "xmlns:" + NName; + ns.addMapping(NName, NValue, attribute); } - Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); - n.setValue(NValue); - ns.addMapping(NName, NValue, n); + } } + } + } + if (isRealVisible) { + //The element is visible, handle the xmlns definition + Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); + Node n = null; + if (xmlns == null) { + //No xmlns def just get the already defined. + n = ns.getMapping(XMLNS); + } else if (!isVisible(xmlns)) { + //There is a definition but the xmlns is not selected by the xpath. + //then xmlns="" + n = ns.addMappingAndRender(XMLNS, "", nullNode); + } + //output the xmlns def if needed. + if (n != null) { + result.add((Attr)n); + } + //Float all xml:* attributes of the unselected parent elements to this one. 
+ xmlattrStack.getXmlnsAttr(result); + ns.getUnrenderedNodes(result); + } + + return result.iterator(); + } + + @Override + protected void + handleParent(Element e, NameSpaceSymbTable ns) + { + if (!e.hasAttributes() && e.getNamespaceURI() == null) { + return; + } + xmlattrStack.push(-1); + NamedNodeMap attrs = e.getAttributes(); + int attrsLength = attrs.getLength(); + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NName = attribute.getLocalName(); + String NValue = attribute.getNodeValue(); + + if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI())) { + if (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue)) { + ns.addMapping(NName, NValue, attribute); + } + } else if (XML_LANG_URI.equals(attribute.getNamespaceURI())) { + xmlattrStack.addXmlnsAttr(attribute); + } + } + if (e.getNamespaceURI() != null) { + String NName = e.getPrefix(); + String NValue = e.getNamespaceURI(); + String Name; + if (NName == null || NName.equals("")) { + NName = "xmlns"; + Name = "xmlns"; + } else { + Name = "xmlns:" + NName; + } + Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); + n.setValue(NValue); + ns.addMapping(NName, NValue, n); } + } } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java index 7ffe66d2fe..f9e7cf7f5b 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java @@ -45,251 +45,264 @@ * @see * XML Canonicalization, Version 1.0 */ -public abstract class Canonicalizer20010315Excl extends CanonicalizerBase { +public abstract class Canonicalizer20010315Excl extends CanonicalizerBase +{ - private static final String XML_LANG_URI = Constants.XML_LANG_SPACE_SpecNS; - private static final String XMLNS_URI = Constants.NamespaceSpecNS; + private static final String XML_LANG_URI = 
Constants.XML_LANG_SPACE_SpecNS; + private static final String XMLNS_URI = Constants.NamespaceSpecNS; - /** - * This Set contains the names (Strings like "xmlns" or "xmlns:foo") of - * the inclusive namespaces. - */ - private SortedSet inclusiveNSSet; + /** + * This Set contains the names (Strings like "xmlns" or "xmlns:foo") of + * the inclusive namespaces. + */ + private SortedSet inclusiveNSSet; - private final SortedSet result = new TreeSet(COMPARE); + private final SortedSet result = new TreeSet(COMPARE); - /** - * Constructor Canonicalizer20010315Excl - * - * @param includeComments - */ - public Canonicalizer20010315Excl(boolean includeComments) { - super(includeComments); - } + /** + * Constructor Canonicalizer20010315Excl + * + * @param includeComments + */ + public + Canonicalizer20010315Excl(boolean includeComments) + { + super(includeComments); + } - /** - * Method engineCanonicalizeSubTree - * @inheritDoc - * @param rootNode - * - * @throws CanonicalizationException - */ - @Override - public byte[] engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) - throws CanonicalizationException { - return engineCanonicalizeSubTree(rootNode, "", null); - } + /** + * Method engineCanonicalizeSubTree + * @inheritDoc + * @param rootNode + * + * @throws CanonicalizationException + */ + @Override + public byte[] + engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) + throws CanonicalizationException + { + return engineCanonicalizeSubTree(rootNode, "", null); + } - /** - * Method engineCanonicalizeSubTree - * @inheritDoc - * @param rootNode - * @param inclusiveNamespaces - * - * @throws CanonicalizationException - */ - @Override - public byte[] engineCanonicalizeSubTree( - Node rootNode, String inclusiveNamespaces, CanonicalFilter filter - ) throws CanonicalizationException { - return engineCanonicalizeSubTree(rootNode, inclusiveNamespaces, null, filter); - } + /** + * Method engineCanonicalizeSubTree + * @inheritDoc + * @param rootNode + * 
@param inclusiveNamespaces + * + * @throws CanonicalizationException + */ + @Override + public byte[] + engineCanonicalizeSubTree( + Node rootNode, String inclusiveNamespaces, CanonicalFilter filter + ) throws CanonicalizationException + { + return engineCanonicalizeSubTree(rootNode, inclusiveNamespaces, null, filter); + } - /** - * Method engineCanonicalizeSubTree - * @param rootNode - * @param inclusiveNamespaces - * @param excl A element to exclude from the c14n process. - * @return the rootNode c14n. - * @throws CanonicalizationException - */ - public byte[] engineCanonicalizeSubTree( - Node rootNode, String inclusiveNamespaces, Node excl, CanonicalFilter filter - ) throws CanonicalizationException{ - inclusiveNSSet = InclusiveNamespaces.prefixStr2Set(inclusiveNamespaces); - return super.engineCanonicalizeSubTree(rootNode, excl, filter); - } + /** + * Method engineCanonicalizeSubTree + * @param rootNode + * @param inclusiveNamespaces + * @param excl A element to exclude from the c14n process. + * @return the rootNode c14n. 
+ * @throws CanonicalizationException + */ + public byte[] + engineCanonicalizeSubTree( + Node rootNode, String inclusiveNamespaces, Node excl, CanonicalFilter filter + ) throws CanonicalizationException + { + inclusiveNSSet = InclusiveNamespaces.prefixStr2Set(inclusiveNamespaces); + return super.engineCanonicalizeSubTree(rootNode, excl, filter); + } - @Override - protected Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - // result will contain the attrs which have to be output - final SortedSet result = this.result; - result.clear(); + @Override + protected Iterator + handleAttributesSubtree(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + // result will contain the attrs which have to be output + final SortedSet result = this.result; + result.clear(); - // The prefix visibly utilized (in the attribute or in the name) in - // the element - SortedSet visiblyUtilized = new TreeSet(); - if (inclusiveNSSet != null && !inclusiveNSSet.isEmpty()) { - visiblyUtilized.addAll(inclusiveNSSet); - } + // The prefix visibly utilized (in the attribute or in the name) in + // the element + SortedSet visiblyUtilized = new TreeSet(); + if (inclusiveNSSet != null && !inclusiveNSSet.isEmpty()) { + visiblyUtilized.addAll(inclusiveNSSet); + } - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NName = attribute.getLocalName(); - String NNodeValue = attribute.getNodeValue(); + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NName = attribute.getLocalName(); + String NNodeValue = attribute.getNodeValue(); - if (!XMLNS_URI.equals(attribute.getNamespaceURI())) { - // Not a 
namespace definition. - // The Element is output element, add the prefix (if used) to - // visiblyUtilized - String prefix = attribute.getPrefix(); - if (prefix != null && !(prefix.equals(XML) || prefix.equals(XMLNS))) { - visiblyUtilized.add(prefix); - } - // Add to the result. - result.add(attribute); - } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NNodeValue)) - && ns.addMapping(NName, NNodeValue, attribute) - && C14nHelper.namespaceIsRelative(NNodeValue)) { - // The default mapping for xml must not be output. - // New definition check if it is relative. - Object exArgs[] = {element.getTagName(), NName, attribute.getNodeValue()}; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } - String prefix; - if (element.getNamespaceURI() != null - && !(element.getPrefix() == null || element.getPrefix().length() == 0)) { - prefix = element.getPrefix(); - } else { - prefix = XMLNS; - } - visiblyUtilized.add(prefix); - - for (String s : visiblyUtilized) { - Attr key = ns.getMapping(s); - if (key != null) { - result.add(key); - } + if (!XMLNS_URI.equals(attribute.getNamespaceURI())) { + // Not a namespace definition. + // The Element is output element, add the prefix (if used) to + // visiblyUtilized + String prefix = attribute.getPrefix(); + if (prefix != null && !(prefix.equals(XML) || prefix.equals(XMLNS))) { + visiblyUtilized.add(prefix); + } + // Add to the result. + result.add(attribute); + } else if (!(XML.equals(NName) && XML_LANG_URI.equals(NNodeValue)) + && ns.addMapping(NName, NNodeValue, attribute) + && C14nHelper.namespaceIsRelative(NNodeValue)) { + // The default mapping for xml must not be output. + // New definition check if it is relative. 
+ Object exArgs[] = {element.getTagName(), NName, attribute.getNodeValue()}; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); } - - return result.iterator(); + } } + String prefix; + if (element.getNamespaceURI() != null + && !(element.getPrefix() == null || element.getPrefix().length() == 0)) { + prefix = element.getPrefix(); + } else { + prefix = XMLNS; + } + visiblyUtilized.add(prefix); - /** - * @inheritDoc - * @param element - * @throws CanonicalizationException - */ - @Override - protected final Iterator handleAttributes(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - // result will contain the attrs which have to be output - final SortedSet result = this.result; - result.clear(); + for (String s : visiblyUtilized) { + Attr key = ns.getMapping(s); + if (key != null) { + result.add(key); + } + } - // The prefix visibly utilized (in the attribute or in the name) in - // the element - Set visiblyUtilized = null; - // It's the output selected. 
- boolean isOutputElement = isVisibleDO(element, ns.getLevel()) == 1; - if (isOutputElement) { - visiblyUtilized = new TreeSet(); - if (inclusiveNSSet != null && !inclusiveNSSet.isEmpty()) { - visiblyUtilized.addAll(inclusiveNSSet); - } - } + return result.iterator(); + } - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); + /** + * @inheritDoc + * @param element + * @throws CanonicalizationException + */ + @Override + protected final Iterator + handleAttributes(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + // result will contain the attrs which have to be output + final SortedSet result = this.result; + result.clear(); - String NName = attribute.getLocalName(); - String NNodeValue = attribute.getNodeValue(); + // The prefix visibly utilized (in the attribute or in the name) in + // the element + Set visiblyUtilized = null; + // It's the output selected. + boolean isOutputElement = isVisibleDO(element, ns.getLevel()) == 1; + if (isOutputElement) { + visiblyUtilized = new TreeSet(); + if (inclusiveNSSet != null && !inclusiveNSSet.isEmpty()) { + visiblyUtilized.addAll(inclusiveNSSet); + } + } - if (!XMLNS_URI.equals(attribute.getNamespaceURI())) { - if (isVisible(attribute) && isOutputElement) { - // The Element is output element, add the prefix (if used) - // to visibyUtilized - String prefix = attribute.getPrefix(); - if (prefix != null && !(prefix.equals(XML) || prefix.equals(XMLNS))) { - visiblyUtilized.add(prefix); - } - // Add to the result. 
- result.add(attribute); - } - } else if (isOutputElement && !isVisible(attribute) && !XMLNS.equals(NName)) { - ns.removeMappingIfNotRender(NName); - } else { - if (!isOutputElement && isVisible(attribute) - && inclusiveNSSet.contains(NName) - && !ns.removeMappingIfRender(NName)) { - Node n = ns.addMappingAndRender(NName, NNodeValue, attribute); - if (n != null) { - result.add((Attr)n); - if (C14nHelper.namespaceIsRelative(attribute)) { - Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); - if (ns.addMapping(NName, NNodeValue, attribute) - && C14nHelper.namespaceIsRelative(NNodeValue)) { - // New definition check if it is relative - Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; - throw new CanonicalizationException( - "c14n.Canonicalizer.RelativeNamespace", exArgs - ); - } - } - } - } + String NName = attribute.getLocalName(); + String NNodeValue = attribute.getNodeValue(); - if (isOutputElement) { - // The element is visible, handle the xmlns definition - Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); - if (xmlns != null && !isVisible(xmlns)) { - // There is a definition but the xmlns is not selected by the - // xpath. 
then xmlns="" - ns.addMapping(XMLNS, "", nullNode); + if (!XMLNS_URI.equals(attribute.getNamespaceURI())) { + if (isVisible(attribute) && isOutputElement) { + // The Element is output element, add the prefix (if used) + // to visibyUtilized + String prefix = attribute.getPrefix(); + if (prefix != null && !(prefix.equals(XML) || prefix.equals(XMLNS))) { + visiblyUtilized.add(prefix); } - - String prefix; - if (element.getNamespaceURI() != null - && !(element.getPrefix() == null || element.getPrefix().length() == 0)) { - prefix = element.getPrefix(); - } else { - prefix = XMLNS; + // Add to the result. + result.add(attribute); + } + } else if (isOutputElement && !isVisible(attribute) && !XMLNS.equals(NName)) { + ns.removeMappingIfNotRender(NName); + } else { + if (!isOutputElement && isVisible(attribute) + && inclusiveNSSet.contains(NName) + && !ns.removeMappingIfRender(NName)) { + Node n = ns.addMappingAndRender(NName, NNodeValue, attribute); + if (n != null) { + result.add((Attr)n); + if (C14nHelper.namespaceIsRelative(attribute)) { + Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); + } } - visiblyUtilized.add(prefix); + } - for (String s : visiblyUtilized) { - Attr key = ns.getMapping(s); - if (key != null) { - result.add(key); - } - } + if (ns.addMapping(NName, NNodeValue, attribute) + && C14nHelper.namespaceIsRelative(NNodeValue)) { + // New definition check if it is relative + Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() }; + throw new CanonicalizationException( + "c14n.Canonicalizer.RelativeNamespace", exArgs + ); + } } - - return result.iterator(); + } } - /* - protected void circumventBugIfNeeded(XMLSignatureInput input) - throws CanonicalizationException, ParserConfigurationException, - IOException, SAXException { - if (!input.isNeedsToBeExpanded() || inclusiveNSSet.isEmpty() || inclusiveNSSet.isEmpty()) { - 
return; - } - Document doc = null; - if (input.getSubNode() != null) { - doc = XMLUtils.getOwnerDocument(input.getSubNode()); - } else { - doc = XMLUtils.getOwnerDocument(input.getNodeSet()); + if (isOutputElement) { + // The element is visible, handle the xmlns definition + Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS); + if (xmlns != null && !isVisible(xmlns)) { + // There is a definition but the xmlns is not selected by the + // xpath. then xmlns="" + ns.addMapping(XMLNS, "", nullNode); + } + + String prefix; + if (element.getNamespaceURI() != null + && !(element.getPrefix() == null || element.getPrefix().length() == 0)) { + prefix = element.getPrefix(); + } else { + prefix = XMLNS; + } + visiblyUtilized.add(prefix); + + for (String s : visiblyUtilized) { + Attr key = ns.getMapping(s); + if (key != null) { + result.add(key); } - XMLUtils.circumventBug2650(doc); + } } - */ + + return result.iterator(); + } + + /* + protected void circumventBugIfNeeded(XMLSignatureInput input) + throws CanonicalizationException, ParserConfigurationException, + IOException, SAXException { + if (!input.isNeedsToBeExpanded() || inclusiveNSSet.isEmpty() || inclusiveNSSet.isEmpty()) { + return; + } + Document doc = null; + if (input.getSubNode() != null) { + doc = XMLUtils.getOwnerDocument(input.getSubNode()); + } else { + doc = XMLUtils.getOwnerDocument(input.getNodeSet()); + } + XMLUtils.circumventBug2650(doc); + } + */ } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java index 33967b035d..f53be05d35 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java @@ -19,22 +19,29 @@ package nokogiri.internals.c14n; -public class Canonicalizer20010315ExclOmitComments extends Canonicalizer20010315Excl { +public class Canonicalizer20010315ExclOmitComments 
extends Canonicalizer20010315Excl +{ - /** - * - */ - public Canonicalizer20010315ExclOmitComments() { - super(false); - } + /** + * + */ + public + Canonicalizer20010315ExclOmitComments() + { + super(false); + } - /** @inheritDoc */ - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; - } + /** @inheritDoc */ + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; + } - /** @inheritDoc */ - public final boolean engineGetIncludeComments() { - return false; - } + /** @inheritDoc */ + public final boolean + engineGetIncludeComments() + { + return false; + } } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java index c0f59ac727..22e175b717 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java @@ -22,23 +22,30 @@ /** * Class Canonicalizer20010315ExclWithComments */ -public class Canonicalizer20010315ExclWithComments extends Canonicalizer20010315Excl { +public class Canonicalizer20010315ExclWithComments extends Canonicalizer20010315Excl +{ - /** - * Constructor Canonicalizer20010315ExclWithComments - * - */ - public Canonicalizer20010315ExclWithComments() { - super(true); - } + /** + * Constructor Canonicalizer20010315ExclWithComments + * + */ + public + Canonicalizer20010315ExclWithComments() + { + super(true); + } - /** @inheritDoc */ - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; - } + /** @inheritDoc */ + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; + } - /** @inheritDoc */ - public final boolean engineGetIncludeComments() { - return true; - } + /** @inheritDoc */ + public final boolean + engineGetIncludeComments() + { + return true; + } } diff --git 
a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java index 1dd61c1f74..f8585c9680 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java @@ -22,23 +22,30 @@ /** * @author Christian Geuer-Pollmann */ -public class Canonicalizer20010315OmitComments extends Canonicalizer20010315 { +public class Canonicalizer20010315OmitComments extends Canonicalizer20010315 +{ - /** - * Constructor Canonicalizer20010315WithXPathOmitComments - * - */ - public Canonicalizer20010315OmitComments() { - super(false); - } + /** + * Constructor Canonicalizer20010315WithXPathOmitComments + * + */ + public + Canonicalizer20010315OmitComments() + { + super(false); + } - /** @inheritDoc */ - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; - } + /** @inheritDoc */ + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; + } - /** @inheritDoc */ - public final boolean engineGetIncludeComments() { - return false; - } + /** @inheritDoc */ + public final boolean + engineGetIncludeComments() + { + return false; + } } diff --git a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java index e392a0661b..6104ab5de4 100644 --- a/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +++ b/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java @@ -22,22 +22,29 @@ /** * @author Christian Geuer-Pollmann */ -public class Canonicalizer20010315WithComments extends Canonicalizer20010315 { +public class Canonicalizer20010315WithComments extends Canonicalizer20010315 +{ - /** - * Constructor Canonicalizer20010315WithXPathWithComments - */ - public Canonicalizer20010315WithComments() { - super(true); - } + /** + * 
Constructor Canonicalizer20010315WithXPathWithComments + */ + public + Canonicalizer20010315WithComments() + { + super(true); + } - /** @inheritDoc */ - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; - } + /** @inheritDoc */ + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; + } - /** @inheritDoc */ - public final boolean engineGetIncludeComments() { - return true; - } + /** @inheritDoc */ + public final boolean + engineGetIncludeComments() + { + return true; + } } diff --git a/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java b/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java index 4f2ed76734..7a81f428f5 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java @@ -46,585 +46,615 @@ * * @author Christian Geuer-Pollmann */ -public abstract class CanonicalizerBase extends CanonicalizerSpi { - public static final String XML = "xml"; - public static final String XMLNS = "xmlns"; - - protected static final AttrCompare COMPARE = new AttrCompare(); - protected static final Attr nullNode; - - private static final byte[] END_PI = {'?','>'}; - private static final byte[] BEGIN_PI = {'<','?'}; - private static final byte[] END_COMM = {'-','-','>'}; - private static final byte[] BEGIN_COMM = {'<','!','-','-'}; - private static final byte[] XA = {'&','#','x','A',';'}; - private static final byte[] X9 = {'&','#','x','9',';'}; - private static final byte[] QUOT = {'&','q','u','o','t',';'}; - private static final byte[] XD = {'&','#','x','D',';'}; - private static final byte[] GT = {'&','g','t',';'}; - private static final byte[] LT = {'&','l','t',';'}; - private static final byte[] END_TAG = {'<','/'}; - private static final byte[] AMP = {'&','a','m','p',';'}; - private static final byte[] equalsStr = {'=','\"'}; - - protected static final int NODE_BEFORE_DOCUMENT_ELEMENT = -1; - protected static final int 
NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT = 0; - protected static final int NODE_AFTER_DOCUMENT_ELEMENT = 1; - - static { - // The null xmlns definition. - try { - DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - nullNode = documentBuilder.newDocument().createAttributeNS(Constants.NamespaceSpecNS, XMLNS); - nullNode.setValue(""); - } catch (Exception e) { - throw new RuntimeException("Unable to create nullNode: " + e); - } - } - - private List nodeFilter; - - private final boolean includeComments; - //private Set xpathNodeSet; - /** - * The node to be skipped/excluded from the DOM tree - * in subtree canonicalizations. - */ - private Node excludeNode; - private OutputStream writer = new ByteArrayOutputStream(); - - /** - * Constructor CanonicalizerBase - * - * @param includeComments - */ - public CanonicalizerBase(boolean includeComments) { - this.includeComments = includeComments; +public abstract class CanonicalizerBase extends CanonicalizerSpi +{ + public static final String XML = "xml"; + public static final String XMLNS = "xmlns"; + + protected static final AttrCompare COMPARE = new AttrCompare(); + protected static final Attr nullNode; + + private static final byte[] END_PI = {'?', '>'}; + private static final byte[] BEGIN_PI = {'<', '?'}; + private static final byte[] END_COMM = {'-', '-', '>'}; + private static final byte[] BEGIN_COMM = {'<', '!', '-', '-'}; + private static final byte[] XA = {'&', '#', 'x', 'A', ';'}; + private static final byte[] X9 = {'&', '#', 'x', '9', ';'}; + private static final byte[] QUOT = {'&', 'q', 'u', 'o', 't', ';'}; + private static final byte[] XD = {'&', '#', 'x', 'D', ';'}; + private static final byte[] GT = {'&', 'g', 't', ';'}; + private static final byte[] LT = {'&', 'l', 't', ';'}; + private static final byte[] END_TAG = {'<', '/'}; + private static final byte[] AMP = {'&', 'a', 'm', 'p', ';'}; + private static final byte[] equalsStr = {'=', '\"'}; + + protected static 
final int NODE_BEFORE_DOCUMENT_ELEMENT = -1; + protected static final int NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT = 0; + protected static final int NODE_AFTER_DOCUMENT_ELEMENT = 1; + + static + { + // The null xmlns definition. + try { + DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + nullNode = documentBuilder.newDocument().createAttributeNS(Constants.NamespaceSpecNS, XMLNS); + nullNode.setValue(""); + } catch (Exception e) { + throw new RuntimeException("Unable to create nullNode: " + e); } - - /** - * Method engineCanonicalizeSubTree - * @inheritDoc - * @param rootNode - * @throws CanonicalizationException - */ - @Override - public byte[] engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) - throws CanonicalizationException { - return engineCanonicalizeSubTree(rootNode, (Node)null, filter); + } + + private List nodeFilter; + + private final boolean includeComments; + //private Set xpathNodeSet; + /** + * The node to be skipped/excluded from the DOM tree + * in subtree canonicalizations. + */ + private Node excludeNode; + private OutputStream writer = new ByteArrayOutputStream(); + + /** + * Constructor CanonicalizerBase + * + * @param includeComments + */ + public + CanonicalizerBase(boolean includeComments) + { + this.includeComments = includeComments; + } + + /** + * Method engineCanonicalizeSubTree + * @inheritDoc + * @param rootNode + * @throws CanonicalizationException + */ + @Override + public byte[] + engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) + throws CanonicalizationException + { + return engineCanonicalizeSubTree(rootNode, (Node)null, filter); + } + + /** + * @param writer The writer to set. + */ + @Override + public void + setWriter(OutputStream writer) + { + this.writer = writer; + } + + /** + * Canonicalizes a Subtree node. 
+ * + * @param rootNode + * the root of the subtree to canonicalize + * @param excludeNode + * a node to be excluded from the canonicalize operation + * @return The canonicalize stream. + * @throws CanonicalizationException + */ + protected byte[] + engineCanonicalizeSubTree(Node rootNode, Node excludeNode, CanonicalFilter filter) + throws CanonicalizationException + { + this.excludeNode = excludeNode; + try { + NameSpaceSymbTable ns = new NameSpaceSymbTable(); + int nodeLevel = NODE_BEFORE_DOCUMENT_ELEMENT; + if (rootNode != null && Node.ELEMENT_NODE == rootNode.getNodeType()) { + //Fills the nssymbtable with the definitions of the parent of the root subnode + getParentNameSpaces((Element)rootNode, ns); + nodeLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; + } + this.canonicalizeSubTree(rootNode, ns, rootNode, nodeLevel, filter); + this.writer.flush(); + if (this.writer instanceof ByteArrayOutputStream) { + byte[] result = ((ByteArrayOutputStream)this.writer).toByteArray(); + if (reset) { + ((ByteArrayOutputStream)this.writer).reset(); + } else { + this.writer.close(); + } + return result; + } else { + this.writer.close(); + } + return null; + + } catch (UnsupportedEncodingException ex) { + throw new CanonicalizationException("empty", ex); + } catch (IOException ex) { + throw new CanonicalizationException("empty", ex); } - - /** - * @param writer The writer to set. - */ - @Override - public void setWriter(OutputStream writer) { - this.writer = writer; + } + + + /** + * Method canonicalizeSubTree, this function is a recursive one. + * + * @param currentNode + * @param ns + * @param endnode + * @throws CanonicalizationException + * @throws IOException + */ + protected final void + canonicalizeSubTree( + Node currentNode, NameSpaceSymbTable ns, Node endnode, int documentLevel, CanonicalFilter filter + ) throws CanonicalizationException, IOException + { + if (isVisibleInt(currentNode) == -1) { + return; } - - /** - * Canonicalizes a Subtree node. 
- * - * @param rootNode - * the root of the subtree to canonicalize - * @param excludeNode - * a node to be excluded from the canonicalize operation - * @return The canonicalize stream. - * @throws CanonicalizationException - */ - protected byte[] engineCanonicalizeSubTree(Node rootNode, Node excludeNode, CanonicalFilter filter) - throws CanonicalizationException { - this.excludeNode = excludeNode; - try { - NameSpaceSymbTable ns = new NameSpaceSymbTable(); - int nodeLevel = NODE_BEFORE_DOCUMENT_ELEMENT; - if (rootNode != null && Node.ELEMENT_NODE == rootNode.getNodeType()) { - //Fills the nssymbtable with the definitions of the parent of the root subnode - getParentNameSpaces((Element)rootNode, ns); - nodeLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; + Node sibling = null; + Node parentNode = null; + final OutputStream writer = this.writer; + final Node excludeNode = this.excludeNode; + final boolean includeComments = this.includeComments; + Map cache = new HashMap(); + do { + switch (currentNode.getNodeType()) { + + case Node.ENTITY_NODE : + case Node.NOTATION_NODE : + case Node.ATTRIBUTE_NODE : + // illegal node type during traversal + throw new CanonicalizationException("empty"); + + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + ns.outputNodePush(); + sibling = currentNode.getFirstChild(); + break; + + case Node.COMMENT_NODE : + if (includeComments) { + outputCommentToWriter((Comment) currentNode, writer, documentLevel); + } + break; + + case Node.PROCESSING_INSTRUCTION_NODE : + outputPItoWriter((ProcessingInstruction) currentNode, writer, documentLevel); + break; + + case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + outputTextToWriter(currentNode.getNodeValue(), writer); + break; + + case Node.ELEMENT_NODE : + documentLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; + if (currentNode == excludeNode) { + break; + } + if (filter != null && !filter.includeNodes(currentNode, parentNode)) { + break; + } + + Element currentElement = 
(Element)currentNode; + //Add a level to the nssymbtable. So latter can be pop-back. + ns.outputNodePush(); + writer.write('<'); + String name = currentElement.getTagName(); + UtfHelpper.writeByte(name, writer, cache); + + Iterator attrs = this.handleAttributesSubtree(currentElement, ns); + if (attrs != null) { + //we output all Attrs which are available + while (attrs.hasNext()) { + Attr attr = attrs.next(); + outputAttrToWriter(attr.getNodeName(), attr.getNodeValue(), writer, cache); } - this.canonicalizeSubTree(rootNode, ns, rootNode, nodeLevel, filter); - this.writer.flush(); - if (this.writer instanceof ByteArrayOutputStream) { - byte[] result = ((ByteArrayOutputStream)this.writer).toByteArray(); - if (reset) { - ((ByteArrayOutputStream)this.writer).reset(); - } else { - this.writer.close(); - } - return result; - } else { - this.writer.close(); + } + writer.write('>'); + sibling = currentNode.getFirstChild(); + if (sibling == null) { + writer.write(END_TAG); + UtfHelpper.writeStringToUtf8(name, writer); + writer.write('>'); + //We finished with this level, pop to the previous definitions. + ns.outputNodePop(); + if (parentNode != null) { + sibling = currentNode.getNextSibling(); } - return null; - - } catch (UnsupportedEncodingException ex) { - throw new CanonicalizationException("empty", ex); - } catch (IOException ex) { - throw new CanonicalizationException("empty", ex); + } else { + parentNode = currentElement; + } + break; + case Node.DOCUMENT_TYPE_NODE : + default : + break; + } + while (sibling == null && parentNode != null) { + writer.write(END_TAG); + UtfHelpper.writeByte(((Element)parentNode).getTagName(), writer, cache); + writer.write('>'); + //We finished with this level, pop to the previous definitions. + ns.outputNodePop(); + if (parentNode == endnode) { + return; } - } - - - /** - * Method canonicalizeSubTree, this function is a recursive one. 
- * - * @param currentNode - * @param ns - * @param endnode - * @throws CanonicalizationException - * @throws IOException - */ - protected final void canonicalizeSubTree( - Node currentNode, NameSpaceSymbTable ns, Node endnode, int documentLevel, CanonicalFilter filter - ) throws CanonicalizationException, IOException { - if (isVisibleInt(currentNode) == -1) { - return; + sibling = parentNode.getNextSibling(); + parentNode = parentNode.getParentNode(); + if (parentNode == null || Node.ELEMENT_NODE != parentNode.getNodeType()) { + documentLevel = NODE_AFTER_DOCUMENT_ELEMENT; + parentNode = null; } - Node sibling = null; - Node parentNode = null; - final OutputStream writer = this.writer; - final Node excludeNode = this.excludeNode; - final boolean includeComments = this.includeComments; - Map cache = new HashMap(); - do { - switch (currentNode.getNodeType()) { - - case Node.ENTITY_NODE : - case Node.NOTATION_NODE : - case Node.ATTRIBUTE_NODE : - // illegal node type during traversal - throw new CanonicalizationException("empty"); - - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - ns.outputNodePush(); - sibling = currentNode.getFirstChild(); - break; - - case Node.COMMENT_NODE : - if (includeComments) { - outputCommentToWriter((Comment) currentNode, writer, documentLevel); - } - break; - - case Node.PROCESSING_INSTRUCTION_NODE : - outputPItoWriter((ProcessingInstruction) currentNode, writer, documentLevel); - break; - - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - outputTextToWriter(currentNode.getNodeValue(), writer); - break; - - case Node.ELEMENT_NODE : - documentLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; - if (currentNode == excludeNode) { - break; - } - if (filter != null && !filter.includeNodes(currentNode, parentNode)) { - break; - } - - Element currentElement = (Element)currentNode; - //Add a level to the nssymbtable. So latter can be pop-back. 
- ns.outputNodePush(); - writer.write('<'); - String name = currentElement.getTagName(); - UtfHelpper.writeByte(name, writer, cache); - - Iterator attrs = this.handleAttributesSubtree(currentElement, ns); - if (attrs != null) { - //we output all Attrs which are available - while (attrs.hasNext()) { - Attr attr = attrs.next(); - outputAttrToWriter(attr.getNodeName(), attr.getNodeValue(), writer, cache); - } - } - writer.write('>'); - sibling = currentNode.getFirstChild(); - if (sibling == null) { - writer.write(END_TAG); - UtfHelpper.writeStringToUtf8(name, writer); - writer.write('>'); - //We finished with this level, pop to the previous definitions. - ns.outputNodePop(); - if (parentNode != null) { - sibling = currentNode.getNextSibling(); - } - } else { - parentNode = currentElement; - } - break; - case Node.DOCUMENT_TYPE_NODE : - default : - break; - } - while (sibling == null && parentNode != null) { - writer.write(END_TAG); - UtfHelpper.writeByte(((Element)parentNode).getTagName(), writer, cache); - writer.write('>'); - //We finished with this level, pop to the previous definitions. 
- ns.outputNodePop(); - if (parentNode == endnode) { - return; - } - sibling = parentNode.getNextSibling(); - parentNode = parentNode.getParentNode(); - if (parentNode == null || Node.ELEMENT_NODE != parentNode.getNodeType()) { - documentLevel = NODE_AFTER_DOCUMENT_ELEMENT; - parentNode = null; - } - } - if (sibling == null) { - return; - } - currentNode = sibling; - sibling = currentNode.getNextSibling(); - } while(true); - } - - protected int isVisibleDO(Node currentNode, int level) { - if (nodeFilter != null) { - Iterator it = nodeFilter.iterator(); - while (it.hasNext()) { - int i = (it.next()).isNodeIncludeDO(currentNode, level); - if (i != 1) { - return i; - } - } + } + if (sibling == null) { + return; + } + currentNode = sibling; + sibling = currentNode.getNextSibling(); + } while (true); + } + + protected int + isVisibleDO(Node currentNode, int level) + { + if (nodeFilter != null) { + Iterator it = nodeFilter.iterator(); + while (it.hasNext()) { + int i = (it.next()).isNodeIncludeDO(currentNode, level); + if (i != 1) { + return i; } - //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { - // return 0; - //} - return 1; + } } - - protected int isVisibleInt(Node currentNode) { - if (nodeFilter != null) { - Iterator it = nodeFilter.iterator(); - while (it.hasNext()) { - int i = (it.next()).isNodeInclude(currentNode); - if (i != 1) { - return i; - } - } + //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { + // return 0; + //} + return 1; + } + + protected int + isVisibleInt(Node currentNode) + { + if (nodeFilter != null) { + Iterator it = nodeFilter.iterator(); + while (it.hasNext()) { + int i = (it.next()).isNodeInclude(currentNode); + if (i != 1) { + return i; } - //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { - // return 0; - //} - return 1; + } } - - protected boolean isVisible(Node currentNode) { - if (nodeFilter != null) { - Iterator it = nodeFilter.iterator(); - 
while (it.hasNext()) { - if (it.next().isNodeInclude(currentNode) != 1) { - return false; - } - } + //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { + // return 0; + //} + return 1; + } + + protected boolean + isVisible(Node currentNode) + { + if (nodeFilter != null) { + Iterator it = nodeFilter.iterator(); + while (it.hasNext()) { + if (it.next().isNodeInclude(currentNode) != 1) { + return false; } - //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { - // return false; - //} - return true; + } } - - protected void handleParent(Element e, NameSpaceSymbTable ns) { - if (!e.hasAttributes() && e.getNamespaceURI() == null) { - return; - } - NamedNodeMap attrs = e.getAttributes(); - int attrsLength = attrs.getLength(); - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - String NName = attribute.getLocalName(); - String NValue = attribute.getNodeValue(); - - if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI()) - && (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue))) { - ns.addMapping(NName, NValue, attribute); - } - } - if (e.getNamespaceURI() != null) { - String NName = e.getPrefix(); - String NValue = e.getNamespaceURI(); - String Name; - if (NName == null || NName.equals("")) { - NName = XMLNS; - Name = XMLNS; - } else { - Name = XMLNS + ":" + NName; - } - Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); - n.setValue(NValue); - ns.addMapping(NName, NValue, n); - } + //if ((this.xpathNodeSet != null) && !this.xpathNodeSet.contains(currentNode)) { + // return false; + //} + return true; + } + + protected void + handleParent(Element e, NameSpaceSymbTable ns) + { + if (!e.hasAttributes() && e.getNamespaceURI() == null) { + return; } - - /** - * Adds to ns the definitions from the parent elements of el - * @param el - * @param ns - */ - protected final void getParentNameSpaces(Element el, NameSpaceSymbTable 
ns) { - Node n1 = el.getParentNode(); - if (n1 == null || Node.ELEMENT_NODE != n1.getNodeType()) { - return; - } - //Obtain all the parents of the element - List parents = new ArrayList(); - Node parent = n1; - while (parent != null && Node.ELEMENT_NODE == parent.getNodeType()) { - parents.add((Element)parent); - parent = parent.getParentNode(); - } - //Visit them in reverse order. - ListIterator it = parents.listIterator(parents.size()); - while (it.hasPrevious()) { - Element ele = it.previous(); - handleParent(ele, ns); - } - parents.clear(); - Attr nsprefix; - if (((nsprefix = ns.getMappingWithoutRendered(XMLNS)) != null) - && "".equals(nsprefix.getValue())) { - ns.addMappingAndRender(XMLNS, "", nullNode); - } + NamedNodeMap attrs = e.getAttributes(); + int attrsLength = attrs.getLength(); + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + String NName = attribute.getLocalName(); + String NValue = attribute.getNodeValue(); + + if (Constants.NamespaceSpecNS.equals(attribute.getNamespaceURI()) + && (!XML.equals(NName) || !Constants.XML_LANG_SPACE_SpecNS.equals(NValue))) { + ns.addMapping(NName, NValue, attribute); + } } - - /** - * Obtain the attributes to output for this node in XPathNodeSet c14n. - * - * @param element - * @param ns - * @return the attributes nodes to output. - * @throws CanonicalizationException - */ - abstract Iterator handleAttributes(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException; - - /** - * Obtain the attributes to output for this node in a Subtree c14n. - * - * @param element - * @param ns - * @return the attributes nodes to output. 
- * @throws CanonicalizationException - */ - abstract Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException; - - //abstract void circumventBugIfNeeded(XMLSignatureInput input) - // throws CanonicalizationException, ParserConfigurationException, IOException, SAXException; - - /** - * Outputs an Attribute to the internal Writer. - * - * The string value of the node is modified by replacing - *
      - *
    • all ampersands (&) with &amp;
    • - *
    • all open angle brackets (<) with &lt;
    • - *
    • all quotation mark characters with &quot;
    • - *
    • and the whitespace characters #x9, #xA, and #xD, with character - * references. The character references are written in uppercase - * hexadecimal with no leading zeroes (for example, #xD is represented - * by the character reference &#xD;)
    • - *
    - * - * @param name - * @param value - * @param writer - * @throws IOException - */ - protected static final void outputAttrToWriter( - final String name, final String value, - final OutputStream writer, final Map cache - ) throws IOException { - writer.write(' '); - UtfHelpper.writeByte(name, writer, cache); - writer.write(equalsStr); - byte[] toWrite; - final int length = value.length(); - int i = 0; - while (i < length) { - char c = value.charAt(i++); - - switch (c) { - - case '&' : - toWrite = AMP; - break; - - case '<' : - toWrite = LT; - break; - - case '"' : - toWrite = QUOT; - break; - - case 0x09 : // '\t' - toWrite = X9; - break; - - case 0x0A : // '\n' - toWrite = XA; - break; - - case 0x0D : // '\r' - toWrite = XD; - break; - - default : - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - continue; - } - writer.write(toWrite); - } - - writer.write('\"'); + if (e.getNamespaceURI() != null) { + String NName = e.getPrefix(); + String NValue = e.getNamespaceURI(); + String Name; + if (NName == null || NName.equals("")) { + NName = XMLNS; + Name = XMLNS; + } else { + Name = XMLNS + ":" + NName; + } + Attr n = e.getOwnerDocument().createAttributeNS("http://www.w3.org/2000/xmlns/", Name); + n.setValue(NValue); + ns.addMapping(NName, NValue, n); + } + } + + /** + * Adds to ns the definitions from the parent elements of el + * @param el + * @param ns + */ + protected final void + getParentNameSpaces(Element el, NameSpaceSymbTable ns) + { + Node n1 = el.getParentNode(); + if (n1 == null || Node.ELEMENT_NODE != n1.getNodeType()) { + return; + } + //Obtain all the parents of the element + List parents = new ArrayList(); + Node parent = n1; + while (parent != null && Node.ELEMENT_NODE == parent.getNodeType()) { + parents.add((Element)parent); + parent = parent.getParentNode(); + } + //Visit them in reverse order. 
+ ListIterator it = parents.listIterator(parents.size()); + while (it.hasPrevious()) { + Element ele = it.previous(); + handleParent(ele, ns); + } + parents.clear(); + Attr nsprefix; + if (((nsprefix = ns.getMappingWithoutRendered(XMLNS)) != null) + && "".equals(nsprefix.getValue())) { + ns.addMappingAndRender(XMLNS, "", nullNode); + } + } + + /** + * Obtain the attributes to output for this node in XPathNodeSet c14n. + * + * @param element + * @param ns + * @return the attributes nodes to output. + * @throws CanonicalizationException + */ + abstract Iterator handleAttributes(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException; + + /** + * Obtain the attributes to output for this node in a Subtree c14n. + * + * @param element + * @param ns + * @return the attributes nodes to output. + * @throws CanonicalizationException + */ + abstract Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException; + + //abstract void circumventBugIfNeeded(XMLSignatureInput input) + // throws CanonicalizationException, ParserConfigurationException, IOException, SAXException; + + /** + * Outputs an Attribute to the internal Writer. + * + * The string value of the node is modified by replacing + *
      + *
    • all ampersands (&) with &amp;
    • + *
    • all open angle brackets (<) with &lt;
    • + *
    • all quotation mark characters with &quot;
    • + *
    • and the whitespace characters #x9, #xA, and #xD, with character + * references. The character references are written in uppercase + * hexadecimal with no leading zeroes (for example, #xD is represented + * by the character reference &#xD;)
    • + *
    + * + * @param name + * @param value + * @param writer + * @throws IOException + */ + protected static final void + outputAttrToWriter( + final String name, final String value, + final OutputStream writer, final Map cache + ) throws IOException + { + writer.write(' '); + UtfHelpper.writeByte(name, writer, cache); + writer.write(equalsStr); + byte[] toWrite; + final int length = value.length(); + int i = 0; + while (i < length) { + char c = value.charAt(i++); + + switch (c) { + + case '&' : + toWrite = AMP; + break; + + case '<' : + toWrite = LT; + break; + + case '"' : + toWrite = QUOT; + break; + + case 0x09 : // '\t' + toWrite = X9; + break; + + case 0x0A : // '\n' + toWrite = XA; + break; + + case 0x0D : // '\r' + toWrite = XD; + break; + + default : + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); + } + continue; + } + writer.write(toWrite); } - /** - * Outputs a PI to the internal Writer. - * - * @param currentPI - * @param writer where to write the things - * @throws IOException - */ - protected void outputPItoWriter( - ProcessingInstruction currentPI, OutputStream writer, int position - ) throws IOException { - if (position == NODE_AFTER_DOCUMENT_ELEMENT) { - writer.write('\n'); - } - writer.write(BEGIN_PI); - - final String target = currentPI.getTarget(); - int length = target.length(); - - for (int i = 0; i < length; i++) { - char c = target.charAt(i); - if (c == 0x0D) { - writer.write(XD); - } else { - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - } + writer.write('\"'); + } + + /** + * Outputs a PI to the internal Writer. 
+ * + * @param currentPI + * @param writer where to write the things + * @throws IOException + */ + protected void + outputPItoWriter( + ProcessingInstruction currentPI, OutputStream writer, int position + ) throws IOException + { + if (position == NODE_AFTER_DOCUMENT_ELEMENT) { + writer.write('\n'); + } + writer.write(BEGIN_PI); + + final String target = currentPI.getTarget(); + int length = target.length(); + + for (int i = 0; i < length; i++) { + char c = target.charAt(i); + if (c == 0x0D) { + writer.write(XD); + } else { + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); } + } + } - final String data = currentPI.getData(); + final String data = currentPI.getData(); - length = data.length(); + length = data.length(); - if (length > 0) { - writer.write(' '); + if (length > 0) { + writer.write(' '); - for (int i = 0; i < length; i++) { - char c = data.charAt(i); - if (c == 0x0D) { - writer.write(XD); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - } - } - - writer.write(END_PI); - if (position == NODE_BEFORE_DOCUMENT_ELEMENT) { - writer.write('\n'); + for (int i = 0; i < length; i++) { + char c = data.charAt(i); + if (c == 0x0D) { + writer.write(XD); + } else { + UtfHelpper.writeCharToUtf8(c, writer); } + } } - /** - * Method outputCommentToWriter - * - * @param currentComment - * @param writer writer where to write the things - * @throws IOException - */ - protected void outputCommentToWriter( - Comment currentComment, OutputStream writer, int position - ) throws IOException { - if (position == NODE_AFTER_DOCUMENT_ELEMENT) { - writer.write('\n'); - } - writer.write(BEGIN_COMM); - - final String data = currentComment.getData(); - final int length = data.length(); - - for (int i = 0; i < length; i++) { - char c = data.charAt(i); - if (c == 0x0D) { - writer.write(XD); - } else { - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - } - } - - writer.write(END_COMM); - if 
(position == NODE_BEFORE_DOCUMENT_ELEMENT) { - writer.write('\n'); + writer.write(END_PI); + if (position == NODE_BEFORE_DOCUMENT_ELEMENT) { + writer.write('\n'); + } + } + + /** + * Method outputCommentToWriter + * + * @param currentComment + * @param writer writer where to write the things + * @throws IOException + */ + protected void + outputCommentToWriter( + Comment currentComment, OutputStream writer, int position + ) throws IOException + { + if (position == NODE_AFTER_DOCUMENT_ELEMENT) { + writer.write('\n'); + } + writer.write(BEGIN_COMM); + + final String data = currentComment.getData(); + final int length = data.length(); + + for (int i = 0; i < length; i++) { + char c = data.charAt(i); + if (c == 0x0D) { + writer.write(XD); + } else { + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); } + } } - /** - * Outputs a Text of CDATA section to the internal Writer. - * - * @param text - * @param writer writer where to write the things - * @throws IOException - */ - protected static final void outputTextToWriter( - final String text, final OutputStream writer - ) throws IOException { - final int length = text.length(); - byte[] toWrite; - for (int i = 0; i < length; i++) { - char c = text.charAt(i); - - switch (c) { - - case '&' : - toWrite = AMP; - break; - - case '<' : - toWrite = LT; - break; - - case '>' : - toWrite = GT; - break; - - case 0xD : - toWrite = XD; - break; - - default : - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - continue; - } - writer.write(toWrite); - } + writer.write(END_COMM); + if (position == NODE_BEFORE_DOCUMENT_ELEMENT) { + writer.write('\n'); + } + } + + /** + * Outputs a Text of CDATA section to the internal Writer. 
+ * + * @param text + * @param writer writer where to write the things + * @throws IOException + */ + protected static final void + outputTextToWriter( + final String text, final OutputStream writer + ) throws IOException + { + final int length = text.length(); + byte[] toWrite; + for (int i = 0; i < length; i++) { + char c = text.charAt(i); + + switch (c) { + + case '&' : + toWrite = AMP; + break; + + case '<' : + toWrite = LT; + break; + + case '>' : + toWrite = GT; + break; + + case 0xD : + toWrite = XD; + break; + + default : + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); + } + continue; + } + writer.write(toWrite); } + } } diff --git a/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java b/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java index eb20be0094..2f9a758b8c 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java @@ -43,131 +43,152 @@ * because the decrypted XML content will share the same physical representation * as the original XML content that was encrypted. */ -public class CanonicalizerPhysical extends CanonicalizerBase { - - private final SortedSet result = new TreeSet(COMPARE); - - /** - * Constructor Canonicalizer20010315 - */ - public CanonicalizerPhysical() { - super(true); - } - - /** - * Always throws a CanonicalizationException. - * - * @param xpathNodeSet - * @param inclusiveNamespaces - * @return none it always fails - * @throws CanonicalizationException always - */ - public byte[] engineCanonicalizeXPathNodeSet(Set xpathNodeSet, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException { - - /** $todo$ well, should we throw UnsupportedOperationException ? 
*/ - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); +public class CanonicalizerPhysical extends CanonicalizerBase +{ + + private final SortedSet result = new TreeSet(COMPARE); + + /** + * Constructor Canonicalizer20010315 + */ + public + CanonicalizerPhysical() + { + super(true); + } + + /** + * Always throws a CanonicalizationException. + * + * @param xpathNodeSet + * @param inclusiveNamespaces + * @return none it always fails + * @throws CanonicalizationException always + */ + public byte[] + engineCanonicalizeXPathNodeSet(Set xpathNodeSet, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException + { + + /** $todo$ well, should we throw UnsupportedOperationException ? */ + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + /** + * Always throws a CanonicalizationException. + * + * @param rootNode + * @param inclusiveNamespaces + * @return none it always fails + * @throws CanonicalizationException + */ + @Override + public byte[] + engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException + { + + /** $todo$ well, should we throw UnsupportedOperationException ? */ + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + /** + * Returns the Attr[]s to be output for the given element. + *
    + * The code of this method is a copy of {@link #handleAttributes(Element, + * NameSpaceSymbTable)}, + * whereas it takes into account that subtree-c14n is -- well -- subtree-based. + * So if the element in question isRoot of c14n, it's parent is not in the + * node set, as well as all other ancestors. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + */ + @Override + protected Iterator + handleAttributesSubtree(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + if (!element.hasAttributes()) { + return null; } - /** - * Always throws a CanonicalizationException. - * - * @param rootNode - * @param inclusiveNamespaces - * @return none it always fails - * @throws CanonicalizationException - */ - @Override - public byte[] engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException { - - /** $todo$ well, should we throw UnsupportedOperationException ? */ - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); - } - - /** - * Returns the Attr[]s to be output for the given element. - *
    - * The code of this method is a copy of {@link #handleAttributes(Element, - * NameSpaceSymbTable)}, - * whereas it takes into account that subtree-c14n is -- well -- subtree-based. - * So if the element in question isRoot of c14n, it's parent is not in the - * node set, as well as all other ancestors. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - */ - @Override - protected Iterator handleAttributesSubtree(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - if (!element.hasAttributes()) { - return null; - } - - // result will contain all the attrs declared directly on that element - final SortedSet result = this.result; - result.clear(); - - if (element.hasAttributes()) { - NamedNodeMap attrs = element.getAttributes(); - int attrsLength = attrs.getLength(); - - for (int i = 0; i < attrsLength; i++) { - Attr attribute = (Attr) attrs.item(i); - result.add(attribute); - } - } - - return result.iterator(); - } + // result will contain all the attrs declared directly on that element + final SortedSet result = this.result; + result.clear(); - /** - * Returns the Attr[]s to be output for the given element. - * - * @param element - * @param ns - * @return the Attr[]s to be output - * @throws CanonicalizationException - */ - @Override - protected Iterator handleAttributes(Element element, NameSpaceSymbTable ns) - throws CanonicalizationException { - - /** $todo$ well, should we throw UnsupportedOperationException ? 
*/ - throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); - } + if (element.hasAttributes()) { + NamedNodeMap attrs = element.getAttributes(); + int attrsLength = attrs.getLength(); - @Override - protected void handleParent(Element e, NameSpaceSymbTable ns) { - // nothing to do + for (int i = 0; i < attrsLength; i++) { + Attr attribute = (Attr) attrs.item(i); + result.add(attribute); + } } - /** @inheritDoc */ - @Override - public final String engineGetURI() { - return Canonicalizer.ALGO_ID_C14N_PHYSICAL; - } - - /** @inheritDoc */ - @Override - public final boolean engineGetIncludeComments() { - return true; - } - - @Override - protected void outputPItoWriter(ProcessingInstruction currentPI, - OutputStream writer, int position) throws IOException { - // Processing Instructions before or after the document element are not treated specially - super.outputPItoWriter(currentPI, writer, NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT); - } - - @Override - protected void outputCommentToWriter(Comment currentComment, - OutputStream writer, int position) throws IOException { - // Comments before or after the document element are not treated specially - super.outputCommentToWriter(currentComment, writer, NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT); - } + return result.iterator(); + } + + /** + * Returns the Attr[]s to be output for the given element. + * + * @param element + * @param ns + * @return the Attr[]s to be output + * @throws CanonicalizationException + */ + @Override + protected Iterator + handleAttributes(Element element, NameSpaceSymbTable ns) + throws CanonicalizationException + { + + /** $todo$ well, should we throw UnsupportedOperationException ? 
*/ + throw new CanonicalizationException("c14n.Canonicalizer.UnsupportedOperation"); + } + + @Override + protected void + handleParent(Element e, NameSpaceSymbTable ns) + { + // nothing to do + } + + /** @inheritDoc */ + @Override + public final String + engineGetURI() + { + return Canonicalizer.ALGO_ID_C14N_PHYSICAL; + } + + /** @inheritDoc */ + @Override + public final boolean + engineGetIncludeComments() + { + return true; + } + + @Override + protected void + outputPItoWriter(ProcessingInstruction currentPI, + OutputStream writer, int position) throws IOException + { + // Processing Instructions before or after the document element are not treated specially + super.outputPItoWriter(currentPI, writer, NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT); + } + + @Override + protected void + outputCommentToWriter(Comment currentComment, + OutputStream writer, int position) throws IOException + { + // Comments before or after the document element are not treated specially + super.outputCommentToWriter(currentComment, writer, NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT); + } } diff --git a/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java b/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java index 44d57ff986..db05ba198f 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java @@ -28,49 +28,50 @@ * * @author Christian Geuer-Pollmann */ -public abstract class CanonicalizerSpi { +public abstract class CanonicalizerSpi +{ - /** Reset the writer after a c14n */ - protected boolean reset = false; + /** Reset the writer after a c14n */ + protected boolean reset = false; - /** - * Returns the URI of this engine. - * @return the URI - */ - public abstract String engineGetURI(); + /** + * Returns the URI of this engine. 
+ * @return the URI + */ + public abstract String engineGetURI(); - /** - * Returns true if comments are included - * @return true if comments are included - */ - public abstract boolean engineGetIncludeComments(); + /** + * Returns true if comments are included + * @return true if comments are included + */ + public abstract boolean engineGetIncludeComments(); - /** - * C14n a node tree. - * - * @param rootNode - * @return the c14n bytes - * @throws CanonicalizationException - */ - public abstract byte[] engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) - throws CanonicalizationException; + /** + * C14n a node tree. + * + * @param rootNode + * @return the c14n bytes + * @throws CanonicalizationException + */ + public abstract byte[] engineCanonicalizeSubTree(Node rootNode, CanonicalFilter filter) + throws CanonicalizationException; - /** - * C14n a node tree. - * - * @param rootNode - * @param inclusiveNamespaces - * @return the c14n bytes - * @throws CanonicalizationException - */ - public abstract byte[] engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) - throws CanonicalizationException; + /** + * C14n a node tree. + * + * @param rootNode + * @param inclusiveNamespaces + * @return the c14n bytes + * @throws CanonicalizationException + */ + public abstract byte[] engineCanonicalizeSubTree(Node rootNode, String inclusiveNamespaces, CanonicalFilter filter) + throws CanonicalizationException; - /** - * Sets the writer where the canonicalization ends. ByteArrayOutputStream if - * none is set. - * @param os - */ - public abstract void setWriter(OutputStream os); + /** + * Sets the writer where the canonicalization ends. ByteArrayOutputStream if + * none is set. 
+ * @param os + */ + public abstract void setWriter(OutputStream os); } diff --git a/ext/java/nokogiri/internals/c14n/Constants.java b/ext/java/nokogiri/internals/c14n/Constants.java index 815b78cadf..648c7f83cf 100644 --- a/ext/java/nokogiri/internals/c14n/Constants.java +++ b/ext/java/nokogiri/internals/c14n/Constants.java @@ -27,16 +27,19 @@ * * @author $Author: coheigea $ */ -public class Constants { - - /** The URI for XML spec*/ - public static final String XML_LANG_SPACE_SpecNS = "http://www.w3.org/XML/1998/namespace"; - - /** The URI for XMLNS spec*/ - public static final String NamespaceSpecNS = "http://www.w3.org/2000/xmlns/"; +public class Constants +{ - private Constants() { - // we don't allow instantiation - } + /** The URI for XML spec*/ + public static final String XML_LANG_SPACE_SpecNS = "http://www.w3.org/XML/1998/namespace"; + + /** The URI for XMLNS spec*/ + public static final String NamespaceSpecNS = "http://www.w3.org/2000/xmlns/"; + + private + Constants() + { + // we don't allow instantiation + } } diff --git a/ext/java/nokogiri/internals/c14n/ElementProxy.java b/ext/java/nokogiri/internals/c14n/ElementProxy.java index 54832f0583..62cb077b77 100644 --- a/ext/java/nokogiri/internals/c14n/ElementProxy.java +++ b/ext/java/nokogiri/internals/c14n/ElementProxy.java @@ -32,262 +32,294 @@ * This is the base class to all Objects which have a direct 1:1 mapping to an * Element in a particular namespace. 
*/ -public abstract class ElementProxy { - - /** Field constructionElement */ - protected Element constructionElement = null; - - /** Field baseURI */ - protected String baseURI = null; - - /** Field doc */ - protected Document doc = null; - - /** Field prefixMappings */ - private static Map prefixMappings = new ConcurrentHashMap(); - - /** - * Constructor ElementProxy - * - */ - public ElementProxy() { +public abstract class ElementProxy +{ + + /** Field constructionElement */ + protected Element constructionElement = null; + + /** Field baseURI */ + protected String baseURI = null; + + /** Field doc */ + protected Document doc = null; + + /** Field prefixMappings */ + private static Map prefixMappings = new ConcurrentHashMap(); + + /** + * Constructor ElementProxy + * + */ + public + ElementProxy() + { + } + + /** + * Constructor ElementProxy + * + * @param doc + */ + public + ElementProxy(Document doc) + { + if (doc == null) { + throw new RuntimeException("Document is null"); } - /** - * Constructor ElementProxy - * - * @param doc - */ - public ElementProxy(Document doc) { - if (doc == null) { - throw new RuntimeException("Document is null"); - } - - this.doc = doc; - this.constructionElement = - createElementForFamilyLocal(this.doc, this.getBaseNamespace(), this.getBaseLocalName()); + this.doc = doc; + this.constructionElement = + createElementForFamilyLocal(this.doc, this.getBaseNamespace(), this.getBaseLocalName()); + } + + /** + * Constructor ElementProxy + * + * @param element + * @param BaseURI + * @throws XMLSecurityException + */ + public + ElementProxy(Element element, String BaseURI) throws CanonicalizationException + { + if (element == null) { + throw new CanonicalizationException("ElementProxy.nullElement"); } - - /** - * Constructor ElementProxy - * - * @param element - * @param BaseURI - * @throws XMLSecurityException - */ - public ElementProxy(Element element, String BaseURI) throws CanonicalizationException { - if (element == null) { - throw new 
CanonicalizationException("ElementProxy.nullElement"); - } - - //if (System.getProperty("nokogiri.c14.debug") == "on") { - // System.out.println("setElement(\"" + element.getTagName() + "\", \"" + BaseURI + "\")"); - //} - - this.doc = element.getOwnerDocument(); - this.constructionElement = element; - this.baseURI = BaseURI; - - this.guaranteeThatElementInCorrectSpace(); - } - - /** - * Returns the namespace of the Elements of the sub-class. - * - * @return the namespace of the Elements of the sub-class. - */ - public abstract String getBaseNamespace(); - - /** - * Returns the localname of the Elements of the sub-class. - * - * @return the localname of the Elements of the sub-class. - */ - public abstract String getBaseLocalName(); - - - protected Element createElementForFamilyLocal( - Document doc, String namespace, String localName - ) { - Element result; - if (namespace == null) { - result = doc.createElementNS(null, localName); - } else { - String baseName = this.getBaseNamespace(); - String prefix = ElementProxy.getDefaultPrefix(baseName); - if ((prefix == null) || (prefix.length() == 0)) { - result = doc.createElementNS(namespace, localName); - result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", namespace); - } else { - result = doc.createElementNS(namespace, prefix + ":" + localName); - result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns:" + prefix, namespace); - } - } - return result; - } - - /** - * This method creates an Element in a given namespace with a given localname. - * It uses the {@link ElementProxy#getDefaultPrefix} method to decide whether - * a particular prefix is bound to that namespace. - *
    - * This method was refactored out of the constructor. - * - * @param doc - * @param namespace - * @param localName - * @return The element created. - */ - public static Element createElementForFamily(Document doc, String namespace, String localName) { - Element result; - String prefix = ElementProxy.getDefaultPrefix(namespace); - - if (namespace == null) { - result = doc.createElementNS(null, localName); - } else { - if ((prefix == null) || (prefix.length() == 0)) { - result = doc.createElementNS(namespace, localName); - result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", namespace); - } else { - result = doc.createElementNS(namespace, prefix + ":" + localName); - result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns:" + prefix, namespace); - } - } - - return result; + //if (System.getProperty("nokogiri.c14.debug") == "on") { + // System.out.println("setElement(\"" + element.getTagName() + "\", \"" + BaseURI + "\")"); + //} + + this.doc = element.getOwnerDocument(); + this.constructionElement = element; + this.baseURI = BaseURI; + + this.guaranteeThatElementInCorrectSpace(); + } + + /** + * Returns the namespace of the Elements of the sub-class. + * + * @return the namespace of the Elements of the sub-class. + */ + public abstract String getBaseNamespace(); + + /** + * Returns the localname of the Elements of the sub-class. + * + * @return the localname of the Elements of the sub-class. 
+ */ + public abstract String getBaseLocalName(); + + + protected Element + createElementForFamilyLocal( + Document doc, String namespace, String localName + ) + { + Element result; + if (namespace == null) { + result = doc.createElementNS(null, localName); + } else { + String baseName = this.getBaseNamespace(); + String prefix = ElementProxy.getDefaultPrefix(baseName); + if ((prefix == null) || (prefix.length() == 0)) { + result = doc.createElementNS(namespace, localName); + result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", namespace); + } else { + result = doc.createElementNS(namespace, prefix + ":" + localName); + result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns:" + prefix, namespace); + } } - - /** - * Returns the Element which was constructed by the Object. - * - * @return the Element which was constructed by the Object. - */ - public final Element getElement() { - return this.constructionElement; + return result; + } + + + /** + * This method creates an Element in a given namespace with a given localname. + * It uses the {@link ElementProxy#getDefaultPrefix} method to decide whether + * a particular prefix is bound to that namespace. + *
    + * This method was refactored out of the constructor. + * + * @param doc + * @param namespace + * @param localName + * @return The element created. + */ + public static Element + createElementForFamily(Document doc, String namespace, String localName) + { + Element result; + String prefix = ElementProxy.getDefaultPrefix(namespace); + + if (namespace == null) { + result = doc.createElementNS(null, localName); + } else { + if ((prefix == null) || (prefix.length() == 0)) { + result = doc.createElementNS(namespace, localName); + result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", namespace); + } else { + result = doc.createElementNS(namespace, prefix + ":" + localName); + result.setAttributeNS(Constants.NamespaceSpecNS, "xmlns:" + prefix, namespace); + } } - /** - * Returns the Element plus a leading and a trailing CarriageReturn Text node. - * - * @return the Element which was constructed by the Object. - */ - public final NodeList getElementPlusReturns() { - - HelperNodeList nl = new HelperNodeList(); - - nl.appendChild(this.doc.createTextNode("\n")); - nl.appendChild(this.getElement()); - nl.appendChild(this.doc.createTextNode("\n")); - - return nl; + return result; + } + + /** + * Returns the Element which was constructed by the Object. + * + * @return the Element which was constructed by the Object. + */ + public final Element + getElement() + { + return this.constructionElement; + } + + /** + * Returns the Element plus a leading and a trailing CarriageReturn Text node. + * + * @return the Element which was constructed by the Object. + */ + public final NodeList + getElementPlusReturns() + { + + HelperNodeList nl = new HelperNodeList(); + + nl.appendChild(this.doc.createTextNode("\n")); + nl.appendChild(this.getElement()); + nl.appendChild(this.doc.createTextNode("\n")); + + return nl; + } + + /** + * Method getDocument + * + * @return the Document where this element is contained. 
+ */ + public Document + getDocument() + { + return this.doc; + } + + /** + * Method getBaseURI + * + * @return the base uri of the namespace of this element + */ + public String + getBaseURI() + { + return this.baseURI; + } + + /** + * Method guaranteeThatElementInCorrectSpace + * + * @throws XMLSecurityException + */ + void + guaranteeThatElementInCorrectSpace() throws CanonicalizationException + { + + String expectedLocalName = this.getBaseLocalName(); + String expectedNamespaceUri = this.getBaseNamespace(); + + String actualLocalName = this.constructionElement.getLocalName(); + String actualNamespaceUri = this.constructionElement.getNamespaceURI(); + + if (!expectedNamespaceUri.equals(actualNamespaceUri) + && !expectedLocalName.equals(actualLocalName)) { + Object exArgs[] = { actualNamespaceUri + ":" + actualLocalName, + expectedNamespaceUri + ":" + expectedLocalName + }; + throw new CanonicalizationException("xml.WrongElement", exArgs); } - - /** - * Method getDocument - * - * @return the Document where this element is contained. 
- */ - public Document getDocument() { - return this.doc; + } + + /** + * Method addText + * + * @param text + */ + public void + addText(String text) + { + if (text != null) { + Text t = this.doc.createTextNode(text); + + this.constructionElement.appendChild(t); } - - /** - * Method getBaseURI - * - * @return the base uri of the namespace of this element - */ - public String getBaseURI() { - return this.baseURI; - } - - /** - * Method guaranteeThatElementInCorrectSpace - * - * @throws XMLSecurityException - */ - void guaranteeThatElementInCorrectSpace() throws CanonicalizationException { - - String expectedLocalName = this.getBaseLocalName(); - String expectedNamespaceUri = this.getBaseNamespace(); - - String actualLocalName = this.constructionElement.getLocalName(); - String actualNamespaceUri = this.constructionElement.getNamespaceURI(); - - if(!expectedNamespaceUri.equals(actualNamespaceUri) - && !expectedLocalName.equals(actualLocalName)) { - Object exArgs[] = { actualNamespaceUri + ":" + actualLocalName, - expectedNamespaceUri + ":" + expectedLocalName}; - throw new CanonicalizationException("xml.WrongElement", exArgs); - } - } - - /** - * Method addText - * - * @param text - */ - public void addText(String text) { - if (text != null) { - Text t = this.doc.createTextNode(text); - - this.constructionElement.appendChild(t); - } - } - - /** - * Method getTextFromChildElement - * - * @param localname - * @param namespace - * @return the Text of the textNode - */ - public String getTextFromChildElement(String localname, String namespace) { - return XMLUtils.selectNode( - this.constructionElement.getFirstChild(), - namespace, - localname, - 0).getTextContent(); - } - - /** - * Method getTextFromTextChild - * - * @return the Text obtained by concatenating all the text nodes of this - * element - */ - public String getTextFromTextChild() { - return XMLUtils.getFullTextChildrenFromElement(this.constructionElement); - } - - /** - * Method length - * - * @param 
namespace - * @param localname - * @return the number of elements {namespace}:localname under this element - */ - public int length(String namespace, String localname) { - int number = 0; - Node sibling = this.constructionElement.getFirstChild(); - while (sibling != null) { - if (localname.equals(sibling.getLocalName()) - && namespace.equals(sibling.getNamespaceURI())) { - number++; - } - sibling = sibling.getNextSibling(); - } - return number; - } - - /** - * Method getDefaultPrefix - * - * @param namespace - * @return the default prefix bind to this element. - */ - public static String getDefaultPrefix(String namespace) { - return prefixMappings.get(namespace); + } + + /** + * Method getTextFromChildElement + * + * @param localname + * @param namespace + * @return the Text of the textNode + */ + public String + getTextFromChildElement(String localname, String namespace) + { + return XMLUtils.selectNode( + this.constructionElement.getFirstChild(), + namespace, + localname, + 0).getTextContent(); + } + + /** + * Method getTextFromTextChild + * + * @return the Text obtained by concatenating all the text nodes of this + * element + */ + public String + getTextFromTextChild() + { + return XMLUtils.getFullTextChildrenFromElement(this.constructionElement); + } + + /** + * Method length + * + * @param namespace + * @param localname + * @return the number of elements {namespace}:localname under this element + */ + public int + length(String namespace, String localname) + { + int number = 0; + Node sibling = this.constructionElement.getFirstChild(); + while (sibling != null) { + if (localname.equals(sibling.getLocalName()) + && namespace.equals(sibling.getNamespaceURI())) { + number++; + } + sibling = sibling.getNextSibling(); } + return number; + } + + /** + * Method getDefaultPrefix + * + * @param namespace + * @return the default prefix bind to this element. 
+ */ + public static String + getDefaultPrefix(String namespace) + { + return prefixMappings.get(namespace); + } } diff --git a/ext/java/nokogiri/internals/c14n/HelperNodeList.java b/ext/java/nokogiri/internals/c14n/HelperNodeList.java index e6cc0b9504..5e84282ff5 100644 --- a/ext/java/nokogiri/internals/c14n/HelperNodeList.java +++ b/ext/java/nokogiri/internals/c14n/HelperNodeList.java @@ -27,67 +27,80 @@ /** * @author Christian Geuer-Pollmann */ -public class HelperNodeList implements NodeList { +public class HelperNodeList implements NodeList +{ - /** Field nodes */ - List nodes = new ArrayList(); - boolean allNodesMustHaveSameParent = false; + /** Field nodes */ + List nodes = new ArrayList(); + boolean allNodesMustHaveSameParent = false; - /** - * - */ - public HelperNodeList() { - this(false); - } + /** + * + */ + public + HelperNodeList() + { + this(false); + } - /** - * @param allNodesMustHaveSameParent - */ - public HelperNodeList(boolean allNodesMustHaveSameParent) { - this.allNodesMustHaveSameParent = allNodesMustHaveSameParent; - } + /** + * @param allNodesMustHaveSameParent + */ + public + HelperNodeList(boolean allNodesMustHaveSameParent) + { + this.allNodesMustHaveSameParent = allNodesMustHaveSameParent; + } - /** - * Method item - * - * @param index - * @return node with index i - */ - public Node item(int index) { - return nodes.get(index); - } + /** + * Method item + * + * @param index + * @return node with index i + */ + public Node + item(int index) + { + return nodes.get(index); + } - /** - * Method getLength - * - * @return length of the list - */ - public int getLength() { - return nodes.size(); - } + /** + * Method getLength + * + * @return length of the list + */ + public int + getLength() + { + return nodes.size(); + } - /** - * Method appendChild - * - * @param node - * @throws IllegalArgumentException - */ - public void appendChild(Node node) throws IllegalArgumentException { - if (this.allNodesMustHaveSameParent && this.getLength() > 0 
- && this.item(0).getParentNode() != node.getParentNode()) { - throw new IllegalArgumentException("Nodes have not the same Parent"); - } - nodes.add(node); + /** + * Method appendChild + * + * @param node + * @throws IllegalArgumentException + */ + public void + appendChild(Node node) throws IllegalArgumentException + { + if (this.allNodesMustHaveSameParent && this.getLength() > 0 + && this.item(0).getParentNode() != node.getParentNode()) { + throw new IllegalArgumentException("Nodes have not the same Parent"); } + nodes.add(node); + } - /** - * @return the document that contains this nodelist - */ - public Document getOwnerDocument() { - if (this.getLength() == 0) { - return null; - } - return XMLUtils.getOwnerDocument(this.item(0)); + /** + * @return the document that contains this nodelist + */ + public Document + getOwnerDocument() + { + if (this.getLength() == 0) { + return null; } + return XMLUtils.getOwnerDocument(this.item(0)); + } } diff --git a/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java b/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java index 343893e406..484dd95bae 100644 --- a/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +++ b/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java @@ -28,52 +28,59 @@ * * @author Christian Geuer-Pollmann */ -public class IgnoreAllErrorHandler implements ErrorHandler { +public class IgnoreAllErrorHandler implements ErrorHandler +{ - /** Field throwExceptions */ - private static final boolean warnOnExceptions = - System.getProperty("org.apache.xml.security.test.warn.on.exceptions", "false").equals("true"); + /** Field throwExceptions */ + private static final boolean warnOnExceptions = + System.getProperty("org.apache.xml.security.test.warn.on.exceptions", "false").equals("true"); - /** Field throwExceptions */ - private static final boolean throwExceptions = - System.getProperty("org.apache.xml.security.test.throw.exceptions", "false").equals("true"); + /** Field 
throwExceptions */ + private static final boolean throwExceptions = + System.getProperty("org.apache.xml.security.test.throw.exceptions", "false").equals("true"); - /** @inheritDoc */ - public void warning(SAXParseException ex) throws SAXException { - if (IgnoreAllErrorHandler.warnOnExceptions) { - // TODO - // get handler from upper layer - //log.warn("", ex); - } - if (IgnoreAllErrorHandler.throwExceptions) { - throw ex; - } + /** @inheritDoc */ + public void + warning(SAXParseException ex) throws SAXException + { + if (IgnoreAllErrorHandler.warnOnExceptions) { + // TODO + // get handler from upper layer + //log.warn("", ex); } + if (IgnoreAllErrorHandler.throwExceptions) { + throw ex; + } + } - /** @inheritDoc */ - public void error(SAXParseException ex) throws SAXException { - if (IgnoreAllErrorHandler.warnOnExceptions) { - // TODO - // get handler from upper layer - //log.error("", ex); - } - if (IgnoreAllErrorHandler.throwExceptions) { - throw ex; - } + /** @inheritDoc */ + public void + error(SAXParseException ex) throws SAXException + { + if (IgnoreAllErrorHandler.warnOnExceptions) { + // TODO + // get handler from upper layer + //log.error("", ex); + } + if (IgnoreAllErrorHandler.throwExceptions) { + throw ex; } + } - /** @inheritDoc */ - public void fatalError(SAXParseException ex) throws SAXException { - if (IgnoreAllErrorHandler.warnOnExceptions) { - // TODO - // get handler from upper layer - //log.warn("", ex); - } - if (IgnoreAllErrorHandler.throwExceptions) { - throw ex; - } + /** @inheritDoc */ + public void + fatalError(SAXParseException ex) throws SAXException + { + if (IgnoreAllErrorHandler.warnOnExceptions) { + // TODO + // get handler from upper layer + //log.warn("", ex); + } + if (IgnoreAllErrorHandler.throwExceptions) { + throw ex; } + } } diff --git a/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java b/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java index 467b62f8e2..c0a7c8a81e 100644 --- 
a/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +++ b/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java @@ -36,131 +36,146 @@ * @author Christian Geuer-Pollmann */ //public class InclusiveNamespaces extends ElementProxy implements TransformParam { -public class InclusiveNamespaces extends ElementProxy { - - /** Field _TAG_EC_INCLUSIVENAMESPACES */ - public static final String _TAG_EC_INCLUSIVENAMESPACES = - "InclusiveNamespaces"; - - /** Field _ATT_EC_PREFIXLIST */ - public static final String _ATT_EC_PREFIXLIST = "PrefixList"; - - /** Field ExclusiveCanonicalizationNamespace */ - public static final String ExclusiveCanonicalizationNamespace = - "http://www.w3.org/2001/10/xml-exc-c14n#"; - - /** - * Constructor XPathContainer - * - * @param doc - * @param prefixList - */ - public InclusiveNamespaces(Document doc, String prefixList) { - this(doc, InclusiveNamespaces.prefixStr2Set(prefixList)); +public class InclusiveNamespaces extends ElementProxy +{ + + /** Field _TAG_EC_INCLUSIVENAMESPACES */ + public static final String _TAG_EC_INCLUSIVENAMESPACES = + "InclusiveNamespaces"; + + /** Field _ATT_EC_PREFIXLIST */ + public static final String _ATT_EC_PREFIXLIST = "PrefixList"; + + /** Field ExclusiveCanonicalizationNamespace */ + public static final String ExclusiveCanonicalizationNamespace = + "http://www.w3.org/2001/10/xml-exc-c14n#"; + + /** + * Constructor XPathContainer + * + * @param doc + * @param prefixList + */ + public + InclusiveNamespaces(Document doc, String prefixList) + { + this(doc, InclusiveNamespaces.prefixStr2Set(prefixList)); + } + + /** + * Constructor InclusiveNamespaces + * + * @param doc + * @param prefixes + */ + public + InclusiveNamespaces(Document doc, Set prefixes) + { + super(doc); + + SortedSet prefixList; + if (prefixes instanceof SortedSet) { + prefixList = (SortedSet)prefixes; + } else { + prefixList = new TreeSet(prefixes); } - /** - * Constructor InclusiveNamespaces - * - * @param doc - * @param prefixes - */ - 
public InclusiveNamespaces(Document doc, Set prefixes) { - super(doc); - - SortedSet prefixList; - if (prefixes instanceof SortedSet) { - prefixList = (SortedSet)prefixes; - } else { - prefixList = new TreeSet(prefixes); - } - - StringBuilder sb = new StringBuilder(prefixList.size() * 8); - for (String prefix : prefixList) { - if (prefix.equals("xmlns")) { - sb.append("#default "); - } else { - sb.append(prefix).append(' '); - } - } - int last = sb.length() - 1; - while (last >= 0 && sb.charAt(last) == ' ') sb.setLength(last--); // trim - - this.constructionElement.setAttributeNS(null, InclusiveNamespaces._ATT_EC_PREFIXLIST, sb.toString()); + StringBuilder sb = new StringBuilder(prefixList.size() * 8); + for (String prefix : prefixList) { + if (prefix.equals("xmlns")) { + sb.append("#default "); + } else { + sb.append(prefix).append(' '); + } } - - /** - * Constructor InclusiveNamespaces - * - * @param element - * @param BaseURI - * @throws XMLSecurityException - */ - public InclusiveNamespaces(Element element, String BaseURI) - throws CanonicalizationException { - super(element, BaseURI); + int last = sb.length() - 1; + while (last >= 0 && sb.charAt(last) == ' ') { sb.setLength(last--); } // trim + + this.constructionElement.setAttributeNS(null, InclusiveNamespaces._ATT_EC_PREFIXLIST, sb.toString()); + } + + /** + * Constructor InclusiveNamespaces + * + * @param element + * @param BaseURI + * @throws XMLSecurityException + */ + public + InclusiveNamespaces(Element element, String BaseURI) + throws CanonicalizationException + { + super(element, BaseURI); + } + + /** + * Method getInclusiveNamespaces + * + * @return The Inclusive Namespace string + */ + public String + getInclusiveNamespaces() + { + return this.constructionElement.getAttributeNS(null, InclusiveNamespaces._ATT_EC_PREFIXLIST); + } + + /** + * Decodes the inclusiveNamespaces String and returns all + * selected namespace prefixes as a Set. 
The #default + * namespace token is represented as an empty namespace prefix + * ("xmlns"). + *
    + * The String inclusiveNamespaces=" xenc ds #default" + * is returned as a Set containing the following Strings: + *
      + *
    • xmlns
    • + *
    • xenc
    • + *
    • ds
    • + *
    + * + * @param inclusiveNamespaces + * @return A set to string + */ + public static SortedSet + prefixStr2Set(String inclusiveNamespaces) + { + SortedSet prefixes = new TreeSet(); + + if ((inclusiveNamespaces == null) || (inclusiveNamespaces.length() == 0)) { + return prefixes; } - /** - * Method getInclusiveNamespaces - * - * @return The Inclusive Namespace string - */ - public String getInclusiveNamespaces() { - return this.constructionElement.getAttributeNS(null, InclusiveNamespaces._ATT_EC_PREFIXLIST); - } - - /** - * Decodes the inclusiveNamespaces String and returns all - * selected namespace prefixes as a Set. The #default - * namespace token is represented as an empty namespace prefix - * ("xmlns"). - *
    - * The String inclusiveNamespaces=" xenc ds #default" - * is returned as a Set containing the following Strings: - *
      - *
    • xmlns
    • - *
    • xenc
    • - *
    • ds
    • - *
    - * - * @param inclusiveNamespaces - * @return A set to string - */ - public static SortedSet prefixStr2Set(String inclusiveNamespaces) { - SortedSet prefixes = new TreeSet(); - - if ((inclusiveNamespaces == null) || (inclusiveNamespaces.length() == 0)) { - return prefixes; - } - - String[] tokens = inclusiveNamespaces.split("\\s"); - for (String prefix : tokens) { - if (prefix.equals("#default")) { - prefixes.add("xmlns"); - } else { - prefixes.add(prefix); - } - } - - return prefixes; + String[] tokens = inclusiveNamespaces.split("\\s"); + for (String prefix : tokens) { + if (prefix.equals("#default")) { + prefixes.add("xmlns"); + } else { + prefixes.add(prefix); + } } - /** - * Method getBaseNamespace - * - * @inheritDoc - */ - public String getBaseNamespace() { - return InclusiveNamespaces.ExclusiveCanonicalizationNamespace; - } - - /** - * Method getBaseLocalName - * - * @inheritDoc - */ - public String getBaseLocalName() { - return InclusiveNamespaces._TAG_EC_INCLUSIVENAMESPACES; - } + return prefixes; + } + + /** + * Method getBaseNamespace + * + * @inheritDoc + */ + public String + getBaseNamespace() + { + return InclusiveNamespaces.ExclusiveCanonicalizationNamespace; + } + + /** + * Method getBaseLocalName + * + * @inheritDoc + */ + public String + getBaseLocalName() + { + return InclusiveNamespaces._TAG_EC_INCLUSIVENAMESPACES; + } } diff --git a/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java b/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java index d01ad6995b..210d9bd1fb 100644 --- a/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +++ b/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java @@ -19,58 +19,69 @@ package nokogiri.internals.c14n; -public class InvalidCanonicalizerException extends Exception { +public class InvalidCanonicalizerException extends Exception +{ - /** - * - */ - private static final long serialVersionUID = 1L; + /** + * + */ + private static final long 
serialVersionUID = 1L; - /** - * Constructor InvalidCanonicalizerException - * - */ - public InvalidCanonicalizerException() { - super(); - } + /** + * Constructor InvalidCanonicalizerException + * + */ + public + InvalidCanonicalizerException() + { + super(); + } - /** - * Constructor InvalidCanonicalizerException - * - * @param message - */ - public InvalidCanonicalizerException(String message) { - super(message); - } + /** + * Constructor InvalidCanonicalizerException + * + * @param message + */ + public + InvalidCanonicalizerException(String message) + { + super(message); + } - /** - * Constructor InvalidCanonicalizerException - * - * @param message - * @param exArgs - */ - public InvalidCanonicalizerException(String message, Object... exArgs) { - super(C14nHelper.getErrorMessage(message, exArgs)); - } + /** + * Constructor InvalidCanonicalizerException + * + * @param message + * @param exArgs + */ + public + InvalidCanonicalizerException(String message, Object... exArgs) + { + super(C14nHelper.getErrorMessage(message, exArgs)); + } - /** - * Constructor InvalidCanonicalizerException - * - * @param msgID - * @param originalException - */ - public InvalidCanonicalizerException(String message, Exception rootCause) { - super(message, rootCause); - } + /** + * Constructor InvalidCanonicalizerException + * + * @param msgID + * @param originalException + */ + public + InvalidCanonicalizerException(String message, Exception rootCause) + { + super(message, rootCause); + } - /** - * Constructor InvalidCanonicalizerException - * - * @param msgID - * @param exArgs - * @param originalException - */ - public InvalidCanonicalizerException(String message, Exception rootCause, Object... exArgs) { - super(C14nHelper.getErrorMessage(message, exArgs), rootCause); - } + /** + * Constructor InvalidCanonicalizerException + * + * @param msgID + * @param exArgs + * @param originalException + */ + public + InvalidCanonicalizerException(String message, Exception rootCause, Object... 
exArgs) + { + super(C14nHelper.getErrorMessage(message, exArgs), rootCause); + } } diff --git a/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java b/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java index 0912a2ed4a..3e1dc9a4a7 100644 --- a/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +++ b/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java @@ -31,372 +31,422 @@ * and at the same time in a list so it can be removed when the frame is pop back. * @author Raul Benito */ -public class NameSpaceSymbTable { - - private static final String XMLNS = "xmlns"; - private static final SymbMap initialMap = new SymbMap(); - - static { - NameSpaceSymbEntry ne = new NameSpaceSymbEntry("", null, true, XMLNS); - ne.lastrendered = ""; - initialMap.put(XMLNS, ne); - } - - /**The map betwen prefix-> entry table. */ - private SymbMap symb; - - /**The stacks for removing the definitions when doing pop.*/ - private List level; - private boolean cloned = true; - - /** - * Default constractor - **/ - public NameSpaceSymbTable() { - level = new ArrayList(); - //Insert the default binding for xmlns. - symb = (SymbMap) initialMap.clone(); - } +public class NameSpaceSymbTable +{ + + private static final String XMLNS = "xmlns"; + private static final SymbMap initialMap = new SymbMap(); + + static + { + NameSpaceSymbEntry ne = new NameSpaceSymbEntry("", null, true, XMLNS); + ne.lastrendered = ""; + initialMap.put(XMLNS, ne); + } + + /**The map betwen prefix-> entry table. */ + private SymbMap symb; + + /**The stacks for removing the definitions when doing pop.*/ + private List level; + private boolean cloned = true; + + /** + * Default constractor + **/ + public + NameSpaceSymbTable() + { + level = new ArrayList(); + //Insert the default binding for xmlns. + symb = (SymbMap) initialMap.clone(); + } + + /** + * Get all the unrendered nodes in the name space. + * For Inclusive rendering + * @param result the list where to fill the unrendered xmlns definitions. 
+ **/ + public void + getUnrenderedNodes(Collection result) + { + Iterator it = symb.entrySet().iterator(); + while (it.hasNext()) { + NameSpaceSymbEntry n = it.next(); + //put them rendered? + if ((!n.rendered) && (n.n != null)) { + n = (NameSpaceSymbEntry) n.clone(); + needsClone(); + symb.put(n.prefix, n); + n.lastrendered = n.uri; + n.rendered = true; - /** - * Get all the unrendered nodes in the name space. - * For Inclusive rendering - * @param result the list where to fill the unrendered xmlns definitions. - **/ - public void getUnrenderedNodes(Collection result) { - Iterator it = symb.entrySet().iterator(); - while (it.hasNext()) { - NameSpaceSymbEntry n = it.next(); - //put them rendered? - if ((!n.rendered) && (n.n != null)) { - n = (NameSpaceSymbEntry) n.clone(); - needsClone(); - symb.put(n.prefix, n); - n.lastrendered = n.uri; - n.rendered = true; - - result.add(n.n); - } - } + result.add(n.n); + } } - - /** - * Push a frame for visible namespace. - * For Inclusive rendering. - **/ - public void outputNodePush() { - push(); + } + + /** + * Push a frame for visible namespace. + * For Inclusive rendering. + **/ + public void + outputNodePush() + { + push(); + } + + /** + * Pop a frame for visible namespace. + **/ + public void + outputNodePop() + { + pop(); + } + + /** + * Push a frame for a node. + * Inclusive or Exclusive. + **/ + public void + push() + { + //Put the number of namespace definitions in the stack. + level.add(null); + cloned = false; + } + + /** + * Pop a frame. + * Inclusive or Exclusive. + **/ + public void + pop() + { + int size = level.size() - 1; + Object ob = level.remove(size); + if (ob != null) { + symb = (SymbMap)ob; + if (size == 0) { + cloned = false; + } else { + cloned = (level.get(size - 1) != symb); + } + } else { + cloned = false; } - - /** - * Pop a frame for visible namespace. 
- **/ - public void outputNodePop() { - pop(); + } + + final void + needsClone() + { + if (!cloned) { + level.set(level.size() - 1, symb); + symb = (SymbMap) symb.clone(); + cloned = true; } - - /** - * Push a frame for a node. - * Inclusive or Exclusive. - **/ - public void push() { - //Put the number of namespace definitions in the stack. - level.add(null); - cloned = false; + } + + + /** + * Gets the attribute node that defines the binding for the prefix. + * @param prefix the prefix to obtain the attribute. + * @return null if there is no need to render the prefix. Otherwise the node of + * definition. + **/ + public Attr + getMapping(String prefix) + { + NameSpaceSymbEntry entry = symb.get(prefix); + if (entry == null) { + //There is no definition for the prefix(a bug?). + return null; } - - /** - * Pop a frame. - * Inclusive or Exclusive. - **/ - public void pop() { - int size = level.size() - 1; - Object ob = level.remove(size); - if (ob != null) { - symb = (SymbMap)ob; - if (size == 0) { - cloned = false; - } else { - cloned = (level.get(size - 1) != symb); - } - } else { - cloned = false; - } + if (entry.rendered) { + //No need to render an entry already rendered. + return null; } - - final void needsClone() { - if (!cloned) { - level.set(level.size() - 1, symb); - symb = (SymbMap) symb.clone(); - cloned = true; - } + // Mark this entry as render. + entry = (NameSpaceSymbEntry) entry.clone(); + needsClone(); + symb.put(prefix, entry); + entry.rendered = true; + entry.lastrendered = entry.uri; + // Return the node for outputing. + return entry.n; + } + + /** + * Gets a definition without mark it as render. + * For render in exclusive c14n the namespaces in the include prefixes. + * @param prefix The prefix whose definition is neaded. 
+ * @return the attr to render, null if there is no need to render + **/ + public Attr + getMappingWithoutRendered(String prefix) + { + NameSpaceSymbEntry entry = symb.get(prefix); + if (entry == null) { + return null; } - - - /** - * Gets the attribute node that defines the binding for the prefix. - * @param prefix the prefix to obtain the attribute. - * @return null if there is no need to render the prefix. Otherwise the node of - * definition. - **/ - public Attr getMapping(String prefix) { - NameSpaceSymbEntry entry = symb.get(prefix); - if (entry == null) { - //There is no definition for the prefix(a bug?). - return null; - } - if (entry.rendered) { - //No need to render an entry already rendered. - return null; - } - // Mark this entry as render. - entry = (NameSpaceSymbEntry) entry.clone(); - needsClone(); - symb.put(prefix, entry); - entry.rendered = true; - entry.lastrendered = entry.uri; - // Return the node for outputing. - return entry.n; + if (entry.rendered) { + return null; } - - /** - * Gets a definition without mark it as render. - * For render in exclusive c14n the namespaces in the include prefixes. - * @param prefix The prefix whose definition is neaded. - * @return the attr to render, null if there is no need to render - **/ - public Attr getMappingWithoutRendered(String prefix) { - NameSpaceSymbEntry entry = symb.get(prefix); - if (entry == null) { - return null; - } - if (entry.rendered) { - return null; - } - return entry.n; + return entry.n; + } + + /** + * Adds the mapping for a prefix. + * @param prefix the prefix of definition + * @param uri the Uri of the definition + * @param n the attribute that have the definition + * @return true if there is already defined. + **/ + public boolean + addMapping(String prefix, String uri, Attr n) + { + NameSpaceSymbEntry ob = symb.get(prefix); + if ((ob != null) && uri.equals(ob.uri)) { + //If we have it previously defined. Don't keep working. 
+ return false; } - - /** - * Adds the mapping for a prefix. - * @param prefix the prefix of definition - * @param uri the Uri of the definition - * @param n the attribute that have the definition - * @return true if there is already defined. - **/ - public boolean addMapping(String prefix, String uri, Attr n) { - NameSpaceSymbEntry ob = symb.get(prefix); - if ((ob != null) && uri.equals(ob.uri)) { - //If we have it previously defined. Don't keep working. - return false; - } - //Creates and entry in the table for this new definition. - NameSpaceSymbEntry ne = new NameSpaceSymbEntry(uri, n, false, prefix); - needsClone(); - symb.put(prefix, ne); - if (ob != null) { - //We have a previous definition store it for the pop. - //Check if a previous definition(not the inmidiatly one) has been rendered. - ne.lastrendered = ob.lastrendered; - if ((ob.lastrendered != null) && (ob.lastrendered.equals(uri))) { - //Yes it is. Mark as rendered. - ne.rendered = true; - } - } - return true; + //Creates and entry in the table for this new definition. + NameSpaceSymbEntry ne = new NameSpaceSymbEntry(uri, n, false, prefix); + needsClone(); + symb.put(prefix, ne); + if (ob != null) { + //We have a previous definition store it for the pop. + //Check if a previous definition(not the inmidiatly one) has been rendered. + ne.lastrendered = ob.lastrendered; + if ((ob.lastrendered != null) && (ob.lastrendered.equals(uri))) { + //Yes it is. Mark as rendered. + ne.rendered = true; + } } - - /** - * Adds a definition and mark it as render. - * For inclusive c14n. 
- * @param prefix the prefix of definition - * @param uri the Uri of the definition - * @param n the attribute that have the definition - * @return the attr to render, null if there is no need to render - **/ - public Node addMappingAndRender(String prefix, String uri, Attr n) { - NameSpaceSymbEntry ob = symb.get(prefix); - - if ((ob != null) && uri.equals(ob.uri)) { - if (!ob.rendered) { - ob = (NameSpaceSymbEntry) ob.clone(); - needsClone(); - symb.put(prefix, ob); - ob.lastrendered = uri; - ob.rendered = true; - return ob.n; - } - return null; - } - - NameSpaceSymbEntry ne = new NameSpaceSymbEntry(uri,n,true,prefix); - ne.lastrendered = uri; + return true; + } + + /** + * Adds a definition and mark it as render. + * For inclusive c14n. + * @param prefix the prefix of definition + * @param uri the Uri of the definition + * @param n the attribute that have the definition + * @return the attr to render, null if there is no need to render + **/ + public Node + addMappingAndRender(String prefix, String uri, Attr n) + { + NameSpaceSymbEntry ob = symb.get(prefix); + + if ((ob != null) && uri.equals(ob.uri)) { + if (!ob.rendered) { + ob = (NameSpaceSymbEntry) ob.clone(); needsClone(); - symb.put(prefix, ne); - if ((ob != null) && (ob.lastrendered != null) && (ob.lastrendered.equals(uri))) { - ne.rendered = true; - return null; - } - return ne.n; + symb.put(prefix, ob); + ob.lastrendered = uri; + ob.rendered = true; + return ob.n; + } + return null; } - public int getLevel() { - return level.size(); + NameSpaceSymbEntry ne = new NameSpaceSymbEntry(uri, n, true, prefix); + ne.lastrendered = uri; + needsClone(); + symb.put(prefix, ne); + if ((ob != null) && (ob.lastrendered != null) && (ob.lastrendered.equals(uri))) { + ne.rendered = true; + return null; } - - public void removeMapping(String prefix) { - NameSpaceSymbEntry ob = symb.get(prefix); - - if (ob != null) { - needsClone(); - symb.put(prefix, null); - } + return ne.n; + } + + public int + getLevel() + { + return 
level.size(); + } + + public void + removeMapping(String prefix) + { + NameSpaceSymbEntry ob = symb.get(prefix); + + if (ob != null) { + needsClone(); + symb.put(prefix, null); } + } - public void removeMappingIfNotRender(String prefix) { - NameSpaceSymbEntry ob = symb.get(prefix); + public void + removeMappingIfNotRender(String prefix) + { + NameSpaceSymbEntry ob = symb.get(prefix); - if (ob != null && !ob.rendered) { - needsClone(); - symb.put(prefix, null); - } + if (ob != null && !ob.rendered) { + needsClone(); + symb.put(prefix, null); } + } - public boolean removeMappingIfRender(String prefix) { - NameSpaceSymbEntry ob = symb.get(prefix); + public boolean + removeMappingIfRender(String prefix) + { + NameSpaceSymbEntry ob = symb.get(prefix); - if (ob != null && ob.rendered) { - needsClone(); - symb.put(prefix, null); - } - return false; + if (ob != null && ob.rendered) { + needsClone(); + symb.put(prefix, null); } + return false; + } } /** * The internal structure of NameSpaceSymbTable. 
**/ -class NameSpaceSymbEntry implements Cloneable { - - String prefix; - - /**The URI that the prefix defines */ - String uri; - - /**The last output in the URI for this prefix (This for speed reason).*/ - String lastrendered = null; - - /**This prefix-URI has been already render or not.*/ - boolean rendered = false; - - /**The attribute to include.*/ - Attr n; - - NameSpaceSymbEntry(String name, Attr n, boolean rendered, String prefix) { - this.uri = name; - this.rendered = rendered; - this.n = n; - this.prefix = prefix; - } - - /** @inheritDoc */ - public Object clone() { - try { - return super.clone(); - } catch (CloneNotSupportedException e) { - return null; - } +class NameSpaceSymbEntry implements Cloneable +{ + + String prefix; + + /**The URI that the prefix defines */ + String uri; + + /**The last output in the URI for this prefix (This for speed reason).*/ + String lastrendered = null; + + /**This prefix-URI has been already render or not.*/ + boolean rendered = false; + + /**The attribute to include.*/ + Attr n; + + NameSpaceSymbEntry(String name, Attr n, boolean rendered, String prefix) + { + this.uri = name; + this.rendered = rendered; + this.n = n; + this.prefix = prefix; + } + + /** @inheritDoc */ + public Object + clone() + { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + return null; } + } } -class SymbMap implements Cloneable { - int free = 23; - NameSpaceSymbEntry[] entries; - String[] keys; - - SymbMap() { - entries = new NameSpaceSymbEntry[free]; - keys = new String[free]; +class SymbMap implements Cloneable +{ + int free = 23; + NameSpaceSymbEntry[] entries; + String[] keys; + + SymbMap() + { + entries = new NameSpaceSymbEntry[free]; + keys = new String[free]; + } + + void + put(String key, NameSpaceSymbEntry value) + { + int index = index(key); + Object oldKey = keys[index]; + keys[index] = key; + entries[index] = value; + if ((oldKey == null || !oldKey.equals(key)) && (--free == 0)) { + free = entries.length; + 
int newCapacity = free << 2; + rehash(newCapacity); } - - void put(String key, NameSpaceSymbEntry value) { - int index = index(key); - Object oldKey = keys[index]; - keys[index] = key; - entries[index] = value; - if ((oldKey == null || !oldKey.equals(key)) && (--free == 0)) { - free = entries.length; - int newCapacity = free << 2; - rehash(newCapacity); - } + } + + List + entrySet() + { + List a = new ArrayList(); + for (int i = 0; i < entries.length; i++) { + if ((entries[i] != null) && !("".equals(entries[i].uri))) { + a.add(entries[i]); + } } - - List entrySet() { - List a = new ArrayList(); - for (int i = 0;i < entries.length;i++) { - if ((entries[i] != null) && !("".equals(entries[i].uri))) { - a.add(entries[i]); - } - } - return a; + return a; + } + + protected int + index(Object obj) + { + Object[] set = keys; + int length = set.length; + //abs of index + int index = (obj.hashCode() & 0x7fffffff) % length; + Object cur = set[index]; + + if (cur == null || (cur.equals(obj))) { + return index; } - - protected int index(Object obj) { - Object[] set = keys; - int length = set.length; - //abs of index - int index = (obj.hashCode() & 0x7fffffff) % length; - Object cur = set[index]; - - if (cur == null || (cur.equals(obj))) { - return index; - } - length--; - do { - index = index == length ? 0 : ++index; - cur = set[index]; - } while (cur != null && (!cur.equals(obj))); - return index; - } - - /** - * rehashes the map to the new capacity. - * - * @param newCapacity an int value - */ - protected void rehash(int newCapacity) { - int oldCapacity = keys.length; - String oldKeys[] = keys; - NameSpaceSymbEntry oldVals[] = entries; - - keys = new String[newCapacity]; - entries = new NameSpaceSymbEntry[newCapacity]; - - for (int i = oldCapacity; i-- > 0;) { - if (oldKeys[i] != null) { - String o = oldKeys[i]; - int index = index(o); - keys[index] = o; - entries[index] = oldVals[i]; - } - } + length--; + do { + index = index == length ? 
0 : ++index; + cur = set[index]; + } while (cur != null && (!cur.equals(obj))); + return index; + } + + /** + * rehashes the map to the new capacity. + * + * @param newCapacity an int value + */ + protected void + rehash(int newCapacity) + { + int oldCapacity = keys.length; + String oldKeys[] = keys; + NameSpaceSymbEntry oldVals[] = entries; + + keys = new String[newCapacity]; + entries = new NameSpaceSymbEntry[newCapacity]; + + for (int i = oldCapacity; i-- > 0;) { + if (oldKeys[i] != null) { + String o = oldKeys[i]; + int index = index(o); + keys[index] = o; + entries[index] = oldVals[i]; + } } - - NameSpaceSymbEntry get(String key) { - return entries[index(key)]; - } - - protected Object clone() { - try { - SymbMap copy = (SymbMap) super.clone(); - copy.entries = new NameSpaceSymbEntry[entries.length]; - System.arraycopy(entries, 0, copy.entries, 0, entries.length); - copy.keys = new String[keys.length]; - System.arraycopy(keys, 0, copy.keys, 0, keys.length); - - return copy; - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - return null; + } + + NameSpaceSymbEntry + get(String key) + { + return entries[index(key)]; + } + + protected Object + clone() + { + try { + SymbMap copy = (SymbMap) super.clone(); + copy.entries = new NameSpaceSymbEntry[entries.length]; + System.arraycopy(entries, 0, copy.entries, 0, entries.length); + copy.keys = new String[keys.length]; + System.arraycopy(keys, 0, copy.keys, 0, keys.length); + + return copy; + } catch (CloneNotSupportedException e) { + e.printStackTrace(); } + return null; + } } diff --git a/ext/java/nokogiri/internals/c14n/NodeFilter.java b/ext/java/nokogiri/internals/c14n/NodeFilter.java index dbd05e88b4..42f959de9f 100644 --- a/ext/java/nokogiri/internals/c14n/NodeFilter.java +++ b/ext/java/nokogiri/internals/c14n/NodeFilter.java @@ -23,29 +23,30 @@ /** * An interface to tell to the c14n if a node is included or not in the output */ -public interface NodeFilter { - - /** - * Tells if a node must 
be output in c14n. - * @param n - * @return 1 if the node should be output. - * 0 if node must not be output, - * -1 if the node and all it's child must not be output. - * - */ - int isNodeInclude(Node n); - - /** - * Tells if a node must be output in a c14n. - * The caller must assured that this method is always call - * in document order. The implementations can use this - * restriction to optimize the transformation. - * @param n - * @param level the relative level in the tree - * @return 1 if the node should be output. - * 0 if node must not be output, - * -1 if the node and all it's child must not be output. - */ - int isNodeIncludeDO(Node n, int level); +public interface NodeFilter +{ + + /** + * Tells if a node must be output in c14n. + * @param n + * @return 1 if the node should be output. + * 0 if node must not be output, + * -1 if the node and all it's child must not be output. + * + */ + int isNodeInclude(Node n); + + /** + * Tells if a node must be output in a c14n. + * The caller must assured that this method is always call + * in document order. The implementations can use this + * restriction to optimize the transformation. + * @param n + * @param level the relative level in the tree + * @return 1 if the node should be output. + * 0 if node must not be output, + * -1 if the node and all it's child must not be output. 
+ */ + int isNodeIncludeDO(Node n, int level); } diff --git a/ext/java/nokogiri/internals/c14n/UtfHelpper.java b/ext/java/nokogiri/internals/c14n/UtfHelpper.java index 959634e97f..40866507bf 100644 --- a/ext/java/nokogiri/internals/c14n/UtfHelpper.java +++ b/ext/java/nokogiri/internals/c14n/UtfHelpper.java @@ -22,158 +22,169 @@ import java.io.OutputStream; import java.util.Map; -public final class UtfHelpper { - - private UtfHelpper() { - // complete +public final class UtfHelpper +{ + + private + UtfHelpper() + { + // complete + } + + public static void + writeByte( + final String str, + final OutputStream out, + Map cache + ) throws IOException + { + byte[] result = cache.get(str); + if (result == null) { + result = getStringInUtf8(str); + cache.put(str, result); } - public static void writeByte( - final String str, - final OutputStream out, - Map cache - ) throws IOException { - byte[] result = cache.get(str); - if (result == null) { - result = getStringInUtf8(str); - cache.put(str, result); - } + out.write(result); + } - out.write(result); + public static void + writeCharToUtf8(final char c, final OutputStream out) throws IOException + { + if (c < 0x80) { + out.write(c); + return; + } + if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { + //No Surrogates in sun java + out.write(0x3f); + return; } + int bias; + int write; + char ch; + if (c > 0x07FF) { + ch = (char)(c >>> 12); + write = 0xE0; + if (ch > 0) { + write |= (ch & 0x0F); + } + out.write(write); + write = 0x80; + bias = 0x3F; + } else { + write = 0xC0; + bias = 0x1F; + } + ch = (char)(c >>> 6); + if (ch > 0) { + write |= (ch & bias); + } + out.write(write); + out.write(0x80 | ((c) & 0x3F)); - public static void writeCharToUtf8(final char c, final OutputStream out) throws IOException { - if (c < 0x80) { - out.write(c); - return; - } - if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { - //No Surrogates in sun java - out.write(0x3f); - return; - } - int bias; - int 
write; - char ch; - if (c > 0x07FF) { - ch = (char)(c>>>12); - write = 0xE0; - if (ch > 0) { - write |= (ch & 0x0F); - } - out.write(write); - write = 0x80; - bias = 0x3F; - } else { - write = 0xC0; - bias = 0x1F; - } - ch = (char)(c>>>6); + } + + public static void + writeStringToUtf8( + final String str, + final OutputStream out + ) throws IOException + { + final int length = str.length(); + int i = 0; + char c; + while (i < length) { + c = str.charAt(i++); + if (c < 0x80) { + out.write(c); + continue; + } + if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { + //No Surrogates in sun java + out.write(0x3f); + continue; + } + char ch; + int bias; + int write; + if (c > 0x07FF) { + ch = (char)(c >>> 12); + write = 0xE0; if (ch > 0) { - write |= (ch & bias); - } + write |= (ch & 0x0F); + } out.write(write); - out.write(0x80 | ((c) & 0x3F)); + write = 0x80; + bias = 0x3F; + } else { + write = 0xC0; + bias = 0x1F; + } + ch = (char)(c >>> 6); + if (ch > 0) { + write |= (ch & bias); + } + out.write(write); + out.write(0x80 | ((c) & 0x3F)); } - public static void writeStringToUtf8( - final String str, - final OutputStream out - ) throws IOException{ - final int length = str.length(); - int i = 0; - char c; - while (i < length) { - c = str.charAt(i++); - if (c < 0x80) { - out.write(c); - continue; - } - if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { - //No Surrogates in sun java - out.write(0x3f); - continue; - } - char ch; - int bias; - int write; - if (c > 0x07FF) { - ch = (char)(c>>>12); - write = 0xE0; - if (ch > 0) { - write |= (ch & 0x0F); - } - out.write(write); - write = 0x80; - bias = 0x3F; - } else { - write = 0xC0; - bias = 0x1F; - } - ch = (char)(c>>>6); - if (ch > 0) { - write |= (ch & bias); - } - out.write(write); - out.write(0x80 | ((c) & 0x3F)); + } + public static byte[] + getStringInUtf8(final String str) + { + final int length = str.length(); + boolean expanded = false; + byte[] result = new byte[length]; + int i = 
0; + int out = 0; + char c; + while (i < length) { + c = str.charAt(i++); + if (c < 0x80) { + result[out++] = (byte)c; + continue; + } + if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { + //No Surrogates in sun java + result[out++] = 0x3f; + continue; + } + if (!expanded) { + byte newResult[] = new byte[3 * length]; + System.arraycopy(result, 0, newResult, 0, out); + result = newResult; + expanded = true; + } + char ch; + int bias; + byte write; + if (c > 0x07FF) { + ch = (char)(c >>> 12); + write = (byte)0xE0; + if (ch > 0) { + write |= (ch & 0x0F); } - + result[out++] = write; + write = (byte)0x80; + bias = 0x3F; + } else { + write = (byte)0xC0; + bias = 0x1F; + } + ch = (char)(c >>> 6); + if (ch > 0) { + write |= (ch & bias); + } + result[out++] = write; + result[out++] = (byte)(0x80 | ((c) & 0x3F)); } - - public static byte[] getStringInUtf8(final String str) { - final int length = str.length(); - boolean expanded = false; - byte[] result = new byte[length]; - int i = 0; - int out = 0; - char c; - while (i < length) { - c = str.charAt(i++); - if (c < 0x80) { - result[out++] = (byte)c; - continue; - } - if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { - //No Surrogates in sun java - result[out++] = 0x3f; - continue; - } - if (!expanded) { - byte newResult[] = new byte[3*length]; - System.arraycopy(result, 0, newResult, 0, out); - result = newResult; - expanded = true; - } - char ch; - int bias; - byte write; - if (c > 0x07FF) { - ch = (char)(c>>>12); - write = (byte)0xE0; - if (ch > 0) { - write |= (ch & 0x0F); - } - result[out++] = write; - write = (byte)0x80; - bias = 0x3F; - } else { - write = (byte)0xC0; - bias = 0x1F; - } - ch = (char)(c>>>6); - if (ch > 0) { - write |= (ch & bias); - } - result[out++] = write; - result[out++] = (byte)(0x80 | ((c) & 0x3F)); - } - if (expanded) { - byte newResult[] = new byte[out]; - System.arraycopy(result, 0, newResult, 0, out); - result = newResult; - } - return result; + if 
(expanded) { + byte newResult[] = new byte[out]; + System.arraycopy(result, 0, newResult, 0, out); + result = newResult; } + return result; + } } diff --git a/ext/java/nokogiri/internals/c14n/XMLUtils.java b/ext/java/nokogiri/internals/c14n/XMLUtils.java index 5f31fa3b52..c29de56601 100644 --- a/ext/java/nokogiri/internals/c14n/XMLUtils.java +++ b/ext/java/nokogiri/internals/c14n/XMLUtils.java @@ -39,469 +39,502 @@ * * @author Christian Geuer-Pollmann */ -public class XMLUtils { - - /** - * Constructor XMLUtils - * - */ - private XMLUtils() { - // we don't allow instantiation +public class XMLUtils +{ + + /** + * Constructor XMLUtils + * + */ + private + XMLUtils() + { + // we don't allow instantiation + } + + /** + * Method getFullTextChildrenFromElement + * + * @param element + * @return the string of children + */ + public static String + getFullTextChildrenFromElement(Element element) + { + StringBuilder sb = new StringBuilder(); + + Node child = element.getFirstChild(); + while (child != null) { + if (child.getNodeType() == Node.TEXT_NODE) { + sb.append(((Text)child).getData()); + } + child = child.getNextSibling(); } - /** - * Method getFullTextChildrenFromElement - * - * @param element - * @return the string of children - */ - public static String getFullTextChildrenFromElement(Element element) { - StringBuilder sb = new StringBuilder(); - - Node child = element.getFirstChild(); - while (child != null) { - if (child.getNodeType() == Node.TEXT_NODE) { - sb.append(((Text)child).getData()); - } - child = child.getNextSibling(); - } - - return sb.toString(); + return sb.toString(); + } + + /** + * This method returns the owner document of a particular node. + * This method is necessary because it always returns a + * {@link Document}. {@link Node#getOwnerDocument} returns null + * if the {@link Node} is a {@link Document}. 
+ * + * @param node + * @return the owner document of the node + */ + public static Document + getOwnerDocument(Node node) + { + if (node.getNodeType() == Node.DOCUMENT_NODE) { + return (Document) node; } - - /** - * This method returns the owner document of a particular node. - * This method is necessary because it always returns a - * {@link Document}. {@link Node#getOwnerDocument} returns null - * if the {@link Node} is a {@link Document}. - * - * @param node - * @return the owner document of the node - */ - public static Document getOwnerDocument(Node node) { - if (node.getNodeType() == Node.DOCUMENT_NODE) { - return (Document) node; - } - try { - return node.getOwnerDocument(); - } catch (NullPointerException npe) { - throw new NullPointerException(npe.getMessage()); - } + try { + return node.getOwnerDocument(); + } catch (NullPointerException npe) { + throw new NullPointerException(npe.getMessage()); } - - /** - * This method returns the first non-null owner document of the Nodes in this Set. - * This method is necessary because it always returns a - * {@link Document}. {@link Node#getOwnerDocument} returns null - * if the {@link Node} is a {@link Document}. - * - * @param xpathNodeSet - * @return the owner document - */ - public static Document getOwnerDocument(Set xpathNodeSet) { - NullPointerException npe = null; - for (Node node : xpathNodeSet) { - int nodeType = node.getNodeType(); - if (nodeType == Node.DOCUMENT_NODE) { - return (Document) node; - } - try { - if (nodeType == Node.ATTRIBUTE_NODE) { - return ((Attr)node).getOwnerElement().getOwnerDocument(); - } - return node.getOwnerDocument(); - } catch (NullPointerException e) { - npe = e; - } + } + + /** + * This method returns the first non-null owner document of the Nodes in this Set. + * This method is necessary because it always returns a + * {@link Document}. {@link Node#getOwnerDocument} returns null + * if the {@link Node} is a {@link Document}. 
+ * + * @param xpathNodeSet + * @return the owner document + */ + public static Document + getOwnerDocument(Set xpathNodeSet) + { + NullPointerException npe = null; + for (Node node : xpathNodeSet) { + int nodeType = node.getNodeType(); + if (nodeType == Node.DOCUMENT_NODE) { + return (Document) node; + } + try { + if (nodeType == Node.ATTRIBUTE_NODE) { + return ((Attr)node).getOwnerElement().getOwnerDocument(); } - - throw new NullPointerException(npe.getMessage()); + return node.getOwnerDocument(); + } catch (NullPointerException e) { + npe = e; + } } - /** - * Method convertNodelistToSet - * - * @param xpathNodeSet - * @return the set with the nodelist - */ - public static Set convertNodelistToSet(NodeList xpathNodeSet) { - if (xpathNodeSet == null) { - return new HashSet(); - } + throw new NullPointerException(npe.getMessage()); + } + + /** + * Method convertNodelistToSet + * + * @param xpathNodeSet + * @return the set with the nodelist + */ + public static Set + convertNodelistToSet(NodeList xpathNodeSet) + { + if (xpathNodeSet == null) { + return new HashSet(); + } - int length = xpathNodeSet.getLength(); - Set set = new HashSet(length); + int length = xpathNodeSet.getLength(); + Set set = new HashSet(length); - for (int i = 0; i < length; i++) { - set.add(xpathNodeSet.item(i)); - } - - return set; + for (int i = 0; i < length; i++) { + set.add(xpathNodeSet.item(i)); } - /** - * This method spreads all namespace attributes in a DOM document to their - * children. This is needed because the XML Signature XPath transform - * must evaluate the XPath against all nodes in the input, even against - * XPath namespace nodes. Through a bug in XalanJ2, the namespace nodes are - * not fully visible in the Xalan XPath model, so we have to do this by - * hand in DOM spaces so that the nodes become visible in XPath space. 
- * - * @param doc - * @see - * Namespace axis resolution is not XPath compliant - */ - public static void circumventBug2650(Document doc) { - - Element documentElement = doc.getDocumentElement(); - - // if the document element has no xmlns definition, we add xmlns="" - Attr xmlnsAttr = - documentElement.getAttributeNodeNS(Constants.NamespaceSpecNS, "xmlns"); - - if (xmlnsAttr == null) { - documentElement.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", ""); - } - - XMLUtils.circumventBug2650internal(doc); + return set; + } + + /** + * This method spreads all namespace attributes in a DOM document to their + * children. This is needed because the XML Signature XPath transform + * must evaluate the XPath against all nodes in the input, even against + * XPath namespace nodes. Through a bug in XalanJ2, the namespace nodes are + * not fully visible in the Xalan XPath model, so we have to do this by + * hand in DOM spaces so that the nodes become visible in XPath space. + * + * @param doc + * @see + * Namespace axis resolution is not XPath compliant + */ + public static void + circumventBug2650(Document doc) + { + + Element documentElement = doc.getDocumentElement(); + + // if the document element has no xmlns definition, we add xmlns="" + Attr xmlnsAttr = + documentElement.getAttributeNodeNS(Constants.NamespaceSpecNS, "xmlns"); + + if (xmlnsAttr == null) { + documentElement.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", ""); } - /** - * This is the work horse for {@link #circumventBug2650}. 
- * - * @param node - * @see - * Namespace axis resolution is not XPath compliant - */ - @SuppressWarnings("fallthrough") - private static void circumventBug2650internal(Node node) { - Node parent = null; - Node sibling = null; - final String namespaceNs = Constants.NamespaceSpecNS; - do { - switch (node.getNodeType()) { - case Node.ELEMENT_NODE : - Element element = (Element) node; - if (!element.hasChildNodes()) { - break; + XMLUtils.circumventBug2650internal(doc); + } + + /** + * This is the work horse for {@link #circumventBug2650}. + * + * @param node + * @see + * Namespace axis resolution is not XPath compliant + */ + @SuppressWarnings("fallthrough") + private static void + circumventBug2650internal(Node node) + { + Node parent = null; + Node sibling = null; + final String namespaceNs = Constants.NamespaceSpecNS; + do { + switch (node.getNodeType()) { + case Node.ELEMENT_NODE : + Element element = (Element) node; + if (!element.hasChildNodes()) { + break; + } + if (element.hasAttributes()) { + NamedNodeMap attributes = element.getAttributes(); + int attributesLength = attributes.getLength(); + + for (Node child = element.getFirstChild(); child != null; + child = child.getNextSibling()) { + + if (child.getNodeType() != Node.ELEMENT_NODE) { + continue; + } + Element childElement = (Element) child; + + for (int i = 0; i < attributesLength; i++) { + Attr currentAttr = (Attr) attributes.item(i); + if (!namespaceNs.equals(currentAttr.getNamespaceURI())) { + continue; } - if (element.hasAttributes()) { - NamedNodeMap attributes = element.getAttributes(); - int attributesLength = attributes.getLength(); - - for (Node child = element.getFirstChild(); child!=null; - child = child.getNextSibling()) { - - if (child.getNodeType() != Node.ELEMENT_NODE) { - continue; - } - Element childElement = (Element) child; - - for (int i = 0; i < attributesLength; i++) { - Attr currentAttr = (Attr) attributes.item(i); - if (!namespaceNs.equals(currentAttr.getNamespaceURI())) { - 
continue; - } - if (childElement.hasAttributeNS(namespaceNs, - currentAttr.getLocalName())) { - continue; - } - childElement.setAttributeNS(namespaceNs, - currentAttr.getName(), - currentAttr.getNodeValue()); - } - } + if (childElement.hasAttributeNS(namespaceNs, + currentAttr.getLocalName())) { + continue; } - case Node.ENTITY_REFERENCE_NODE : - case Node.DOCUMENT_NODE : - parent = node; - sibling = node.getFirstChild(); - break; - } - while ((sibling == null) && (parent != null)) { - sibling = parent.getNextSibling(); - parent = parent.getParentNode(); + childElement.setAttributeNS(namespaceNs, + currentAttr.getName(), + currentAttr.getNodeValue()); + } } - if (sibling == null) { - return; - } - - node = sibling; - sibling = node.getNextSibling(); - } while (true); + } + case Node.ENTITY_REFERENCE_NODE : + case Node.DOCUMENT_NODE : + parent = node; + sibling = node.getFirstChild(); + break; + } + while ((sibling == null) && (parent != null)) { + sibling = parent.getNextSibling(); + parent = parent.getParentNode(); + } + if (sibling == null) { + return; + } + + node = sibling; + sibling = node.getNextSibling(); + } while (true); + } + + /** + * @param sibling + * @param uri + * @param nodeName + * @param number + * @return nodes with the constrain + */ + public static Text + selectNodeText(Node sibling, String uri, String nodeName, int number) + { + Node n = selectNode(sibling, uri, nodeName, number); + if (n == null) { + return null; } - - /** - * @param sibling - * @param uri - * @param nodeName - * @param number - * @return nodes with the constrain - */ - public static Text selectNodeText(Node sibling, String uri, String nodeName, int number) { - Node n = selectNode(sibling,uri,nodeName,number); - if (n == null) { - return null; - } - n = n.getFirstChild(); - while (n != null && n.getNodeType() != Node.TEXT_NODE) { - n = n.getNextSibling(); - } - return (Text)n; + n = n.getFirstChild(); + while (n != null && n.getNodeType() != Node.TEXT_NODE) { + n = 
n.getNextSibling(); } - - /** - * @param sibling - * @param uri - * @param nodeName - * @param number - * @return nodes with the constrain - */ - public static Element selectNode(Node sibling, String uri, String nodeName, int number) { - while (sibling != null) { - if (sibling.getNamespaceURI() != null && sibling.getNamespaceURI().equals(uri) - && sibling.getLocalName().equals(nodeName)) { - if (number == 0){ - return (Element)sibling; - } - number--; - } - sibling = sibling.getNextSibling(); + return (Text)n; + } + + /** + * @param sibling + * @param uri + * @param nodeName + * @param number + * @return nodes with the constrain + */ + public static Element + selectNode(Node sibling, String uri, String nodeName, int number) + { + while (sibling != null) { + if (sibling.getNamespaceURI() != null && sibling.getNamespaceURI().equals(uri) + && sibling.getLocalName().equals(nodeName)) { + if (number == 0) { + return (Element)sibling; } - return null; + number--; + } + sibling = sibling.getNextSibling(); } - - /** - * @param sibling - * @param uri - * @param nodeName - * @return nodes with the constraint - */ - public static Element[] selectNodes(Node sibling, String uri, String nodeName) { - List list = new ArrayList(); - while (sibling != null) { - if (sibling.getNamespaceURI() != null && sibling.getNamespaceURI().equals(uri) - && sibling.getLocalName().equals(nodeName)) { - list.add((Element)sibling); - } - sibling = sibling.getNextSibling(); - } - return list.toArray(new Element[list.size()]); + return null; + } + + /** + * @param sibling + * @param uri + * @param nodeName + * @return nodes with the constraint + */ + public static Element[] + selectNodes(Node sibling, String uri, String nodeName) + { + List list = new ArrayList(); + while (sibling != null) { + if (sibling.getNamespaceURI() != null && sibling.getNamespaceURI().equals(uri) + && sibling.getLocalName().equals(nodeName)) { + list.add((Element)sibling); + } + sibling = sibling.getNextSibling(); } - - /** - 
* @param signatureElement - * @param inputSet - * @return nodes with the constrain - */ - public static Set excludeNodeFromSet(Node signatureElement, Set inputSet) { - Set resultSet = new HashSet(); - Iterator iterator = inputSet.iterator(); - - while (iterator.hasNext()) { - Node inputNode = iterator.next(); - - if (!XMLUtils.isDescendantOrSelf(signatureElement, inputNode)) { - resultSet.add(inputNode); - } - } - return resultSet; + return list.toArray(new Element[list.size()]); + } + + /** + * @param signatureElement + * @param inputSet + * @return nodes with the constrain + */ + public static Set + excludeNodeFromSet(Node signatureElement, Set inputSet) + { + Set resultSet = new HashSet(); + Iterator iterator = inputSet.iterator(); + + while (iterator.hasNext()) { + Node inputNode = iterator.next(); + + if (!XMLUtils.isDescendantOrSelf(signatureElement, inputNode)) { + resultSet.add(inputNode); + } } - - /** - * Method getStrFromNode - * - * @param xpathnode - * @return the string for the node. - */ - public static String getStrFromNode(Node xpathnode) { - if (xpathnode.getNodeType() == Node.TEXT_NODE) { - // we iterate over all siblings of the context node because eventually, - // the text is "polluted" with pi's or comments - StringBuilder sb = new StringBuilder(); - - for (Node currentSibling = xpathnode.getParentNode().getFirstChild(); - currentSibling != null; - currentSibling = currentSibling.getNextSibling()) { - if (currentSibling.getNodeType() == Node.TEXT_NODE) { - sb.append(((Text) currentSibling).getData()); - } - } - - return sb.toString(); - } else if (xpathnode.getNodeType() == Node.ATTRIBUTE_NODE) { - return ((Attr) xpathnode).getNodeValue(); - } else if (xpathnode.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) { - return ((ProcessingInstruction) xpathnode).getNodeValue(); + return resultSet; + } + + /** + * Method getStrFromNode + * + * @param xpathnode + * @return the string for the node. 
+ */ + public static String + getStrFromNode(Node xpathnode) + { + if (xpathnode.getNodeType() == Node.TEXT_NODE) { + // we iterate over all siblings of the context node because eventually, + // the text is "polluted" with pi's or comments + StringBuilder sb = new StringBuilder(); + + for (Node currentSibling = xpathnode.getParentNode().getFirstChild(); + currentSibling != null; + currentSibling = currentSibling.getNextSibling()) { + if (currentSibling.getNodeType() == Node.TEXT_NODE) { + sb.append(((Text) currentSibling).getData()); } + } - return null; + return sb.toString(); + } else if (xpathnode.getNodeType() == Node.ATTRIBUTE_NODE) { + return ((Attr) xpathnode).getNodeValue(); + } else if (xpathnode.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) { + return ((ProcessingInstruction) xpathnode).getNodeValue(); } - /** - * Returns true if the descendantOrSelf is on the descendant-or-self axis - * of the context node. - * - * @param ctx - * @param descendantOrSelf - * @return true if the node is descendant - */ - public static boolean isDescendantOrSelf(Node ctx, Node descendantOrSelf) { - if (ctx == descendantOrSelf) { - return true; - } + return null; + } + + /** + * Returns true if the descendantOrSelf is on the descendant-or-self axis + * of the context node. 
+ * + * @param ctx + * @param descendantOrSelf + * @return true if the node is descendant + */ + public static boolean + isDescendantOrSelf(Node ctx, Node descendantOrSelf) + { + if (ctx == descendantOrSelf) { + return true; + } - Node parent = descendantOrSelf; + Node parent = descendantOrSelf; - while (true) { - if (parent == null) { - return false; - } + while (true) { + if (parent == null) { + return false; + } - if (parent == ctx) { - return true; - } + if (parent == ctx) { + return true; + } - if (parent.getNodeType() == Node.ATTRIBUTE_NODE) { - parent = ((Attr) parent).getOwnerElement(); - } else { - parent = parent.getParentNode(); - } - } + if (parent.getNodeType() == Node.ATTRIBUTE_NODE) { + parent = ((Attr) parent).getOwnerElement(); + } else { + parent = parent.getParentNode(); + } } - - /** - * Returns the attribute value for the attribute with the specified name. - * Returns null if there is no such attribute, or - * the empty string if the attribute value is empty. - * - *

    This works around a limitation of the DOM - * Element.getAttributeNode method, which does not distinguish - * between an unspecified attribute and an attribute with a value of - * "" (it returns "" for both cases). - * - * @param elem the element containing the attribute - * @param name the name of the attribute - * @return the attribute value (may be null if unspecified) - */ - public static String getAttributeValue(Element elem, String name) { - Attr attr = elem.getAttributeNodeNS(null, name); - return (attr == null) ? null : attr.getValue(); + } + + /** + * Returns the attribute value for the attribute with the specified name. + * Returns null if there is no such attribute, or + * the empty string if the attribute value is empty. + * + *

    This works around a limitation of the DOM + * Element.getAttributeNode method, which does not distinguish + * between an unspecified attribute and an attribute with a value of + * "" (it returns "" for both cases). + * + * @param elem the element containing the attribute + * @param name the name of the attribute + * @return the attribute value (may be null if unspecified) + */ + public static String + getAttributeValue(Element elem, String name) + { + Attr attr = elem.getAttributeNodeNS(null, name); + return (attr == null) ? null : attr.getValue(); + } + + /** + * This method is a tree-search to help prevent against wrapping attacks. It checks that no + * two Elements have ID Attributes that match the "value" argument, if this is the case then + * "false" is returned. Note that a return value of "true" does not necessarily mean that + * a matching Element has been found, just that no wrapping attack has been detected. + */ + public static boolean + protectAgainstWrappingAttack(Node startNode, String value) + { + Node startParent = startNode.getParentNode(); + Node processedNode; + Element foundElement = null; + + String id = value.trim(); + if (id.charAt(0) == '#') { + id = id.substring(1); } - /** - * This method is a tree-search to help prevent against wrapping attacks. It checks that no - * two Elements have ID Attributes that match the "value" argument, if this is the case then - * "false" is returned. Note that a return value of "true" does not necessarily mean that - * a matching Element has been found, just that no wrapping attack has been detected. 
- */ - public static boolean protectAgainstWrappingAttack(Node startNode, String value) { - Node startParent = startNode.getParentNode(); - Node processedNode; - Element foundElement = null; - - String id = value.trim(); - if (id.charAt(0) == '#') { - id = id.substring(1); - } - - while (startNode != null) { - if (startNode.getNodeType() == Node.ELEMENT_NODE) { - Element se = (Element) startNode; - - NamedNodeMap attributes = se.getAttributes(); - if (attributes != null) { - for (int i = 0; i < attributes.getLength(); i++) { - Attr attr = (Attr)attributes.item(i); - if (attr.isId() && id.equals(attr.getValue())) { - if (foundElement == null) { - // Continue searching to find duplicates - foundElement = attr.getOwnerElement(); - } else { - //log.debug("Multiple elements with the same 'Id' attribute value!"); - return false; - } - } - } - } - } - - processedNode = startNode; - startNode = startNode.getFirstChild(); - - // no child, this node is done. - if (startNode == null) { - // close node processing, get sibling - startNode = processedNode.getNextSibling(); - } - - // no more siblings, get parent, all children - // of parent are processed. 
- while (startNode == null) { - processedNode = processedNode.getParentNode(); - if (processedNode == startParent) { - return true; - } - // close parent node processing (processed node now) - startNode = processedNode.getNextSibling(); + while (startNode != null) { + if (startNode.getNodeType() == Node.ELEMENT_NODE) { + Element se = (Element) startNode; + + NamedNodeMap attributes = se.getAttributes(); + if (attributes != null) { + for (int i = 0; i < attributes.getLength(); i++) { + Attr attr = (Attr)attributes.item(i); + if (attr.isId() && id.equals(attr.getValue())) { + if (foundElement == null) { + // Continue searching to find duplicates + foundElement = attr.getOwnerElement(); + } else { + //log.debug("Multiple elements with the same 'Id' attribute value!"); + return false; + } } + } } - return true; - } - - /** - * This method is a tree-search to help prevent against wrapping attacks. It checks that no other - * Element than the given "knownElement" argument has an ID attribute that matches the "value" - * argument, which is the ID value of "knownElement". If this is the case then "false" is returned. - */ - public static boolean protectAgainstWrappingAttack( - Node startNode, Element knownElement, String value - ) { - Node startParent = startNode.getParentNode(); - Node processedNode; - - String id = value.trim(); - if (id.charAt(0) == '#') { - id = id.substring(1); + } + + processedNode = startNode; + startNode = startNode.getFirstChild(); + + // no child, this node is done. + if (startNode == null) { + // close node processing, get sibling + startNode = processedNode.getNextSibling(); + } + + // no more siblings, get parent, all children + // of parent are processed. 
+ while (startNode == null) { + processedNode = processedNode.getParentNode(); + if (processedNode == startParent) { + return true; } + // close parent node processing (processed node now) + startNode = processedNode.getNextSibling(); + } + } + return true; + } + + /** + * This method is a tree-search to help prevent against wrapping attacks. It checks that no other + * Element than the given "knownElement" argument has an ID attribute that matches the "value" + * argument, which is the ID value of "knownElement". If this is the case then "false" is returned. + */ + public static boolean + protectAgainstWrappingAttack( + Node startNode, Element knownElement, String value + ) + { + Node startParent = startNode.getParentNode(); + Node processedNode; + + String id = value.trim(); + if (id.charAt(0) == '#') { + id = id.substring(1); + } - while (startNode != null) { - if (startNode.getNodeType() == Node.ELEMENT_NODE) { - Element se = (Element) startNode; - - NamedNodeMap attributes = se.getAttributes(); - if (attributes != null) { - for (int i = 0; i < attributes.getLength(); i++) { - Attr attr = (Attr)attributes.item(i); - if (attr.isId() && id.equals(attr.getValue()) && se != knownElement) { - //log.debug("Multiple elements with the same 'Id' attribute value!"); - return false; - } - } - } - } - - processedNode = startNode; - startNode = startNode.getFirstChild(); - - // no child, this node is done. - if (startNode == null) { - // close node processing, get sibling - startNode = processedNode.getNextSibling(); - } - - // no more siblings, get parent, all children - // of parent are processed. 
- while (startNode == null) { - processedNode = processedNode.getParentNode(); - if (processedNode == startParent) { - return true; - } - // close parent node processing (processed node now) - startNode = processedNode.getNextSibling(); + while (startNode != null) { + if (startNode.getNodeType() == Node.ELEMENT_NODE) { + Element se = (Element) startNode; + + NamedNodeMap attributes = se.getAttributes(); + if (attributes != null) { + for (int i = 0; i < attributes.getLength(); i++) { + Attr attr = (Attr)attributes.item(i); + if (attr.isId() && id.equals(attr.getValue()) && se != knownElement) { + //log.debug("Multiple elements with the same 'Id' attribute value!"); + return false; } + } } - return true; + } + + processedNode = startNode; + startNode = startNode.getFirstChild(); + + // no child, this node is done. + if (startNode == null) { + // close node processing, get sibling + startNode = processedNode.getNextSibling(); + } + + // no more siblings, get parent, all children + // of parent are processed. + while (startNode == null) { + processedNode = processedNode.getParentNode(); + if (processedNode == startParent) { + return true; + } + // close parent node processing (processed node now) + startNode = processedNode.getNextSibling(); + } } + return true; + } } diff --git a/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java b/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java index 7c2b08da24..b330ba04a9 100644 --- a/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +++ b/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java @@ -63,334 +63,331 @@ public class DOM2DTM extends DTMDefaultBaseIterators // static final boolean JJK_DEBUG=false; // static final boolean JJK_NEWCODE=true; - /** Manefest constant - */ - static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace"; - - /** The current position in the DOM tree. Last node examined for - * possible copying to DTM. */ - transient private Node m_pos; - /** The current position in the DTM tree. 
Who children get appended to. */ - private int m_last_parent=0; - /** The current position in the DTM tree. Who children reference as their - * previous sib. */ - private int m_last_kid=NULL; - - /** The top of the subtree. - * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.' - * */ - transient private Node m_root; - - /** True iff the first element has been processed. This is used to control - synthesis of the implied xml: namespace declaration node. */ - boolean m_processedFirstElement=false; - - /** true if ALL the nodes in the m_root subtree have been processed; - * false if our incremental build has not yet finished scanning the - * DOM tree. */ - transient private boolean m_nodesAreProcessed; - - /** The node objects. The instance part of the handle indexes - * directly into this vector. Each DTM node may actually be - * composed of several DOM nodes (for example, if logically-adjacent - * Text/CDATASection nodes in the DOM have been coalesced into a - * single DTM Text node); this table points only to the first in - * that sequence. */ - protected final java.util.List m_nodes = new java.util.ArrayList(12); - - /** - * Construct a DOM2DTM object from a DOM node. - * - * @param mgr The DTMManager who owns this DTM. - * @param domSource the DOM source that this DTM will wrap. - * @param dtmIdentity The DTM identity ID for this DTM. - * @param whiteSpaceFilter The white space filter for this DTM, which may - * be null. - * @param xstringfactory XMLString factory for creating character content. - * @param doIndexing true if the caller considers it worth it to use - * indexing schemes. 
- */ - public DOM2DTM(DTMManager mgr, DOMSource domSource, - int dtmIdentity, DTMWSFilter whiteSpaceFilter, - XMLStringFactory xstringfactory, - boolean doIndexing) - { - super(mgr, domSource, dtmIdentity, whiteSpaceFilter, - xstringfactory, doIndexing); - - // Initialize DOM navigation - m_pos=m_root = domSource.getNode(); - // Initialize DTM navigation - m_last_parent=m_last_kid=NULL; - m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); - - // Apparently the domSource root may not actually be the - // Document node. If it's an Element node, we need to immediately - // add its attributes. Adapted from nextNode(). - // %REVIEW% Move this logic into addNode and recurse? Cleaner! - // - // (If it's an EntityReference node, we're probably in - // seriously bad trouble. For now - // I'm just hoping nobody is ever quite that foolish... %REVIEW%) - // - // %ISSUE% What about inherited namespaces in this case? - // Do we need to special-case initialize them into the DTM model? - if(ELEMENT_NODE == m_root.getNodeType()) - { - NamedNodeMap attrs=m_root.getAttributes(); - int attrsize=(attrs==null) ? 0 : attrs.getLength(); - if(attrsize>0) - { - int attrIndex=NULL; // start with no previous sib - for(int i=0;i m_nodes = new java.util.ArrayList(12); + + /** + * Construct a DOM2DTM object from a DOM node. + * + * @param mgr The DTMManager who owns this DTM. + * @param domSource the DOM source that this DTM will wrap. + * @param dtmIdentity The DTM identity ID for this DTM. + * @param whiteSpaceFilter The white space filter for this DTM, which may + * be null. + * @param xstringfactory XMLString factory for creating character content. + * @param doIndexing true if the caller considers it worth it to use + * indexing schemes. 
+ */ + public + DOM2DTM(DTMManager mgr, DOMSource domSource, + int dtmIdentity, DTMWSFilter whiteSpaceFilter, + XMLStringFactory xstringfactory, + boolean doIndexing) + { + super(mgr, domSource, dtmIdentity, whiteSpaceFilter, + xstringfactory, doIndexing); + + // Initialize DOM navigation + m_pos = m_root = domSource.getNode(); + // Initialize DTM navigation + m_last_parent = m_last_kid = NULL; + m_last_kid = addNode(m_root, m_last_parent, m_last_kid, NULL); + + // Apparently the domSource root may not actually be the + // Document node. If it's an Element node, we need to immediately + // add its attributes. Adapted from nextNode(). + // %REVIEW% Move this logic into addNode and recurse? Cleaner! + // + // (If it's an EntityReference node, we're probably in + // seriously bad trouble. For now + // I'm just hoping nobody is ever quite that foolish... %REVIEW%) + // + // %ISSUE% What about inherited namespaces in this case? + // Do we need to special-case initialize them into the DTM model? + if (ELEMENT_NODE == m_root.getNodeType()) { + NamedNodeMap attrs = m_root.getAttributes(); + int attrsize = (attrs == null) ? 0 : attrs.getLength(); + if (attrsize > 0) { + int attrIndex = NULL; // start with no previous sib + for (int i = 0; i < attrsize; ++i) { + // No need to force nodetype in this case; + // addNode() will take care of switching it from + // Attr to Namespace if necessary. + attrIndex = addNode(attrs.item(i), 0, attrIndex, NULL); + m_firstch.setElementAt(DTM.NULL, attrIndex); + } + // Terminate list of attrs, and make sure they aren't + // considered children of the element + m_nextsib.setElementAt(DTM.NULL, attrIndex); + + // IMPORTANT: This does NOT change m_last_parent or m_last_kid! + } // if attrs exist + } //if(ELEMENT_NODE) + + // Initialize DTM-completed status + m_nodesAreProcessed = false; + } + + /** + * Construct the node map from the node. + * + * @param node The node that is to be added to the DTM. 
+ * @param parentIndex The current parent index. + * @param previousSibling The previous sibling index. + * @param forceNodeType If not DTM.NULL, overrides the DOM node type. + * Used to force nodes to Text rather than CDATASection when their + * coalesced value includes ordinary Text nodes (current DTM behavior). + * + * @return The index identity of the node that was added. + */ + protected int + addNode(Node node, int parentIndex, + int previousSibling, int forceNodeType) + { + int nodeIndex = m_nodes.size(); + + // Have we overflowed a DTM Identity's addressing range? + if (m_dtmIdent.size() == (nodeIndex >>> DTMManager.IDENT_DTM_NODE_BITS)) { + try { + if (m_mgr == null) { + throw new ClassCastException(); + } + + // Handle as Extended Addressing + DTMManagerDefault mgrD = (DTMManagerDefault)m_mgr; + int id = mgrD.getFirstFreeDTMID(); + mgrD.addDTM(this, id, nodeIndex); + m_dtmIdent.addElement(id << DTMManager.IDENT_DTM_NODE_BITS); + } catch (ClassCastException e) { + // %REVIEW% Wrong error message, but I've been told we're trying + // not to add messages right not for I18N reasons. + // %REVIEW% Should this be a Fatal Error? + error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; + } } - /** - * Construct the node map from the node. - * - * @param node The node that is to be added to the DTM. - * @param parentIndex The current parent index. - * @param previousSibling The previous sibling index. - * @param forceNodeType If not DTM.NULL, overrides the DOM node type. - * Used to force nodes to Text rather than CDATASection when their - * coalesced value includes ordinary Text nodes (current DTM behavior). - * - * @return The index identity of the node that was added. - */ - protected int addNode(Node node, int parentIndex, - int previousSibling, int forceNodeType) - { - int nodeIndex = m_nodes.size(); - - // Have we overflowed a DTM Identity's addressing range? 
- if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) - { - try - { - if(m_mgr==null) - throw new ClassCastException(); - - // Handle as Extended Addressing - DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; - int id=mgrD.getFirstFreeDTMID(); - mgrD.addDTM(this,id,nodeIndex); - m_dtmIdent.addElement(id<0) - { - for(int i=0;i 0) { + for (int i = 0; i < attrsize; ++i) { + // No need to force nodetype in this case; + // addNode() will take care of switching it from + // Attr to Namespace if necessary. + attrIndex = addNode(attrs.item(i), nextindex, attrIndex, NULL); + m_firstch.setElementAt(DTM.NULL, attrIndex); + + // If the xml: prefix is explicitly declared + // we don't need to synthesize one. + // + // NOTE that XML Namespaces were not originally + // defined as being namespace-aware (grrr), and + // while the W3C is planning to fix this it's + // safer for now to test the QName and trust the + // parsers to prevent anyone from redefining the + // reserved xmlns: prefix + if (!m_processedFirstElement + && "xmlns:xml".equals(attrs.item(i).getNodeName())) { + m_processedFirstElement = true; + } + } + // Terminate list of attrs, and make sure they aren't + // considered children of the element + } // if attrs exist + if (!m_processedFirstElement) { + // The DOM might not have an explicit declaration for the + // implicit "xml:" prefix, but the XPath data model + // requires that this appear as a Namespace Node so we + // have to synthesize one. You can think of this as + // being a default attribute defined by the XML + // Namespaces spec rather than by the DTD. + attrIndex = addNode(new DOM2DTMdefaultNamespaceDeclarationNode( + (Element)next, "xml", NAMESPACE_DECL_NS, + makeNodeHandle(((attrIndex == NULL) ? 
nextindex : attrIndex) + 1) + ), + nextindex, attrIndex, NULL); + m_firstch.setElementAt(DTM.NULL, attrIndex); + m_processedFirstElement = true; + } + if (attrIndex != NULL) { m_nextsib.setElementAt(DTM.NULL, attrIndex); } + } //if(ELEMENT_NODE) + } // (if !suppressNode) + + // Text postprocessing: Act on values stored above + //if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) + //{ + // %TBD% If nexttype was forced to TEXT, patch the DTM node + if (lastTextNode != null) { next = lastTextNode; } // Advance the DOM cursor over contiguous text + //} + + // Remember where we left off. + m_pos = next; + return true; + } + + + /** + * Return an DOM node for the given node. + * + * @param nodeHandle The node ID. + * + * @return A node representation of the DTM node. + */ + public Node + getNode(int nodeHandle) + { + + int identity = makeNodeIdentity(nodeHandle); + + return m_nodes.get(identity); + } + + /** + * Get a Node from an identity index. + * + * NEEDSDOC @param nodeIdentity + * + * NEEDSDOC ($objectName$) @return + */ + protected Node + lookupNode(int nodeIdentity) + { + return m_nodes.get(nodeIdentity); + } + + /** + * Get the next node identity value in the list, and call the iterator + * if it hasn't been added yet. + * + * @param identity The node identity (index). + * @return identity+1, or DTM.NULL. + */ + protected int + getNextNodeIdentity(int identity) + { + + identity += 1; + + if (identity >= m_nodes.size()) { + if (!nextNode()) { + identity = DTM.NULL; + } } + return identity; + } + + /** + * Get the handle from a Node. + *

    %OPT% This will be pretty slow.

    + * + *

    %OPT% An XPath-like search (walk up DOM to root, tracking path; + * walk down DTM reconstructing path) might be considerably faster + * on later nodes in large documents. That might also imply improving + * this call to handle nodes which would be in this DTM but + * have not yet been built, which might or might not be a Good Thing.

    + * + * %REVIEW% This relies on being able to test node-identity via + * object-identity. DTM2DOM proxying is a great example of a case where + * that doesn't work. DOM Level 3 will provide the isSameNode() method + * to fix that, but until then this is going to be flaky. + * + * @param node A node, which may be null. + * + * @return The node handle or DTM.NULL. + */ + public int + getHandleFromNode(Node node) + { + if (null != node) { + int len = m_nodes.size(); + boolean isMore; + int i = 0; + do { + for (; i < len; i++) { + if (m_nodes.get(i) == node) { + return makeNodeHandle(i); + } + } - /** - * Return an DOM node for the given node. - * - * @param nodeHandle The node ID. - * - * @return A node representation of the DTM node. - */ - public Node getNode(int nodeHandle) - { - - int identity = makeNodeIdentity(nodeHandle); + isMore = nextNode(); - return (Node) m_nodes.get(identity); - } + len = m_nodes.size(); - /** - * Get a Node from an identity index. - * - * NEEDSDOC @param nodeIdentity - * - * NEEDSDOC ($objectName$) @return - */ - protected Node lookupNode(int nodeIdentity) - { - return (Node) m_nodes.get(nodeIdentity); + } while (isMore || i < len); } - /** - * Get the next node identity value in the list, and call the iterator - * if it hasn't been added yet. - * - * @param identity The node identity (index). - * @return identity+1, or DTM.NULL. - */ - protected int getNextNodeIdentity(int identity) - { - - identity += 1; - - if (identity >= m_nodes.size()) - { - if (!nextNode()) - identity = DTM.NULL; - } - - return identity; + return DTM.NULL; + } + + /** Get the handle from a Node. This is a more robust version of + * getHandleFromNode, intended to be usable by the public. + * + *

    %OPT% This will be pretty slow.

    + * + * %REVIEW% This relies on being able to test node-identity via + * object-identity. DTM2DOM proxying is a great example of a case where + * that doesn't work. DOM Level 3 will provide the isSameNode() method + * to fix that, but until then this is going to be flaky. + * + * @param node A node, which may be null. + * + * @return The node handle or DTM.NULL. */ + public int + getHandleOfNode(Node node) + { + if (null != node) { + // Is Node actually within the same document? If not, don't search! + // This would be easier if m_root was always the Document node, but + // we decided to allow wrapping a DTM around a subtree. + if ((m_root == node) || + (m_root.getNodeType() == DOCUMENT_NODE && + m_root == node.getOwnerDocument()) || + (m_root.getNodeType() != DOCUMENT_NODE && + m_root.getOwnerDocument() == node.getOwnerDocument()) + ) { + // If node _is_ in m_root's tree, find its handle + // + // %OPT% This check may be improved significantly when DOM + // Level 3 nodeKey and relative-order tests become + // available! + for (Node cursor = node; + cursor != null; + cursor = + (cursor.getNodeType() != ATTRIBUTE_NODE) + ? cursor.getParentNode() + : ((org.w3c.dom.Attr)cursor).getOwnerElement()) { + if (cursor == m_root) + // We know this node; find its handle. + { + return getHandleFromNode(node); + } + } // for ancestors of node + } // if node and m_root in same Document + } // if node!=null + + return DTM.NULL; + } + + /** + * Retrieves an attribute node by by qualified name and namespace URI. + * + * @param nodeHandle int Handle of the node upon which to look up this attribute.. + * @param namespaceURI The namespace URI of the attribute to + * retrieve, or null. + * @param name The local name of the attribute to + * retrieve. + * @return The attribute node handle with the specified name ( + * nodeName) or DTM.NULL if there is no such + * attribute. 
+ */ + public int + getAttributeNode(int nodeHandle, String namespaceURI, + String name) + { + + // %OPT% This is probably slower than it needs to be. + if (null == namespaceURI) { + namespaceURI = ""; } - /** - * Get the handle from a Node. - *

    %OPT% This will be pretty slow.

    - * - *

    %OPT% An XPath-like search (walk up DOM to root, tracking path; - * walk down DTM reconstructing path) might be considerably faster - * on later nodes in large documents. That might also imply improving - * this call to handle nodes which would be in this DTM but - * have not yet been built, which might or might not be a Good Thing.

    - * - * %REVIEW% This relies on being able to test node-identity via - * object-identity. DTM2DOM proxying is a great example of a case where - * that doesn't work. DOM Level 3 will provide the isSameNode() method - * to fix that, but until then this is going to be flaky. - * - * @param node A node, which may be null. - * - * @return The node handle or DTM.NULL. - */ - public int getHandleFromNode(Node node) - { - if (null != node) - { - int len = m_nodes.size(); - boolean isMore; - int i = 0; - do - { - for (; i < len; i++) - { - if (m_nodes.get(i) == node) - return makeNodeHandle(i); - } - - isMore = nextNode(); - - len = m_nodes.size(); + int type = getNodeType(nodeHandle); - } - while(isMore || i < len); - } + if (DTM.ELEMENT_NODE == type) { - return DTM.NULL; - } + // Assume that attributes immediately follow the element. + int identity = makeNodeIdentity(nodeHandle); - /** Get the handle from a Node. This is a more robust version of - * getHandleFromNode, intended to be usable by the public. - * - *

    %OPT% This will be pretty slow.

    - * - * %REVIEW% This relies on being able to test node-identity via - * object-identity. DTM2DOM proxying is a great example of a case where - * that doesn't work. DOM Level 3 will provide the isSameNode() method - * to fix that, but until then this is going to be flaky. - * - * @param node A node, which may be null. - * - * @return The node handle or DTM.NULL. */ - public int getHandleOfNode(Node node) - { - if (null != node) - { - // Is Node actually within the same document? If not, don't search! - // This would be easier if m_root was always the Document node, but - // we decided to allow wrapping a DTM around a subtree. - if((m_root==node) || - (m_root.getNodeType()==DOCUMENT_NODE && - m_root==node.getOwnerDocument()) || - (m_root.getNodeType()!=DOCUMENT_NODE && - m_root.getOwnerDocument()==node.getOwnerDocument()) - ) - { - // If node _is_ in m_root's tree, find its handle - // - // %OPT% This check may be improved significantly when DOM - // Level 3 nodeKey and relative-order tests become - // available! - for(Node cursor=node; - cursor!=null; - cursor= - (cursor.getNodeType()!=ATTRIBUTE_NODE) - ? cursor.getParentNode() - : ((org.w3c.dom.Attr)cursor).getOwnerElement()) - { - if(cursor==m_root) - // We know this node; find its handle. - return getHandleFromNode(node); - } // for ancestors of node - } // if node and m_root in same Document - } // if node!=null - - return DTM.NULL; - } + while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { + // Assume this can not be null. + type = _type(identity); - /** - * Retrieves an attribute node by by qualified name and namespace URI. - * - * @param nodeHandle int Handle of the node upon which to look up this attribute.. - * @param namespaceURI The namespace URI of the attribute to - * retrieve, or null. - * @param name The local name of the attribute to - * retrieve. - * @return The attribute node handle with the specified name ( - * nodeName) or DTM.NULL if there is no such - * attribute. 
- */ - public int getAttributeNode(int nodeHandle, String namespaceURI, - String name) - { - - // %OPT% This is probably slower than it needs to be. - if (null == namespaceURI) - namespaceURI = ""; - - int type = getNodeType(nodeHandle); - - if (DTM.ELEMENT_NODE == type) - { - - // Assume that attributes immediately follow the element. - int identity = makeNodeIdentity(nodeHandle); - - while (DTM.NULL != (identity = getNextNodeIdentity(identity))) - { - // Assume this can not be null. - type = _type(identity); - - // %REVIEW% - // Should namespace nodes be retrievable DOM-style as attrs? - // If not we need a separate function... which may be desirable - // architecturally, but which is ugly from a code point of view. - // (If we REALLY insist on it, this code should become a subroutine - // of both -- retrieve the node, then test if the type matches - // what you're looking for.) - if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE) - { - Node node = lookupNode(identity); - String nodeuri = node.getNamespaceURI(); - - if (null == nodeuri) - nodeuri = ""; - - String nodelocalname = node.getLocalName(); - - if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) - return makeNodeHandle(identity); - } - - else // if (DTM.NAMESPACE_NODE != type) - { - break; - } - } - } + // %REVIEW% + // Should namespace nodes be retrievable DOM-style as attrs? + // If not we need a separate function... which may be desirable + // architecturally, but which is ugly from a code point of view. + // (If we REALLY insist on it, this code should become a subroutine + // of both -- retrieve the node, then test if the type matches + // what you're looking for.) 
+ if (type == DTM.ATTRIBUTE_NODE || type == DTM.NAMESPACE_NODE) { + Node node = lookupNode(identity); + String nodeuri = node.getNamespaceURI(); - return DTM.NULL; - } + if (null == nodeuri) { + nodeuri = ""; + } - /** - * Get the string-value of a node as a String object - * (see http://www.w3.org/TR/xpath#data-model - * for the definition of a node's string-value). - * - * @param nodeHandle The node ID. - * - * @return A string object that represents the string-value of the given node. - */ - public XMLString getStringValue(int nodeHandle) - { - - int type = getNodeType(nodeHandle); - Node node = getNode(nodeHandle); - // %TBD% If an element only has one text node, we should just use it - // directly. - if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type - || DTM.DOCUMENT_FRAGMENT_NODE == type) - { - FastStringBuffer buf = StringBufferPool.get(); - String s; - - try - { - getNodeData(node, buf); - - s = (buf.length() > 0) ? buf.toString() : ""; - } - finally - { - StringBufferPool.free(buf); - } + String nodelocalname = node.getLocalName(); - return m_xstrf.newstr( s ); - } - else if(TEXT_NODE == type || CDATA_SECTION_NODE == type) - { - // If this is a DTM text node, it may be made of multiple DOM text - // nodes -- including navigating into Entity References. DOM2DTM - // records the first node in the sequence and requires that we - // pick up the others when we retrieve the DTM node's value. - // - // %REVIEW% DOM Level 3 is expected to add a "whole text" - // retrieval method which performs this function for us. - FastStringBuffer buf = StringBufferPool.get(); - while(node!=null) - { - buf.append(node.getNodeValue()); - node=logicalNextDOMTextNode(node); - } - String s=(buf.length() > 0) ? 
buf.toString() : ""; - StringBufferPool.free(buf); - return m_xstrf.newstr( s ); + if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) { + return makeNodeHandle(identity); + } } - else - return m_xstrf.newstr( node.getNodeValue() ); - } - /** - * Determine if the string-value of a node is whitespace - * - * @param nodeHandle The node Handle. - * - * @return Return true if the given node is whitespace. - */ - public boolean isWhitespace(int nodeHandle) - { - int type = getNodeType(nodeHandle); - Node node = getNode(nodeHandle); - if(TEXT_NODE == type || CDATA_SECTION_NODE == type) - { - // If this is a DTM text node, it may be made of multiple DOM text - // nodes -- including navigating into Entity References. DOM2DTM - // records the first node in the sequence and requires that we - // pick up the others when we retrieve the DTM node's value. - // - // %REVIEW% DOM Level 3 is expected to add a "whole text" - // retrieval method which performs this function for us. - FastStringBuffer buf = StringBufferPool.get(); - while(node!=null) - { - buf.append(node.getNodeValue()); - node=logicalNextDOMTextNode(node); - } - boolean b = buf.isWhitespace(0, buf.length()); - StringBufferPool.free(buf); - return b; + else { // if (DTM.NAMESPACE_NODE != type) + break; } - return false; + } } - /** - * Retrieve the text content of a DOM subtree, appending it into a - * user-supplied FastStringBuffer object. Note that attributes are - * not considered part of the content of an element. - *

    - * There are open questions regarding whitespace stripping. - * Currently we make no special effort in that regard, since the standard - * DOM doesn't yet provide DTD-based information to distinguish - * whitespace-in-element-context from genuine #PCDATA. Note that we - * should probably also consider xml:space if/when we address this. - * DOM Level 3 may solve the problem for us. - *

    - * %REVIEW% Actually, since this method operates on the DOM side of the - * fence rather than the DTM side, it SHOULDN'T do - * any special handling. The DOM does what the DOM does; if you want - * DTM-level abstractions, use DTM-level methods. - * - * @param node Node whose subtree is to be walked, gathering the - * contents of all Text or CDATASection nodes. - * @param buf FastStringBuffer into which the contents of the text - * nodes are to be concatenated. - */ - protected static void getNodeData(Node node, FastStringBuffer buf) - { - - switch (node.getNodeType()) - { - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - case Node.ELEMENT_NODE : - { - for (Node child = node.getFirstChild(); null != child; - child = child.getNextSibling()) - { - getNodeData(child, buf); - } - } - break; - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node - buf.append(node.getNodeValue()); - break; - case Node.PROCESSING_INSTRUCTION_NODE : - // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); - break; - default : - // ignore - break; + return DTM.NULL; + } + + /** + * Get the string-value of a node as a String object + * (see http://www.w3.org/TR/xpath#data-model + * for the definition of a node's string-value). + * + * @param nodeHandle The node ID. + * + * @return A string object that represents the string-value of the given node. + */ + public XMLString + getStringValue(int nodeHandle) + { + + int type = getNodeType(nodeHandle); + Node node = getNode(nodeHandle); + // %TBD% If an element only has one text node, we should just use it + // directly. + if (DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type + || DTM.DOCUMENT_FRAGMENT_NODE == type) { + FastStringBuffer buf = StringBufferPool.get(); + String s; + + try { + getNodeData(node, buf); + + s = (buf.length() > 0) ? 
buf.toString() : ""; + } finally { + StringBufferPool.free(buf); + } + + return m_xstrf.newstr(s); + } else if (TEXT_NODE == type || CDATA_SECTION_NODE == type) { + // If this is a DTM text node, it may be made of multiple DOM text + // nodes -- including navigating into Entity References. DOM2DTM + // records the first node in the sequence and requires that we + // pick up the others when we retrieve the DTM node's value. + // + // %REVIEW% DOM Level 3 is expected to add a "whole text" + // retrieval method which performs this function for us. + FastStringBuffer buf = StringBufferPool.get(); + while (node != null) { + buf.append(node.getNodeValue()); + node = logicalNextDOMTextNode(node); + } + String s = (buf.length() > 0) ? buf.toString() : ""; + StringBufferPool.free(buf); + return m_xstrf.newstr(s); + } else { + return m_xstrf.newstr(node.getNodeValue()); + } + } + + /** + * Determine if the string-value of a node is whitespace + * + * @param nodeHandle The node Handle. + * + * @return Return true if the given node is whitespace. + */ + public boolean + isWhitespace(int nodeHandle) + { + int type = getNodeType(nodeHandle); + Node node = getNode(nodeHandle); + if (TEXT_NODE == type || CDATA_SECTION_NODE == type) { + // If this is a DTM text node, it may be made of multiple DOM text + // nodes -- including navigating into Entity References. DOM2DTM + // records the first node in the sequence and requires that we + // pick up the others when we retrieve the DTM node's value. + // + // %REVIEW% DOM Level 3 is expected to add a "whole text" + // retrieval method which performs this function for us. 
+ FastStringBuffer buf = StringBufferPool.get(); + while (node != null) { + buf.append(node.getNodeValue()); + node = logicalNextDOMTextNode(node); + } + boolean b = buf.isWhitespace(0, buf.length()); + StringBufferPool.free(buf); + return b; + } + return false; + } + + /** + * Retrieve the text content of a DOM subtree, appending it into a + * user-supplied FastStringBuffer object. Note that attributes are + * not considered part of the content of an element. + *

    + * There are open questions regarding whitespace stripping. + * Currently we make no special effort in that regard, since the standard + * DOM doesn't yet provide DTD-based information to distinguish + * whitespace-in-element-context from genuine #PCDATA. Note that we + * should probably also consider xml:space if/when we address this. + * DOM Level 3 may solve the problem for us. + *

    + * %REVIEW% Actually, since this method operates on the DOM side of the + * fence rather than the DTM side, it SHOULDN'T do + * any special handling. The DOM does what the DOM does; if you want + * DTM-level abstractions, use DTM-level methods. + * + * @param node Node whose subtree is to be walked, gathering the + * contents of all Text or CDATASection nodes. + * @param buf FastStringBuffer into which the contents of the text + * nodes are to be concatenated. + */ + protected static void + getNodeData(Node node, FastStringBuffer buf) + { + + switch (node.getNodeType()) { + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + case Node.ELEMENT_NODE : { + for (Node child = node.getFirstChild(); null != child; + child = child.getNextSibling()) { + getNodeData(child, buf); } + } + break; + case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node + buf.append(node.getNodeValue()); + break; + case Node.PROCESSING_INSTRUCTION_NODE : + // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); + break; + default : + // ignore + break; } - - /** - * Given a node handle, return its DOM-style node name. This will - * include names such as #text or #document. - * - * @param nodeHandle the id of the node. - * @return String Name of this node, which may be an empty string. - * %REVIEW% Document when empty string is possible... - * %REVIEW-COMMENT% It should never be empty, should it? - */ - public String getNodeName(int nodeHandle) - { - + } + + /** + * Given a node handle, return its DOM-style node name. This will + * include names such as #text or #document. + * + * @param nodeHandle the id of the node. + * @return String Name of this node, which may be an empty string. + * %REVIEW% Document when empty string is possible... + * %REVIEW-COMMENT% It should never be empty, should it? 
+ */ + public String + getNodeName(int nodeHandle) + { + + Node node = getNode(nodeHandle); + + // Assume non-null. + return node.getNodeName(); + } + + /** + * Given a node handle, return the XPath node name. This should be + * the name as described by the XPath data model, NOT the DOM-style + * name. + * + * @param nodeHandle the id of the node. + * @return String Name of this node, which may be an empty string. + */ + public String + getNodeNameX(int nodeHandle) + { + + String name; + short type = getNodeType(nodeHandle); + + switch (type) { + case DTM.NAMESPACE_NODE : { Node node = getNode(nodeHandle); - // Assume non-null. - return node.getNodeName(); - } - - /** - * Given a node handle, return the XPath node name. This should be - * the name as described by the XPath data model, NOT the DOM-style - * name. - * - * @param nodeHandle the id of the node. - * @return String Name of this node, which may be an empty string. - */ - public String getNodeNameX(int nodeHandle) - { - - String name; - short type = getNodeType(nodeHandle); - - switch (type) - { - case DTM.NAMESPACE_NODE : - { - Node node = getNode(nodeHandle); - - // assume not null. - name = node.getNodeName(); - if(name.startsWith("xmlns:")) - { - name = QName.getLocalPart(name); - } - else if(name.equals("xmlns")) - { - name = ""; - } - } - break; - case DTM.ATTRIBUTE_NODE : - case DTM.ELEMENT_NODE : - case DTM.ENTITY_REFERENCE_NODE : - case DTM.PROCESSING_INSTRUCTION_NODE : - { - Node node = getNode(nodeHandle); - - // assume not null. - name = node.getNodeName(); - } - break; - default : - name = ""; + // assume not null. + name = node.getNodeName(); + if (name.startsWith("xmlns:")) { + name = QName.getLocalPart(name); + } else if (name.equals("xmlns")) { + name = ""; } + } + break; + case DTM.ATTRIBUTE_NODE : + case DTM.ELEMENT_NODE : + case DTM.ENTITY_REFERENCE_NODE : + case DTM.PROCESSING_INSTRUCTION_NODE : { + Node node = getNode(nodeHandle); - return name; + // assume not null. 
+ name = node.getNodeName(); + } + break; + default : + name = ""; } - /** - * Given a node handle, return its XPath-style localname. - * (As defined in Namespaces, this is the portion of the name after any - * colon character). - * - * @param nodeHandle the id of the node. - * @return String Local name of this node. - */ - public String getLocalName(int nodeHandle) - { + return name; + } + + /** + * Given a node handle, return its XPath-style localname. + * (As defined in Namespaces, this is the portion of the name after any + * colon character). + * + * @param nodeHandle the id of the node. + * @return String Local name of this node. + */ + public String + getLocalName(int nodeHandle) + { // if(JJK_NEWCODE) // { - int id=makeNodeIdentity(nodeHandle); - if(NULL==id) return null; - Node newnode=(Node)m_nodes.get(id); - String newname=newnode.getLocalName(); - if (null == newname) - { - // XSLT treats PIs, and possibly other things, as having QNames. - String qname = newnode.getNodeName(); - if('#'==qname.charAt(0)) - { - // Match old default for this function - // This conversion may or may not be necessary - newname=""; - } - else - { - int index = qname.indexOf(':'); - newname = (index < 0) ? qname : qname.substring(index + 1); - } - } - return newname; + int id = makeNodeIdentity(nodeHandle); + if (NULL == id) { return null; } + Node newnode = m_nodes.get(id); + String newname = newnode.getLocalName(); + if (null == newname) { + // XSLT treats PIs, and possibly other things, as having QNames. + String qname = newnode.getNodeName(); + if ('#' == qname.charAt(0)) { + // Match old default for this function + // This conversion may or may not be necessary + newname = ""; + } else { + int index = qname.indexOf(':'); + newname = (index < 0) ? 
qname : qname.substring(index + 1); + } + } + return newname; // } // else // { @@ -1062,77 +1034,76 @@ public String getLocalName(int nodeHandle) // } // return name; // } - } + } + + /** + * Given a namespace handle, return the prefix that the namespace decl is + * mapping. + * Given a node handle, return the prefix used to map to the namespace. + * + *

    %REVIEW% Are you sure you want "" for no prefix?

    + *

    %REVIEW-COMMENT% I think so... not totally sure. -sb

    + * + * @param nodeHandle the id of the node. + * @return String prefix of this node's name, or "" if no explicit + * namespace prefix was given. + */ + public String + getPrefix(int nodeHandle) + { + + String prefix; + short type = getNodeType(nodeHandle); + + switch (type) { + case DTM.NAMESPACE_NODE : { + Node node = getNode(nodeHandle); - /** - * Given a namespace handle, return the prefix that the namespace decl is - * mapping. - * Given a node handle, return the prefix used to map to the namespace. - * - *

    %REVIEW% Are you sure you want "" for no prefix?

    - *

    %REVIEW-COMMENT% I think so... not totally sure. -sb

    - * - * @param nodeHandle the id of the node. - * @return String prefix of this node's name, or "" if no explicit - * namespace prefix was given. - */ - public String getPrefix(int nodeHandle) - { - - String prefix; - short type = getNodeType(nodeHandle); - - switch (type) - { - case DTM.NAMESPACE_NODE : - { - Node node = getNode(nodeHandle); - - // assume not null. - String qname = node.getNodeName(); - int index = qname.indexOf(':'); - - prefix = (index < 0) ? "" : qname.substring(index + 1); - } - break; - case DTM.ATTRIBUTE_NODE : - case DTM.ELEMENT_NODE : - { - Node node = getNode(nodeHandle); + // assume not null. + String qname = node.getNodeName(); + int index = qname.indexOf(':'); - // assume not null. - String qname = node.getNodeName(); - int index = qname.indexOf(':'); + prefix = (index < 0) ? "" : qname.substring(index + 1); + } + break; + case DTM.ATTRIBUTE_NODE : + case DTM.ELEMENT_NODE : { + Node node = getNode(nodeHandle); - prefix = (index < 0) ? "" : qname.substring(0, index); - } - break; - default : - prefix = ""; - } + // assume not null. + String qname = node.getNodeName(); + int index = qname.indexOf(':'); - return prefix; + prefix = (index < 0) ? "" : qname.substring(0, index); + } + break; + default : + prefix = ""; } - /** - * Given a node handle, return its DOM-style namespace URI - * (As defined in Namespaces, this is the declared URI which this node's - * prefix -- or default in lieu thereof -- was mapped to.) - * - *

    %REVIEW% Null or ""? -sb

    - * - * @param nodeHandle the id of the node. - * @return String URI value of this node's namespace, or null if no - * namespace was resolved. - */ - public String getNamespaceURI(int nodeHandle) - { + return prefix; + } + + /** + * Given a node handle, return its DOM-style namespace URI + * (As defined in Namespaces, this is the declared URI which this node's + * prefix -- or default in lieu thereof -- was mapped to.) + * + *

    %REVIEW% Null or ""? -sb

    + * + * @param nodeHandle the id of the node. + * @return String URI value of this node's namespace, or null if no + * namespace was resolved. + */ + public String + getNamespaceURI(int nodeHandle) + { // if(JJK_NEWCODE) // { - int id=makeNodeIdentity(nodeHandle); - if(id==NULL) return null; - Node node=(Node)m_nodes.get(id); - return node.getNamespaceURI(); + int id = makeNodeIdentity(nodeHandle); + if (id == NULL) { return null; } + Node node = m_nodes.get(id); + return node.getNamespaceURI(); // } // else // { @@ -1161,584 +1132,580 @@ public String getNamespaceURI(int nodeHandle) // // return nsuri; // } - } - - /** Utility function: Given a DOM Text node, determine whether it is - * logically followed by another Text or CDATASection node. This may - * involve traversing into Entity References. - * - * %REVIEW% DOM Level 3 is expected to add functionality which may - * allow us to retire this. - */ - private Node logicalNextDOMTextNode(Node n) - { - Node p=n.getNextSibling(); - if(p==null) - { - // Walk out of any EntityReferenceNodes that ended with text - for(n=n.getParentNode(); - n!=null && ENTITY_REFERENCE_NODE == n.getNodeType(); - n=n.getParentNode()) - { - p=n.getNextSibling(); - if(p!=null) - break; - } - } - n=p; - while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType()) - { - // Walk into any EntityReferenceNodes that start with text - if(n.hasChildNodes()) - n=n.getFirstChild(); - else - n=n.getNextSibling(); + } + + /** Utility function: Given a DOM Text node, determine whether it is + * logically followed by another Text or CDATASection node. This may + * involve traversing into Entity References. + * + * %REVIEW% DOM Level 3 is expected to add functionality which may + * allow us to retire this. 
+ */ + private Node + logicalNextDOMTextNode(Node n) + { + Node p = n.getNextSibling(); + if (p == null) { + // Walk out of any EntityReferenceNodes that ended with text + for (n = n.getParentNode(); + n != null && ENTITY_REFERENCE_NODE == n.getNodeType(); + n = n.getParentNode()) { + p = n.getNextSibling(); + if (p != null) { + break; } - if(n!=null) - { - // Found a logical next sibling. Is it text? - int ntype=n.getNodeType(); - if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) - n=null; - } - return n; + } } - - /** - * Given a node handle, return its node value. This is mostly - * as defined by the DOM, but may ignore some conveniences. - *

    - * - * @param nodeHandle The node id. - * @return String Value of this node, or null if not - * meaningful for this node type. - */ - public String getNodeValue(int nodeHandle) - { - // The _type(nodeHandle) call was taking the lion's share of our - // time, and was wrong anyway since it wasn't coverting handle to - // identity. Inlined it. - int type = _exptype(makeNodeIdentity(nodeHandle)); - type=(NULL != type) ? getNodeType(nodeHandle) : NULL; - - if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type) - return getNode(nodeHandle).getNodeValue(); - - // If this is a DTM text node, it may be made of multiple DOM text - // nodes -- including navigating into Entity References. DOM2DTM - // records the first node in the sequence and requires that we - // pick up the others when we retrieve the DTM node's value. - // - // %REVIEW% DOM Level 3 is expected to add a "whole text" - // retrieval method which performs this function for us. - Node node = getNode(nodeHandle); - Node n=logicalNextDOMTextNode(node); - if(n==null) - return node.getNodeValue(); - - FastStringBuffer buf = StringBufferPool.get(); - buf.append(node.getNodeValue()); - while(n!=null) - { - buf.append(n.getNodeValue()); - n=logicalNextDOMTextNode(n); - } - String s = (buf.length() > 0) ? buf.toString() : ""; - StringBufferPool.free(buf); - return s; + n = p; + while (n != null && ENTITY_REFERENCE_NODE == n.getNodeType()) { + // Walk into any EntityReferenceNodes that start with text + if (n.hasChildNodes()) { + n = n.getFirstChild(); + } else { + n = n.getNextSibling(); + } } - - /** - * A document type declaration information item has the following properties: - * - * 1. [system identifier] The system identifier of the external subset, if - * it exists. Otherwise this property has no value. - * - * @return the system identifier String object, or null if there is none. 
- */ - public String getDocumentTypeDeclarationSystemIdentifier() - { - - Document doc; - - if (m_root.getNodeType() == Node.DOCUMENT_NODE) - doc = (Document) m_root; - else - doc = m_root.getOwnerDocument(); - - if (null != doc) - { - DocumentType dtd = doc.getDoctype(); - - if (null != dtd) - { - return dtd.getSystemId(); - } - } - - return null; + if (n != null) { + // Found a logical next sibling. Is it text? + int ntype = n.getNodeType(); + if (TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) { + n = null; + } } - - /** - * Return the public identifier of the external subset, - * normalized as described in 4.2.2 External Entities [XML]. If there is - * no external subset or if it has no public identifier, this property - * has no value. - * - * @return the public identifier String object, or null if there is none. - */ - public String getDocumentTypeDeclarationPublicIdentifier() - { - - Document doc; - - if (m_root.getNodeType() == Node.DOCUMENT_NODE) - doc = (Document) m_root; - else - doc = m_root.getOwnerDocument(); - - if (null != doc) - { - DocumentType dtd = doc.getDoctype(); - - if (null != dtd) - { - return dtd.getPublicId(); - } - } - - return null; + return n; + } + + /** + * Given a node handle, return its node value. This is mostly + * as defined by the DOM, but may ignore some conveniences. + *

    + * + * @param nodeHandle The node id. + * @return String Value of this node, or null if not + * meaningful for this node type. + */ + public String + getNodeValue(int nodeHandle) + { + // The _type(nodeHandle) call was taking the lion's share of our + // time, and was wrong anyway since it wasn't coverting handle to + // identity. Inlined it. + int type = _exptype(makeNodeIdentity(nodeHandle)); + type = (NULL != type) ? getNodeType(nodeHandle) : NULL; + + if (TEXT_NODE != type && CDATA_SECTION_NODE != type) { + return getNode(nodeHandle).getNodeValue(); } - /** - * Returns the Element whose ID is given by - * elementId. If no such element exists, returns - * DTM.NULL. Behavior is not defined if more than one element - * has this ID. Attributes (including those - * with the name "ID") are not of type ID unless so defined by DTD/Schema - * information available to the DTM implementation. - * Implementations that do not know whether attributes are of type ID or - * not are expected to return DTM.NULL. - * - *

    %REVIEW% Presumably IDs are still scoped to a single document, - * and this operation searches only within a single document, right? - * Wouldn't want collisions between DTMs in the same process.

    - * - * @param elementId The unique id value for an element. - * @return The handle of the matching element. - */ - public int getElementById(String elementId) - { - - Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) - ? (Document) m_root : m_root.getOwnerDocument(); - - if(null != doc) - { - Node elem = doc.getElementById(elementId); - if(null != elem) - { - int elemHandle = getHandleFromNode(elem); - - if(DTM.NULL == elemHandle) - { - int identity = m_nodes.size()-1; - while (DTM.NULL != (identity = getNextNodeIdentity(identity))) - { - Node node = getNode(identity); - if(node == elem) - { - elemHandle = getHandleFromNode(elem); - break; - } - } - } - - return elemHandle; - } - - } - return DTM.NULL; + // If this is a DTM text node, it may be made of multiple DOM text + // nodes -- including navigating into Entity References. DOM2DTM + // records the first node in the sequence and requires that we + // pick up the others when we retrieve the DTM node's value. + // + // %REVIEW% DOM Level 3 is expected to add a "whole text" + // retrieval method which performs this function for us. + Node node = getNode(nodeHandle); + Node n = logicalNextDOMTextNode(node); + if (n == null) { + return node.getNodeValue(); } - /** - * The getUnparsedEntityURI function returns the URI of the unparsed - * entity with the specified name in the same document as the context - * node (see [3.3 Unparsed Entities]). It returns the empty string if - * there is no such entity. - *

    - * XML processors may choose to use the System Identifier (if one - * is provided) to resolve the entity, rather than the URI in the - * Public Identifier. The details are dependent on the processor, and - * we would have to support some form of plug-in resolver to handle - * this properly. Currently, we simply return the System Identifier if - * present, and hope that it a usable URI or that our caller can - * map it to one. - * TODO: Resolve Public Identifiers... or consider changing function name. - *

    - * If we find a relative URI - * reference, XML expects it to be resolved in terms of the base URI - * of the document. The DOM doesn't do that for us, and it isn't - * entirely clear whether that should be done here; currently that's - * pushed up to a higher level of our application. (Note that DOM Level - * 1 didn't store the document's base URI.) - * TODO: Consider resolving Relative URIs. - *

    - * (The DOM's statement that "An XML processor may choose to - * completely expand entities before the structure model is passed - * to the DOM" refers only to parsed entities, not unparsed, and hence - * doesn't affect this function.) - * - * @param name A string containing the Entity Name of the unparsed - * entity. - * - * @return String containing the URI of the Unparsed Entity, or an - * empty string if no such entity exists. - */ - public String getUnparsedEntityURI(String name) - { - - String url = ""; - Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) - ? (Document) m_root : m_root.getOwnerDocument(); - - if (null != doc) - { - DocumentType doctype = doc.getDoctype(); - - if (null != doctype) - { - NamedNodeMap entities = doctype.getEntities(); - if(null == entities) - return url; - Entity entity = (Entity) entities.getNamedItem(name); - if(null == entity) - return url; - - String notationName = entity.getNotationName(); - - if (null != notationName) // then it's unparsed - { - // The draft says: "The XSLT processor may use the public - // identifier to generate a URI for the entity instead of the URI - // specified in the system identifier. If the XSLT processor does - // not use the public identifier to generate the URI, it must use - // the system identifier; if the system identifier is a relative - // URI, it must be resolved into an absolute URI using the URI of - // the resource containing the entity declaration as the base - // URI [RFC2396]." - // So I'm falling a bit short here. - url = entity.getSystemId(); - - if (null == url) - { - url = entity.getPublicId(); - } - else - { - // This should be resolved to an absolute URL, but that's hard - // to do from here. - } - } - } - } - - return url; + FastStringBuffer buf = StringBufferPool.get(); + buf.append(node.getNodeValue()); + while (n != null) { + buf.append(n.getNodeValue()); + n = logicalNextDOMTextNode(n); } - - /** - * 5. 
[specified] A flag indicating whether this attribute was actually - * specified in the start-tag of its element, or was defaulted from the - * DTD. - * - * @param attributeHandle the attribute handle - * @return true if the attribute was specified; - * false if it was defaulted. - */ - public boolean isAttributeSpecified(int attributeHandle) - { - int type = getNodeType(attributeHandle); - - if (DTM.ATTRIBUTE_NODE == type) - { - Attr attr = (Attr)getNode(attributeHandle); - return attr.getSpecified(); - } - return false; + String s = (buf.length() > 0) ? buf.toString() : ""; + StringBufferPool.free(buf); + return s; + } + + /** + * A document type declaration information item has the following properties: + * + * 1. [system identifier] The system identifier of the external subset, if + * it exists. Otherwise this property has no value. + * + * @return the system identifier String object, or null if there is none. + */ + public String + getDocumentTypeDeclarationSystemIdentifier() + { + + Document doc; + + if (m_root.getNodeType() == Node.DOCUMENT_NODE) { + doc = (Document) m_root; + } else { + doc = m_root.getOwnerDocument(); } - /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since - * we're wrapped around an existing DOM. - * - * @param source The IncrementalSAXSource that we want to recieve events from - * on demand. - */ - public void setIncrementalSAXSource(IncrementalSAXSource source) - { - } + if (null != doc) { + DocumentType dtd = doc.getDoctype(); - /** getContentHandler returns "our SAX builder" -- the thing that - * someone else should send SAX events to in order to extend this - * DTM model. - * - * @return null if this model doesn't respond to SAX events, - * "this" if the DTM object has a built-in SAX ContentHandler, - * the IncrmentalSAXSource if we're bound to one and should receive - * the SAX stream via it for incremental build purposes... 
- * */ - public org.xml.sax.ContentHandler getContentHandler() - { - return null; + if (null != dtd) { + return dtd.getSystemId(); + } } - /** - * Return this DTM's lexical handler. - * - * %REVIEW% Should this return null if constrution already done/begun? - * - * @return null if this model doesn't respond to lexical SAX events, - * "this" if the DTM object has a built-in SAX ContentHandler, - * the IncrementalSAXSource if we're bound to one and should receive - * the SAX stream via it for incremental build purposes... - */ - public org.xml.sax.ext.LexicalHandler getLexicalHandler() - { - - return null; + return null; + } + + /** + * Return the public identifier of the external subset, + * normalized as described in 4.2.2 External Entities [XML]. If there is + * no external subset or if it has no public identifier, this property + * has no value. + * + * @return the public identifier String object, or null if there is none. + */ + public String + getDocumentTypeDeclarationPublicIdentifier() + { + + Document doc; + + if (m_root.getNodeType() == Node.DOCUMENT_NODE) { + doc = (Document) m_root; + } else { + doc = m_root.getOwnerDocument(); } + if (null != doc) { + DocumentType dtd = doc.getDoctype(); - /** - * Return this DTM's EntityResolver. - * - * @return null if this model doesn't respond to SAX entity ref events. - */ - public org.xml.sax.EntityResolver getEntityResolver() - { - - return null; + if (null != dtd) { + return dtd.getPublicId(); + } } - /** - * Return this DTM's DTDHandler. - * - * @return null if this model doesn't respond to SAX dtd events. - */ - public org.xml.sax.DTDHandler getDTDHandler() - { - - return null; - } + return null; + } + + /** + * Returns the Element whose ID is given by + * elementId. If no such element exists, returns + * DTM.NULL. Behavior is not defined if more than one element + * has this ID. 
Attributes (including those + * with the name "ID") are not of type ID unless so defined by DTD/Schema + * information available to the DTM implementation. + * Implementations that do not know whether attributes are of type ID or + * not are expected to return DTM.NULL. + * + *

    %REVIEW% Presumably IDs are still scoped to a single document, + * and this operation searches only within a single document, right? + * Wouldn't want collisions between DTMs in the same process.

    + * + * @param elementId The unique id value for an element. + * @return The handle of the matching element. + */ + public int + getElementById(String elementId) + { + + Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) + ? (Document) m_root : m_root.getOwnerDocument(); + + if (null != doc) { + Node elem = doc.getElementById(elementId); + if (null != elem) { + int elemHandle = getHandleFromNode(elem); + + if (DTM.NULL == elemHandle) { + int identity = m_nodes.size() - 1; + while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { + Node node = getNode(identity); + if (node == elem) { + elemHandle = getHandleFromNode(elem); + break; + } + } + } - /** - * Return this DTM's ErrorHandler. - * - * @return null if this model doesn't respond to SAX error events. - */ - public org.xml.sax.ErrorHandler getErrorHandler() - { + return elemHandle; + } - return null; } + return DTM.NULL; + } + + /** + * The getUnparsedEntityURI function returns the URI of the unparsed + * entity with the specified name in the same document as the context + * node (see [3.3 Unparsed Entities]). It returns the empty string if + * there is no such entity. + *

    + * XML processors may choose to use the System Identifier (if one + * is provided) to resolve the entity, rather than the URI in the + * Public Identifier. The details are dependent on the processor, and + * we would have to support some form of plug-in resolver to handle + * this properly. Currently, we simply return the System Identifier if + * present, and hope that it a usable URI or that our caller can + * map it to one. + * TODO: Resolve Public Identifiers... or consider changing function name. + *

    + * If we find a relative URI + * reference, XML expects it to be resolved in terms of the base URI + * of the document. The DOM doesn't do that for us, and it isn't + * entirely clear whether that should be done here; currently that's + * pushed up to a higher level of our application. (Note that DOM Level + * 1 didn't store the document's base URI.) + * TODO: Consider resolving Relative URIs. + *

    + * (The DOM's statement that "An XML processor may choose to + * completely expand entities before the structure model is passed + * to the DOM" refers only to parsed entities, not unparsed, and hence + * doesn't affect this function.) + * + * @param name A string containing the Entity Name of the unparsed + * entity. + * + * @return String containing the URI of the Unparsed Entity, or an + * empty string if no such entity exists. + */ + public String + getUnparsedEntityURI(String name) + { + + String url = ""; + Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) + ? (Document) m_root : m_root.getOwnerDocument(); + + if (null != doc) { + DocumentType doctype = doc.getDoctype(); + + if (null != doctype) { + NamedNodeMap entities = doctype.getEntities(); + if (null == entities) { + return url; + } + Entity entity = (Entity) entities.getNamedItem(name); + if (null == entity) { + return url; + } - /** - * Return this DTM's DeclHandler. - * - * @return null if this model doesn't respond to SAX Decl events. - */ - public org.xml.sax.ext.DeclHandler getDeclHandler() - { - - return null; + String notationName = entity.getNotationName(); + + if (null != notationName) { // then it's unparsed + // The draft says: "The XSLT processor may use the public + // identifier to generate a URI for the entity instead of the URI + // specified in the system identifier. If the XSLT processor does + // not use the public identifier to generate the URI, it must use + // the system identifier; if the system identifier is a relative + // URI, it must be resolved into an absolute URI using the URI of + // the resource containing the entity declaration as the base + // URI [RFC2396]." + // So I'm falling a bit short here. + url = entity.getSystemId(); + + if (null == url) { + url = entity.getPublicId(); + } else { + // This should be resolved to an absolute URL, but that's hard + // to do from here. 
+ } + } + } } - /** @return true iff we're building this model incrementally (eg - * we're partnered with a IncrementalSAXSource) and thus require that the - * transformation and the parse run simultaneously. Guidance to the - * DTMManager. - * */ - public boolean needsTwoThreads() - { - return false; + return url; + } + + /** + * 5. [specified] A flag indicating whether this attribute was actually + * specified in the start-tag of its element, or was defaulted from the + * DTD. + * + * @param attributeHandle the attribute handle + * @return true if the attribute was specified; + * false if it was defaulted. + */ + public boolean + isAttributeSpecified(int attributeHandle) + { + int type = getNodeType(attributeHandle); + + if (DTM.ATTRIBUTE_NODE == type) { + Attr attr = (Attr)getNode(attributeHandle); + return attr.getSpecified(); } - - // ========== Direct SAX Dispatch, for optimization purposes ======== - - /** - * Returns whether the specified ch conforms to the XML 1.0 definition - * of whitespace. Refer to - * the definition of S for details. - * @param ch Character to check as XML whitespace. - * @return =true if ch is XML whitespace; otherwise =false. - */ - private static boolean isSpace(char ch) - { - return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. + return false; + } + + /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since + * we're wrapped around an existing DOM. + * + * @param source The IncrementalSAXSource that we want to recieve events from + * on demand. + */ + public void + setIncrementalSAXSource(IncrementalSAXSource source) + { + } + + /** getContentHandler returns "our SAX builder" -- the thing that + * someone else should send SAX events to in order to extend this + * DTM model. 
+ * + * @return null if this model doesn't respond to SAX events, + * "this" if the DTM object has a built-in SAX ContentHandler, + * the IncrmentalSAXSource if we're bound to one and should receive + * the SAX stream via it for incremental build purposes... + * */ + public org.xml.sax.ContentHandler + getContentHandler() + { + return null; + } + + /** + * Return this DTM's lexical handler. + * + * %REVIEW% Should this return null if constrution already done/begun? + * + * @return null if this model doesn't respond to lexical SAX events, + * "this" if the DTM object has a built-in SAX ContentHandler, + * the IncrementalSAXSource if we're bound to one and should receive + * the SAX stream via it for incremental build purposes... + */ + public org.xml.sax.ext.LexicalHandler + getLexicalHandler() + { + + return null; + } + + + /** + * Return this DTM's EntityResolver. + * + * @return null if this model doesn't respond to SAX entity ref events. + */ + public org.xml.sax.EntityResolver + getEntityResolver() + { + + return null; + } + + /** + * Return this DTM's DTDHandler. + * + * @return null if this model doesn't respond to SAX dtd events. + */ + public org.xml.sax.DTDHandler + getDTDHandler() + { + + return null; + } + + /** + * Return this DTM's ErrorHandler. + * + * @return null if this model doesn't respond to SAX error events. + */ + public org.xml.sax.ErrorHandler + getErrorHandler() + { + + return null; + } + + /** + * Return this DTM's DeclHandler. + * + * @return null if this model doesn't respond to SAX Decl events. + */ + public org.xml.sax.ext.DeclHandler + getDeclHandler() + { + + return null; + } + + /** @return true iff we're building this model incrementally (eg + * we're partnered with a IncrementalSAXSource) and thus require that the + * transformation and the parse run simultaneously. Guidance to the + * DTMManager. 
+ * */ + public boolean + needsTwoThreads() + { + return false; + } + + // ========== Direct SAX Dispatch, for optimization purposes ======== + + /** + * Returns whether the specified ch conforms to the XML 1.0 definition + * of whitespace. Refer to + * the definition of S for details. + * @param ch Character to check as XML whitespace. + * @return =true if ch is XML whitespace; otherwise =false. + */ + private static boolean + isSpace(char ch) + { + return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. + } + + /** + * Directly call the + * characters method on the passed ContentHandler for the + * string-value of the given node (see http://www.w3.org/TR/xpath#data-model + * for the definition of a node's string-value). Multiple calls to the + * ContentHandler's characters methods may well occur for a single call to + * this method. + * + * @param nodeHandle The node ID. + * @param ch A non-null reference to a ContentHandler. + * + * @throws org.xml.sax.SAXException + */ + public void + dispatchCharactersEvents( + int nodeHandle, org.xml.sax.ContentHandler ch, + boolean normalize) + throws org.xml.sax.SAXException + { + if (normalize) { + XMLString str = getStringValue(nodeHandle); + str = str.fixWhiteSpace(true, true, false); + str.dispatchCharactersEvents(ch); + } else { + int type = getNodeType(nodeHandle); + Node node = getNode(nodeHandle); + dispatchNodeData(node, ch, 0); + // Text coalition -- a DTM text node may represent multiple + // DOM nodes. + if (TEXT_NODE == type || CDATA_SECTION_NODE == type) { + while (null != (node = logicalNextDOMTextNode(node))) { + dispatchNodeData(node, ch, 0); + } + } } - - /** - * Directly call the - * characters method on the passed ContentHandler for the - * string-value of the given node (see http://www.w3.org/TR/xpath#data-model - * for the definition of a node's string-value). Multiple calls to the - * ContentHandler's characters methods may well occur for a single call to - * this method. 
- * - * @param nodeHandle The node ID. - * @param ch A non-null reference to a ContentHandler. - * - * @throws org.xml.sax.SAXException - */ - public void dispatchCharactersEvents( - int nodeHandle, org.xml.sax.ContentHandler ch, - boolean normalize) - throws org.xml.sax.SAXException - { - if(normalize) - { - XMLString str = getStringValue(nodeHandle); - str = str.fixWhiteSpace(true, true, false); - str.dispatchCharactersEvents(ch); + } + + /** + * Retrieve the text content of a DOM subtree, appending it into a + * user-supplied FastStringBuffer object. Note that attributes are + * not considered part of the content of an element. + *

    + * There are open questions regarding whitespace stripping. + * Currently we make no special effort in that regard, since the standard + * DOM doesn't yet provide DTD-based information to distinguish + * whitespace-in-element-context from genuine #PCDATA. Note that we + * should probably also consider xml:space if/when we address this. + * DOM Level 3 may solve the problem for us. + *

    + * %REVIEW% Note that as a DOM-level operation, it can be argued that this + * routine _shouldn't_ perform any processing beyond what the DOM already + * does, and that whitespace stripping and so on belong at the DTM level. + * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. + * + * @param node Node whose subtree is to be walked, gathering the + * contents of all Text or CDATASection nodes. + */ + @SuppressWarnings("fallthrough") + protected static void + dispatchNodeData(Node node, + org.xml.sax.ContentHandler ch, + int depth) + throws org.xml.sax.SAXException + { + + switch (node.getNodeType()) { + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + case Node.ELEMENT_NODE : { + for (Node child = node.getFirstChild(); null != child; + child = child.getNextSibling()) { + dispatchNodeData(child, ch, depth + 1); } - else - { - int type = getNodeType(nodeHandle); - Node node = getNode(nodeHandle); - dispatchNodeData(node, ch, 0); - // Text coalition -- a DTM text node may represent multiple - // DOM nodes. - if(TEXT_NODE == type || CDATA_SECTION_NODE == type) - { - while( null != (node=logicalNextDOMTextNode(node)) ) - { - dispatchNodeData(node, ch, 0); - } - } + } + break; + case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% + case Node.COMMENT_NODE : + if (0 != depth) { + break; } - } - - /** - * Retrieve the text content of a DOM subtree, appending it into a - * user-supplied FastStringBuffer object. Note that attributes are - * not considered part of the content of an element. - *

    - * There are open questions regarding whitespace stripping. - * Currently we make no special effort in that regard, since the standard - * DOM doesn't yet provide DTD-based information to distinguish - * whitespace-in-element-context from genuine #PCDATA. Note that we - * should probably also consider xml:space if/when we address this. - * DOM Level 3 may solve the problem for us. - *

    - * %REVIEW% Note that as a DOM-level operation, it can be argued that this - * routine _shouldn't_ perform any processing beyond what the DOM already - * does, and that whitespace stripping and so on belong at the DTM level. - * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. - * - * @param node Node whose subtree is to be walked, gathering the - * contents of all Text or CDATASection nodes. - */ - protected static void dispatchNodeData(Node node, - org.xml.sax.ContentHandler ch, - int depth) - throws org.xml.sax.SAXException - { - - switch (node.getNodeType()) - { - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - case Node.ELEMENT_NODE : - { - for (Node child = node.getFirstChild(); null != child; - child = child.getNextSibling()) - { - dispatchNodeData(child, ch, depth+1); - } - } - break; - case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% - case Node.COMMENT_NODE : - if(0 != depth) - break; - // NOTE: Because this operation works in the DOM space, it does _not_ attempt - // to perform Text Coalition. That should only be done in DTM space. - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - case Node.ATTRIBUTE_NODE : - String str = node.getNodeValue(); - if(ch instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) - { - ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)ch).characters(node); - } - else - { - ch.characters(str.toCharArray(), 0, str.length()); - } - break; + // NOTE: Because this operation works in the DOM space, it does _not_ attempt + // to perform Text Coalition. That should only be done in DTM space. 
+ case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + case Node.ATTRIBUTE_NODE : + String str = node.getNodeValue(); + if (ch instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) { + ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)ch).characters(node); + } else { + ch.characters(str.toCharArray(), 0, str.length()); + } + break; // /* case Node.PROCESSING_INSTRUCTION_NODE : // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); // break; */ - default : - // ignore - break; - } - } - - TreeWalker m_walker = new TreeWalker(null); - - /** - * Directly create SAX parser events from a subtree. - * - * @param nodeHandle The node ID. - * @param ch A non-null reference to a ContentHandler. - * - * @throws org.xml.sax.SAXException - */ - public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) - throws org.xml.sax.SAXException - { - TreeWalker treeWalker = m_walker; - ContentHandler prevCH = treeWalker.getContentHandler(); - - if(null != prevCH) - { - treeWalker = new TreeWalker(null); - } - treeWalker.setContentHandler(ch); - - try - { - Node node = getNode(nodeHandle); - treeWalker.traverseFragment(node); - } - finally - { - treeWalker.setContentHandler(null); - } + default : + // ignore + break; } - - /** - * For the moment all the run time properties are ignored by this - * class. - * - * @param property a String value - * @param value an Object value - */ - public void setProperty(String property, Object value) - { + } + + TreeWalker m_walker = new TreeWalker(null); + + /** + * Directly create SAX parser events from a subtree. + * + * @param nodeHandle The node ID. + * @param ch A non-null reference to a ContentHandler. 
+ * + * @throws org.xml.sax.SAXException + */ + public void + dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) + throws org.xml.sax.SAXException + { + TreeWalker treeWalker = m_walker; + ContentHandler prevCH = treeWalker.getContentHandler(); + + if (null != prevCH) { + treeWalker = new TreeWalker(null); } + treeWalker.setContentHandler(ch); - /** - * No source information is available for DOM2DTM, so return - * null here. - * - * @param node an int value - * @return null - */ - public SourceLocator getSourceLocatorFor(int node) - { - return null; + try { + Node node = getNode(nodeHandle); + treeWalker.traverseFragment(node); + } finally { + treeWalker.setContentHandler(null); } + } + + /** + * For the moment all the run time properties are ignored by this + * class. + * + * @param property a String value + * @param value an Object value + */ + public void + setProperty(String property, Object value) + { + } + + /** + * No source information is available for DOM2DTM, so return + * null here. + * + * @param node an int value + * @return null + */ + public SourceLocator + getSourceLocatorFor(int node) + { + return null; + } } diff --git a/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java b/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java index 42a91c52bd..232deb653d 100644 --- a/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +++ b/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java @@ -1,7 +1,3 @@ -/* - * reserved comment block - * DO NOT REMOVE OR ALTER! - */ /* * Copyright 1999-2004 The Apache Software Foundation. * @@ -57,239 +53,279 @@ * in DTM space, but given how DOM2DTM is currently written I think * this is simplest. 
* */ -public class DOM2DTMdefaultNamespaceDeclarationNode implements Attr,TypeInfo +public class DOM2DTMdefaultNamespaceDeclarationNode implements Attr, TypeInfo { - final String NOT_SUPPORTED_ERR="Unsupported operation on pseudonode"; + final String NOT_SUPPORTED_ERR = "Unsupported operation on pseudonode"; Element pseudoparent; - String prefix,uri,nodename; + String prefix, uri, nodename; int handle; - DOM2DTMdefaultNamespaceDeclarationNode(Element pseudoparent,String prefix,String uri,int handle) + DOM2DTMdefaultNamespaceDeclarationNode(Element pseudoparent, String prefix, String uri, int handle) { - this.pseudoparent=pseudoparent; - this.prefix=prefix; - this.uri=uri; - this.handle=handle; - this.nodename="xmlns:"+prefix; + this.pseudoparent = pseudoparent; + this.prefix = prefix; + this.uri = uri; + this.handle = handle; + this.nodename = "xmlns:" + prefix; + } + public String + getNodeName() {return nodename;} + public String + getName() {return nodename;} + public String + getNamespaceURI() {return "http://www.w3.org/2000/xmlns/";} + public String + getPrefix() {return prefix;} + public String + getLocalName() {return prefix;} + public String + getNodeValue() {return uri;} + public String + getValue() {return uri;} + public Element + getOwnerElement() {return pseudoparent;} + + public boolean + isSupported(String feature, String version) {return false;} + public boolean + hasChildNodes() {return false;} + public boolean + hasAttributes() {return false;} + public Node + getParentNode() {return null;} + public Node + getFirstChild() {return null;} + public Node + getLastChild() {return null;} + public Node + getPreviousSibling() {return null;} + public Node + getNextSibling() {return null;} + public boolean + getSpecified() {return false;} + public void + normalize() {return;} + public NodeList + getChildNodes() {return null;} + public NamedNodeMap + getAttributes() {return null;} + public short + getNodeType() {return Node.ATTRIBUTE_NODE;} + public void + 
setNodeValue(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} + public void + setValue(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} + public void + setPrefix(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} + public Node + insertBefore(Node a, Node b) {throw new DTMException(NOT_SUPPORTED_ERR);} + public Node + replaceChild(Node a, Node b) {throw new DTMException(NOT_SUPPORTED_ERR);} + public Node + appendChild(Node a) {throw new DTMException(NOT_SUPPORTED_ERR);} + public Node + removeChild(Node a) {throw new DTMException(NOT_SUPPORTED_ERR);} + public Document + getOwnerDocument() {return pseudoparent.getOwnerDocument();} + public Node + cloneNode(boolean deep) {throw new DTMException(NOT_SUPPORTED_ERR);} + + /** Non-DOM method, part of the temporary kluge + * %REVIEW% This would be a pruning problem, but since it will always be + * added to the root element and we prune on elements, we shouldn't have + * to worry. + */ + public int + getHandleOfNode() + { + return handle; } - public String getNodeName() {return nodename;} - public String getName() {return nodename;} - public String getNamespaceURI() {return "http://www.w3.org/2000/xmlns/";} - public String getPrefix() {return prefix;} - public String getLocalName() {return prefix;} - public String getNodeValue() {return uri;} - public String getValue() {return uri;} - public Element getOwnerElement() {return pseudoparent;} - - public boolean isSupported(String feature, String version) {return false;} - public boolean hasChildNodes() {return false;} - public boolean hasAttributes() {return false;} - public Node getParentNode() {return null;} - public Node getFirstChild() {return null;} - public Node getLastChild() {return null;} - public Node getPreviousSibling() {return null;} - public Node getNextSibling() {return null;} - public boolean getSpecified() {return false;} - public void normalize() {return;} - public NodeList getChildNodes() {return null;} - public NamedNodeMap 
getAttributes() {return null;} - public short getNodeType() {return Node.ATTRIBUTE_NODE;} - public void setNodeValue(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} - public void setValue(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} - public void setPrefix(String value) {throw new DTMException(NOT_SUPPORTED_ERR);} - public Node insertBefore(Node a, Node b) {throw new DTMException(NOT_SUPPORTED_ERR);} - public Node replaceChild(Node a, Node b) {throw new DTMException(NOT_SUPPORTED_ERR);} - public Node appendChild(Node a) {throw new DTMException(NOT_SUPPORTED_ERR);} - public Node removeChild(Node a) {throw new DTMException(NOT_SUPPORTED_ERR);} - public Document getOwnerDocument() {return pseudoparent.getOwnerDocument();} - public Node cloneNode(boolean deep) {throw new DTMException(NOT_SUPPORTED_ERR);} - - /** Non-DOM method, part of the temporary kluge - * %REVIEW% This would be a pruning problem, but since it will always be - * added to the root element and we prune on elements, we shouldn't have - * to worry. - */ - public int getHandleOfNode() - { - return handle; - } - //RAMESH: PENDING=> Add proper implementation for the below DOM L3 additions + //RAMESH: PENDING=> Add proper implementation for the below DOM L3 additions + + /** + * @see org.w3c.dom.TypeInfo#getTypeName() + */ + public String + getTypeName() {return null; } + + /** + * @see org.w3c.dom.TypeInfo#getTypeNamespace() + */ + public String + getTypeNamespace() { return null;} + + /** + * @see or.gw3c.dom.TypeInfo#isDerivedFrom(String,String,int) + */ + public boolean + isDerivedFrom(String ns, String localName, int derivationMethod) + { + return false; + } - /** - * @see org.w3c.dom.TypeInfo#getTypeName() - */ - public String getTypeName() {return null; } + public TypeInfo + getSchemaTypeInfo() { return this; } + + public boolean + isId() { return false; } + + /** + * Associate an object to a key on this node. 
The object can later be + * retrieved from this node by calling getUserData with the + * same key. + * @param key The key to associate the object to. + * @param data The object to associate to the given key, or + * null to remove any existing association to that key. + * @param handler The handler to associate to that key, or + * null. + * @return Returns the DOMObject previously associated to + * the given key on this node, or null if there was none. + * @since DOM Level 3 + */ + public Object + setUserData(String key, + Object data, + UserDataHandler handler) + { + return getOwnerDocument().setUserData(key, data, handler); + } - /** - * @see org.w3c.dom.TypeInfo#getTypeNamespace() - */ - public String getTypeNamespace() { return null;} + /** + * Retrieves the object associated to a key on a this node. The object + * must first have been set to this node by calling + * setUserData with the same key. + * @param key The key the object is associated to. + * @return Returns the DOMObject associated to the given key + * on this node, or null if there was none. + * @since DOM Level 3 + */ + public Object + getUserData(String key) + { + return getOwnerDocument().getUserData(key); + } + + /** + * This method returns a specialized object which implements the + * specialized APIs of the specified feature and version. The + * specialized object may also be obtained by using binding-specific + * casting methods but is not necessarily expected to, as discussed in Mixed DOM implementations. + * @param feature The name of the feature requested (case-insensitive). + * @param version This is the version number of the feature to test. If + * the version is null or the empty string, supporting + * any version of the feature will cause the method to return an + * object that supports at least one version of the feature. 
+ * @return Returns an object which implements the specialized APIs of + * the specified feature and version, if any, or null if + * there is no object which implements interfaces associated with that + * feature. If the DOMObject returned by this method + * implements the Node interface, it must delegate to the + * primary core Node and not return results inconsistent + * with the primary core Node such as attributes, + * childNodes, etc. + * @since DOM Level 3 + */ + public Object + getFeature(String feature, String version) + { + // we don't have any alternate node, either this node does the job + // or we don't have anything that does + return isSupported(feature, version) ? this : null; + } - /** - * @see or.gw3c.dom.TypeInfo#isDerivedFrom(String,String,int) - */ - public boolean isDerivedFrom( String ns, String localName, int derivationMethod ) { + /** + * Tests whether two nodes are equal. + *
    This method tests for equality of nodes, not sameness (i.e., + * whether the two nodes are references to the same object) which can be + * tested with Node.isSameNode. All nodes that are the same + * will also be equal, though the reverse may not be true. + *
    Two nodes are equal if and only if the following conditions are + * satisfied: The two nodes are of the same type.The following string + * attributes are equal: nodeName, localName, + * namespaceURI, prefix, nodeValue + * , baseURI. This is: they are both null, or + * they have the same length and are character for character identical. + * The attributes NamedNodeMaps are equal. + * This is: they are both null, or they have the same + * length and for each node that exists in one map there is a node that + * exists in the other map and is equal, although not necessarily at the + * same index.The childNodes NodeLists are + * equal. This is: they are both null, or they have the + * same length and contain equal nodes at the same index. This is true + * for Attr nodes as for any other type of node. Note that + * normalization can affect equality; to avoid this, nodes should be + * normalized before being compared. + *
    For two DocumentType nodes to be equal, the following + * conditions must also be satisfied: The following string attributes + * are equal: publicId, systemId, + * internalSubset.The entities + * NamedNodeMaps are equal.The notations + * NamedNodeMaps are equal. + *
    On the other hand, the following do not affect equality: the + * ownerDocument attribute, the specified + * attribute for Attr nodes, the + * isWhitespaceInElementContent attribute for + * Text nodes, as well as any user data or event listeners + * registered on the nodes. + * @param arg The node to compare equality with. + * @param deep If true, recursively compare the subtrees; if + * false, compare only the nodes themselves (and its + * attributes, if it is an Element). + * @return If the nodes, and possibly subtrees are equal, + * true otherwise false. + * @since DOM Level 3 + */ + public boolean + isEqualNode(Node arg) + { + if (arg == this) { + return true; + } + if (arg.getNodeType() != getNodeType()) { + return false; + } + // in theory nodeName can't be null but better be careful + // who knows what other implementations may be doing?... + if (getNodeName() == null) { + if (arg.getNodeName() != null) { return false; + } + } else if (!getNodeName().equals(arg.getNodeName())) { + return false; } - public TypeInfo getSchemaTypeInfo() { return this; } - - public boolean isId( ) { return false; } - - /** - * Associate an object to a key on this node. The object can later be - * retrieved from this node by calling getUserData with the - * same key. - * @param key The key to associate the object to. - * @param data The object to associate to the given key, or - * null to remove any existing association to that key. - * @param handler The handler to associate to that key, or - * null. - * @return Returns the DOMObject previously associated to - * the given key on this node, or null if there was none. 
- * @since DOM Level 3 - */ - public Object setUserData(String key, - Object data, - UserDataHandler handler) { - return getOwnerDocument().setUserData( key, data, handler); + if (getLocalName() == null) { + if (arg.getLocalName() != null) { + return false; + } + } else if (!getLocalName().equals(arg.getLocalName())) { + return false; } - /** - * Retrieves the object associated to a key on a this node. The object - * must first have been set to this node by calling - * setUserData with the same key. - * @param key The key the object is associated to. - * @return Returns the DOMObject associated to the given key - * on this node, or null if there was none. - * @since DOM Level 3 - */ - public Object getUserData(String key) { - return getOwnerDocument().getUserData( key); + if (getNamespaceURI() == null) { + if (arg.getNamespaceURI() != null) { + return false; + } + } else if (!getNamespaceURI().equals(arg.getNamespaceURI())) { + return false; } - /** - * This method returns a specialized object which implements the - * specialized APIs of the specified feature and version. The - * specialized object may also be obtained by using binding-specific - * casting methods but is not necessarily expected to, as discussed in Mixed DOM implementations. - * @param feature The name of the feature requested (case-insensitive). - * @param version This is the version number of the feature to test. If - * the version is null or the empty string, supporting - * any version of the feature will cause the method to return an - * object that supports at least one version of the feature. - * @return Returns an object which implements the specialized APIs of - * the specified feature and version, if any, or null if - * there is no object which implements interfaces associated with that - * feature. 
If the DOMObject returned by this method - * implements the Node interface, it must delegate to the - * primary core Node and not return results inconsistent - * with the primary core Node such as attributes, - * childNodes, etc. - * @since DOM Level 3 - */ - public Object getFeature(String feature, String version) { - // we don't have any alternate node, either this node does the job - // or we don't have anything that does - return isSupported(feature, version) ? this : null; + if (getPrefix() == null) { + if (arg.getPrefix() != null) { + return false; + } + } else if (!getPrefix().equals(arg.getPrefix())) { + return false; } - /** - * Tests whether two nodes are equal. - *
    This method tests for equality of nodes, not sameness (i.e., - * whether the two nodes are references to the same object) which can be - * tested with Node.isSameNode. All nodes that are the same - * will also be equal, though the reverse may not be true. - *
    Two nodes are equal if and only if the following conditions are - * satisfied: The two nodes are of the same type.The following string - * attributes are equal: nodeName, localName, - * namespaceURI, prefix, nodeValue - * , baseURI. This is: they are both null, or - * they have the same length and are character for character identical. - * The attributes NamedNodeMaps are equal. - * This is: they are both null, or they have the same - * length and for each node that exists in one map there is a node that - * exists in the other map and is equal, although not necessarily at the - * same index.The childNodes NodeLists are - * equal. This is: they are both null, or they have the - * same length and contain equal nodes at the same index. This is true - * for Attr nodes as for any other type of node. Note that - * normalization can affect equality; to avoid this, nodes should be - * normalized before being compared. - *
    For two DocumentType nodes to be equal, the following - * conditions must also be satisfied: The following string attributes - * are equal: publicId, systemId, - * internalSubset.The entities - * NamedNodeMaps are equal.The notations - * NamedNodeMaps are equal. - *
    On the other hand, the following do not affect equality: the - * ownerDocument attribute, the specified - * attribute for Attr nodes, the - * isWhitespaceInElementContent attribute for - * Text nodes, as well as any user data or event listeners - * registered on the nodes. - * @param arg The node to compare equality with. - * @param deep If true, recursively compare the subtrees; if - * false, compare only the nodes themselves (and its - * attributes, if it is an Element). - * @return If the nodes, and possibly subtrees are equal, - * true otherwise false. - * @since DOM Level 3 - */ - public boolean isEqualNode(Node arg) { - if (arg == this) { - return true; - } - if (arg.getNodeType() != getNodeType()) { - return false; - } - // in theory nodeName can't be null but better be careful - // who knows what other implementations may be doing?... - if (getNodeName() == null) { - if (arg.getNodeName() != null) { - return false; - } - } - else if (!getNodeName().equals(arg.getNodeName())) { - return false; - } - - if (getLocalName() == null) { - if (arg.getLocalName() != null) { - return false; - } - } - else if (!getLocalName().equals(arg.getLocalName())) { - return false; - } - - if (getNamespaceURI() == null) { - if (arg.getNamespaceURI() != null) { - return false; - } - } - else if (!getNamespaceURI().equals(arg.getNamespaceURI())) { - return false; - } - - if (getPrefix() == null) { - if (arg.getPrefix() != null) { - return false; - } - } - else if (!getPrefix().equals(arg.getPrefix())) { - return false; - } - - if (getNodeValue() == null) { - if (arg.getNodeValue() != null) { - return false; - } - } - else if (!getNodeValue().equals(arg.getNodeValue())) { - return false; - } + if (getNodeValue() == null) { + if (arg.getNodeValue() != null) { + return false; + } + } else if (!getNodeValue().equals(arg.getNodeValue())) { + return false; + } /* if (getBaseURI() == null) { if (((NodeImpl) arg).getBaseURI() != null) { @@ -299,387 +335,403 @@ else if 
(!getNodeValue().equals(arg.getNodeValue())) { else if (!getBaseURI().equals(((NodeImpl) arg).getBaseURI())) { return false; } -*/ - - return true; - } - - /** - * DOM Level 3 - Experimental: - * Look up the namespace URI associated to the given prefix, starting from this node. - * Use lookupNamespaceURI(null) to lookup the default namespace - * - * @param namespaceURI - * @return th URI for the namespace - * @since DOM Level 3 - */ - public String lookupNamespaceURI(String specifiedPrefix) { - short type = this.getNodeType(); - switch (type) { - case Node.ELEMENT_NODE : { - - String namespace = this.getNamespaceURI(); - String prefix = this.getPrefix(); - if (namespace !=null) { - // REVISIT: is it possible that prefix is empty string? - if (specifiedPrefix== null && prefix==specifiedPrefix) { - // looking for default namespace - return namespace; - } else if (prefix != null && prefix.equals(specifiedPrefix)) { - // non default namespace - return namespace; - } - } - if (this.hasAttributes()) { - NamedNodeMap map = this.getAttributes(); - int length = map.getLength(); - for (int i=0;inamespaceURI
    is the - * default namespace or not. - * @param namespaceURI The namespace URI to look for. - * @return true if the specified namespaceURI - * is the default namespace, false otherwise. - * @since DOM Level 3 - */ - public boolean isDefaultNamespace(String namespaceURI){ - /* - // REVISIT: remove casts when DOM L3 becomes REC. - short type = this.getNodeType(); - switch (type) { - case Node.ELEMENT_NODE: { - String namespace = this.getNamespaceURI(); - String prefix = this.getPrefix(); - - // REVISIT: is it possible that prefix is empty string? - if (prefix == null || prefix.length() == 0) { - if (namespaceURI == null) { - return (namespace == namespaceURI); - } - return namespaceURI.equals(namespace); - } - if (this.hasAttributes()) { - ElementImpl elem = (ElementImpl)this; - NodeImpl attr = (NodeImpl)elem.getAttributeNodeNS("http://www.w3.org/2000/xmlns/", "xmlns"); - if (attr != null) { - String value = attr.getNodeValue(); - if (namespaceURI == null) { - return (namespace == value); - } - return namespaceURI.equals(value); - } + if (this.hasAttributes()) { + NamedNodeMap map = this.getAttributes(); + int length = map.getLength(); + for (int i = 0; i < length; i++) { + Node attr = map.item(i); + String attrPrefix = attr.getPrefix(); + String value = attr.getNodeValue(); + namespace = attr.getNamespaceURI(); + if (namespace != null && namespace.equals("http://www.w3.org/2000/xmlns/")) { + // at this point we are dealing with DOM Level 2 nodes only + if (specifiedPrefix == null && + attr.getNodeName().equals("xmlns")) { + // default namespace + return value; + } else if (attrPrefix != null && + attrPrefix.equals("xmlns") && + attr.getLocalName().equals(specifiedPrefix)) { + // non default namespace + return value; + } } - - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.isDefaultNamespace(namespaceURI); - } - return false; + } } - case Node.DOCUMENT_NODE:{ - 
return((NodeImpl)((Document)this).getDocumentElement()).isDefaultNamespace(namespaceURI); - } + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupNamespaceURI(specifiedPrefix); + } + */ - case Node.ENTITY_NODE : - case Node.NOTATION_NODE: - case Node.DOCUMENT_FRAGMENT_NODE: - case Node.DOCUMENT_TYPE_NODE: - // type is unknown - return false; - case Node.ATTRIBUTE_NODE:{ - if (this.ownerNode.getNodeType() == Node.ELEMENT_NODE) { - return ownerNode.isDefaultNamespace(namespaceURI); + return null; - } - return false; - } - default:{ - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.isDefaultNamespace(namespaceURI); - } - return false; - } - } -*/ - return false; + } + /* + case Node.DOCUMENT_NODE : { + return((NodeImpl)((Document)this).getDocumentElement()).lookupNamespaceURI(specifiedPrefix) ; + } + */ + case Node.ENTITY_NODE : + case Node.NOTATION_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + case Node.DOCUMENT_TYPE_NODE: + // type is unknown + return null; + case Node.ATTRIBUTE_NODE: { + if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { + return getOwnerElement().lookupNamespaceURI(specifiedPrefix); + } + return null; + } + default: { + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupNamespaceURI(specifiedPrefix); + } + */ + return null; + } } + } - /** - * - * DOM Level 3 - Experimental: - * Look up the prefix associated to the given namespace URI, starting from this node. - * - * @param namespaceURI - * @return the prefix for the namespace - */ - public String lookupPrefix(String namespaceURI){ - - // REVISIT: When Namespaces 1.1 comes out this may not be true - // Prefix can't be bound to null namespace - if (namespaceURI == null) { - return null; - } + /** + * DOM Level 3: Experimental + * This method checks if the specified namespaceURI is the + * default namespace or not. 
+ * @param namespaceURI The namespace URI to look for. + * @return true if the specified namespaceURI + * is the default namespace, false otherwise. + * @since DOM Level 3 + */ + public boolean + isDefaultNamespace(String namespaceURI) + { + /* + // REVISIT: remove casts when DOM L3 becomes REC. + short type = this.getNodeType(); + switch (type) { + case Node.ELEMENT_NODE: { + String namespace = this.getNamespaceURI(); + String prefix = this.getPrefix(); + + // REVISIT: is it possible that prefix is empty string? + if (prefix == null || prefix.length() == 0) { + if (namespaceURI == null) { + return (namespace == namespaceURI); + } + return namespaceURI.equals(namespace); + } + if (this.hasAttributes()) { + ElementImpl elem = (ElementImpl)this; + NodeImpl attr = (NodeImpl)elem.getAttributeNodeNS("http://www.w3.org/2000/xmlns/", "xmlns"); + if (attr != null) { + String value = attr.getNodeValue(); + if (namespaceURI == null) { + return (namespace == value); + } + return namespaceURI.equals(value); + } + } - short type = this.getNodeType(); + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.isDefaultNamespace(namespaceURI); + } + return false; + } + case Node.DOCUMENT_NODE:{ + return((NodeImpl)((Document)this).getDocumentElement()).isDefaultNamespace(namespaceURI); + } - switch (type) { -/* - case Node.ELEMENT_NODE: { + case Node.ENTITY_NODE : + case Node.NOTATION_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + case Node.DOCUMENT_TYPE_NODE: + // type is unknown + return false; + case Node.ATTRIBUTE_NODE:{ + if (this.ownerNode.getNodeType() == Node.ELEMENT_NODE) { + return ownerNode.isDefaultNamespace(namespaceURI); + + } + return false; + } + default:{ + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.isDefaultNamespace(namespaceURI); + } + return false; + } - String namespace = this.getNamespaceURI(); // to flip out children - return lookupNamespacePrefix(namespaceURI, 
(ElementImpl)this); - } + } + */ + return false; - case Node.DOCUMENT_NODE:{ - return((NodeImpl)((Document)this).getDocumentElement()).lookupPrefix(namespaceURI); - } -*/ - case Node.ENTITY_NODE : - case Node.NOTATION_NODE: - case Node.DOCUMENT_FRAGMENT_NODE: - case Node.DOCUMENT_TYPE_NODE: - // type is unknown - return null; - case Node.ATTRIBUTE_NODE:{ - if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { - return getOwnerElement().lookupPrefix(namespaceURI); - - } - return null; - } - default:{ -/* - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.lookupPrefix(namespaceURI); - } -*/ - return null; - } - } - } - /** - * Returns whether this node is the same node as the given one. - *
    This method provides a way to determine whether two - * Node references returned by the implementation reference - * the same object. When two Node references are references - * to the same object, even if through a proxy, the references may be - * used completely interchangably, such that all attributes have the - * same values and calling the same DOM method on either reference - * always has exactly the same effect. - * @param other The node to test against. - * @return Returns true if the nodes are the same, - * false otherwise. - * @since DOM Level 3 - */ - public boolean isSameNode(Node other) { - // we do not use any wrapper so the answer is obvious - return this == other; - } + } - /** - * This attribute returns the text content of this node and its - * descendants. When it is defined to be null, setting it has no effect. - * When set, any possible children this node may have are removed and - * replaced by a single Text node containing the string - * this attribute is set to. On getting, no serialization is performed, - * the returned string does not contain any markup. No whitespace - * normalization is performed, the returned string does not contain the - * element content whitespaces . Similarly, on setting, no parsing is - * performed either, the input string is taken as pure textual content. - *
    The string returned is made of the text content of this node - * depending on its type, as defined below: - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
    Node typeContent
    - * ELEMENT_NODE, ENTITY_NODE, ENTITY_REFERENCE_NODE, - * DOCUMENT_FRAGMENT_NODEconcatenation of the textContent - * attribute value of every child node, excluding COMMENT_NODE and - * PROCESSING_INSTRUCTION_NODE nodes
    ATTRIBUTE_NODE, TEXT_NODE, - * CDATA_SECTION_NODE, COMMENT_NODE, PROCESSING_INSTRUCTION_NODE - * nodeValue
    DOCUMENT_NODE, DOCUMENT_TYPE_NODE, NOTATION_NODE - * null
    - * @exception DOMException - * NO_MODIFICATION_ALLOWED_ERR: Raised when the node is readonly. - * @exception DOMException - * DOMSTRING_SIZE_ERR: Raised when it would return more characters than - * fit in a DOMString variable on the implementation - * platform. - * @since DOM Level 3 - */ - public void setTextContent(String textContent) - throws DOMException { - setNodeValue(textContent); - } + /** + * + * DOM Level 3 - Experimental: + * Look up the prefix associated to the given namespace URI, starting from this node. + * + * @param namespaceURI + * @return the prefix for the namespace + */ + public String + lookupPrefix(String namespaceURI) + { - /** - * This attribute returns the text content of this node and its - * descendants. When it is defined to be null, setting it has no effect. - * When set, any possible children this node may have are removed and - * replaced by a single Text node containing the string - * this attribute is set to. On getting, no serialization is performed, - * the returned string does not contain any markup. No whitespace - * normalization is performed, the returned string does not contain the - * element content whitespaces . Similarly, on setting, no parsing is - * performed either, the input string is taken as pure textual content. - *
    The string returned is made of the text content of this node - * depending on its type, as defined below: - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
    Node typeContent
    - * ELEMENT_NODE, ENTITY_NODE, ENTITY_REFERENCE_NODE, - * DOCUMENT_FRAGMENT_NODEconcatenation of the textContent - * attribute value of every child node, excluding COMMENT_NODE and - * PROCESSING_INSTRUCTION_NODE nodes
    ATTRIBUTE_NODE, TEXT_NODE, - * CDATA_SECTION_NODE, COMMENT_NODE, PROCESSING_INSTRUCTION_NODE - * nodeValue
    DOCUMENT_NODE, DOCUMENT_TYPE_NODE, NOTATION_NODE - * null
    - * @exception DOMException - * NO_MODIFICATION_ALLOWED_ERR: Raised when the node is readonly. - * @exception DOMException - * DOMSTRING_SIZE_ERR: Raised when it would return more characters than - * fit in a DOMString variable on the implementation - * platform. - * @since DOM Level 3 - */ - public String getTextContent() throws DOMException { - return getNodeValue(); // overriden in some subclasses + // REVISIT: When Namespaces 1.1 comes out this may not be true + // Prefix can't be bound to null namespace + if (namespaceURI == null) { + return null; } - /** - * Compares a node with this node with regard to their position in the - * document. - * @param other The node to compare against this node. - * @return Returns how the given node is positioned relatively to this - * node. - * @since DOM Level 3 - */ - public short compareDocumentPosition(Node other) throws DOMException { - return 0; - } + short type = this.getNodeType(); + + switch (type) { + /* + case Node.ELEMENT_NODE: { + + String namespace = this.getNamespaceURI(); // to flip out children + return lookupNamespacePrefix(namespaceURI, (ElementImpl)this); + } + + case Node.DOCUMENT_NODE:{ + return((NodeImpl)((Document)this).getDocumentElement()).lookupPrefix(namespaceURI); + } + */ + case Node.ENTITY_NODE : + case Node.NOTATION_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + case Node.DOCUMENT_TYPE_NODE: + // type is unknown + return null; + case Node.ATTRIBUTE_NODE: { + if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { + return getOwnerElement().lookupPrefix(namespaceURI); - /** - * The absolute base URI of this node or null if undefined. - * This value is computed according to . However, when the - * Document supports the feature "HTML" , the base URI is - * computed using first the value of the href attribute of the HTML BASE - * element if any, and the value of the documentURI - * attribute from the Document interface otherwise. - *
    When the node is an Element, a Document - * or a a ProcessingInstruction, this attribute represents - * the properties [base URI] defined in . When the node is a - * Notation, an Entity, or an - * EntityReference, this attribute represents the - * properties [declaration base URI] in the . How will this be affected - * by resolution of relative namespace URIs issue?It's not.Should this - * only be on Document, Element, ProcessingInstruction, Entity, and - * Notation nodes, according to the infoset? If not, what is it equal to - * on other nodes? Null? An empty string? I think it should be the - * parent's.No.Should this be read-only and computed or and actual - * read-write attribute?Read-only and computed (F2F 19 Jun 2000 and - * teleconference 30 May 2001).If the base HTML element is not yet - * attached to a document, does the insert change the Document.baseURI? - * Yes. (F2F 26 Sep 2001) - * @since DOM Level 3 - */ - public String getBaseURI() { + } + return null; + } + default: { + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupPrefix(namespaceURI); + } + */ return null; + } } + } + + /** + * Returns whether this node is the same node as the given one. + *
    This method provides a way to determine whether two + * Node references returned by the implementation reference + * the same object. When two Node references are references + * to the same object, even if through a proxy, the references may be + * used completely interchangably, such that all attributes have the + * same values and calling the same DOM method on either reference + * always has exactly the same effect. + * @param other The node to test against. + * @return Returns true if the nodes are the same, + * false otherwise. + * @since DOM Level 3 + */ + public boolean + isSameNode(Node other) + { + // we do not use any wrapper so the answer is obvious + return this == other; + } + + /** + * This attribute returns the text content of this node and its + * descendants. When it is defined to be null, setting it has no effect. + * When set, any possible children this node may have are removed and + * replaced by a single Text node containing the string + * this attribute is set to. On getting, no serialization is performed, + * the returned string does not contain any markup. No whitespace + * normalization is performed, the returned string does not contain the + * element content whitespaces . Similarly, on setting, no parsing is + * performed either, the input string is taken as pure textual content. + *
    The string returned is made of the text content of this node + * depending on its type, as defined below: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    Node typeContent
    + * ELEMENT_NODE, ENTITY_NODE, ENTITY_REFERENCE_NODE, + * DOCUMENT_FRAGMENT_NODEconcatenation of the textContent + * attribute value of every child node, excluding COMMENT_NODE and + * PROCESSING_INSTRUCTION_NODE nodes
    ATTRIBUTE_NODE, TEXT_NODE, + * CDATA_SECTION_NODE, COMMENT_NODE, PROCESSING_INSTRUCTION_NODE + * nodeValue
    DOCUMENT_NODE, DOCUMENT_TYPE_NODE, NOTATION_NODE + * null
    + * @exception DOMException + * NO_MODIFICATION_ALLOWED_ERR: Raised when the node is readonly. + * @exception DOMException + * DOMSTRING_SIZE_ERR: Raised when it would return more characters than + * fit in a DOMString variable on the implementation + * platform. + * @since DOM Level 3 + */ + public void + setTextContent(String textContent) + throws DOMException + { + setNodeValue(textContent); + } + + /** + * This attribute returns the text content of this node and its + * descendants. When it is defined to be null, setting it has no effect. + * When set, any possible children this node may have are removed and + * replaced by a single Text node containing the string + * this attribute is set to. On getting, no serialization is performed, + * the returned string does not contain any markup. No whitespace + * normalization is performed, the returned string does not contain the + * element content whitespaces . Similarly, on setting, no parsing is + * performed either, the input string is taken as pure textual content. + *
    The string returned is made of the text content of this node + * depending on its type, as defined below: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    Node typeContent
    + * ELEMENT_NODE, ENTITY_NODE, ENTITY_REFERENCE_NODE, + * DOCUMENT_FRAGMENT_NODEconcatenation of the textContent + * attribute value of every child node, excluding COMMENT_NODE and + * PROCESSING_INSTRUCTION_NODE nodes
    ATTRIBUTE_NODE, TEXT_NODE, + * CDATA_SECTION_NODE, COMMENT_NODE, PROCESSING_INSTRUCTION_NODE + * nodeValue
    DOCUMENT_NODE, DOCUMENT_TYPE_NODE, NOTATION_NODE + * null
    + * @exception DOMException + * NO_MODIFICATION_ALLOWED_ERR: Raised when the node is readonly. + * @exception DOMException + * DOMSTRING_SIZE_ERR: Raised when it would return more characters than + * fit in a DOMString variable on the implementation + * platform. + * @since DOM Level 3 + */ + public String + getTextContent() throws DOMException + { + return getNodeValue(); // overriden in some subclasses + } + + /** + * Compares a node with this node with regard to their position in the + * document. + * @param other The node to compare against this node. + * @return Returns how the given node is positioned relatively to this + * node. + * @since DOM Level 3 + */ + public short + compareDocumentPosition(Node other) throws DOMException + { + return 0; + } + + /** + * The absolute base URI of this node or null if undefined. + * This value is computed according to . However, when the + * Document supports the feature "HTML" , the base URI is + * computed using first the value of the href attribute of the HTML BASE + * element if any, and the value of the documentURI + * attribute from the Document interface otherwise. + *
    When the node is an Element, a Document + * or a a ProcessingInstruction, this attribute represents + * the properties [base URI] defined in . When the node is a + * Notation, an Entity, or an + * EntityReference, this attribute represents the + * properties [declaration base URI] in the . How will this be affected + * by resolution of relative namespace URIs issue?It's not.Should this + * only be on Document, Element, ProcessingInstruction, Entity, and + * Notation nodes, according to the infoset? If not, what is it equal to + * on other nodes? Null? An empty string? I think it should be the + * parent's.No.Should this be read-only and computed or and actual + * read-write attribute?Read-only and computed (F2F 19 Jun 2000 and + * teleconference 30 May 2001).If the base HTML element is not yet + * attached to a document, does the insert change the Document.baseURI? + * Yes. (F2F 26 Sep 2001) + * @since DOM Level 3 + */ + public String + getBaseURI() + { + return null; + } } diff --git a/ext/nokogiri/depend b/ext/nokogiri/depend index be204336e1..24f5908865 100644 --- a/ext/nokogiri/depend +++ b/ext/nokogiri/depend @@ -1,358 +1,38 @@ -html_document.o: html_document.c html_document.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -html_element_description.o: html_element_description.c \ - html_element_description.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h 
xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h xml_namespace.h \ - xml_encoding_handler.h - -html_entity_lookup.o: html_entity_lookup.c html_entity_lookup.h \ - nokogiri.h xml_io.h xml_document.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -html_sax_parser_context.o: html_sax_parser_context.c \ - html_sax_parser_context.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h xslt_stylesheet.h xml_syntax_error.h xml_schema.h \ - xml_relax_ng.h html_element_description.h xml_namespace.h \ - xml_encoding_handler.h - -nokogiri.o: nokogiri.c nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h 
xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_attr.o: xml_attr.c xml_attr.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_attribute_decl.o: xml_attribute_decl.c xml_attribute_decl.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_element_decl.h xml_entity_decl.h xml_xpath_context.h \ - xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_cdata.o: xml_cdata.c xml_cdata.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h 
xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_comment.o: xml_comment.c xml_comment.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_node_set.h \ - xml_dtd.h xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_document.o: xml_document.c xml_document.h nokogiri.h xml_io.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_document_fragment.o: xml_document_fragment.c \ - xml_document_fragment.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h 
xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_dtd.o: xml_dtd.c xml_dtd.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_element_content.o: xml_element_content.c xml_element_content.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_sax_parser_context.h xml_sax_parser.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_element_decl.o: xml_element_decl.c xml_element_decl.h nokogiri.h \ - xml_io.h xml_document.h html_entity_lookup.h html_document.h \ - xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_entity_decl.h xml_xpath_context.h \ - xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \ - 
xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_encoding_handler.o: xml_encoding_handler.c xml_encoding_handler.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h - -xml_entity_decl.o: xml_entity_decl.c xml_entity_decl.h nokogiri.h \ - xml_io.h xml_document.h html_entity_lookup.h html_document.h \ - xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_xpath_context.h \ - xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_entity_reference.o: xml_entity_reference.c xml_entity_reference.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h 
html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_io.o: xml_io.c xml_io.h nokogiri.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_namespace.o: xml_namespace.c xml_namespace.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_encoding_handler.h - -xml_node.o: xml_node.c xml_node.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_text.h xml_cdata.h \ - xml_attr.h xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h 
html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_node_set.o: xml_node_set.c xml_node_set.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_dtd.h xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_processing_instruction.o: xml_processing_instruction.c \ - xml_processing_instruction.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_entity_reference.h xml_document_fragment.h \ - xml_comment.h xml_node_set.h xml_dtd.h xml_attribute_decl.h \ - xml_element_decl.h xml_entity_decl.h xml_xpath_context.h \ - xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_reader.o: xml_reader.c xml_reader.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h 
xml_encoding_handler.h - -xml_relax_ng.o: xml_relax_ng.c xml_relax_ng.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_schema.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_sax_parser.o: xml_sax_parser.c xml_sax_parser.h nokogiri.h \ - xml_io.h xml_document.h html_entity_lookup.h html_document.h \ - xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_sax_parser_context.o: xml_sax_parser_context.c \ - xml_sax_parser_context.h nokogiri.h xml_io.h xml_document.h \ - html_entity_lookup.h html_document.h xml_node.h xml_text.h \ - xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_sax_push_parser.o: 
xml_sax_push_parser.c xml_sax_push_parser.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xml_schema.o: xml_schema.c xml_schema.h nokogiri.h xml_io.h \ - xml_document.h html_entity_lookup.h html_document.h xml_node.h \ - xml_text.h xml_cdata.h xml_attr.h xml_processing_instruction.h \ - xml_entity_reference.h xml_document_fragment.h xml_comment.h \ - xml_node_set.h xml_dtd.h xml_attribute_decl.h xml_element_decl.h \ - xml_entity_decl.h xml_xpath_context.h xml_element_content.h \ - xml_sax_parser_context.h xml_sax_parser.h xml_sax_push_parser.h \ - xml_reader.h html_sax_parser_context.h xslt_stylesheet.h \ - xml_syntax_error.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_syntax_error.o: xml_syntax_error.c xml_syntax_error.h nokogiri.h \ - xml_io.h xml_document.h html_entity_lookup.h html_document.h \ - xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_schema.h \ - xml_relax_ng.h html_element_description.h xml_namespace.h \ - xml_encoding_handler.h - -xml_text.o: xml_text.c xml_text.h nokogiri.h xml_io.h xml_document.h \ - 
html_entity_lookup.h html_document.h xml_node.h xml_cdata.h \ - xml_attr.h xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xslt_stylesheet.h xml_syntax_error.h \ - xml_schema.h xml_relax_ng.h html_element_description.h \ - xml_namespace.h xml_encoding_handler.h - -xml_xpath_context.o: xml_xpath_context.c xml_xpath_context.h \ - nokogiri.h xml_io.h xml_document.h html_entity_lookup.h \ - html_document.h xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_element_content.h xml_sax_parser_context.h xml_sax_parser.h \ - xml_sax_push_parser.h xml_reader.h html_sax_parser_context.h \ - xslt_stylesheet.h xml_syntax_error.h xml_schema.h xml_relax_ng.h \ - html_element_description.h xml_namespace.h xml_encoding_handler.h - -xslt_stylesheet.o: xslt_stylesheet.c xslt_stylesheet.h nokogiri.h \ - xml_io.h xml_document.h html_entity_lookup.h html_document.h \ - xml_node.h xml_text.h xml_cdata.h xml_attr.h \ - xml_processing_instruction.h xml_entity_reference.h \ - xml_document_fragment.h xml_comment.h xml_node_set.h xml_dtd.h \ - xml_attribute_decl.h xml_element_decl.h xml_entity_decl.h \ - xml_xpath_context.h xml_element_content.h xml_sax_parser_context.h \ - xml_sax_parser.h xml_sax_push_parser.h xml_reader.h \ - html_sax_parser_context.h xml_syntax_error.h xml_schema.h \ - xml_relax_ng.h html_element_description.h xml_namespace.h \ - xml_encoding_handler.h +# -*-makefile-*- +# DO NOT DELETE + +gumbo.o: $(srcdir)/nokogiri.h +html_document.o: $(srcdir)/nokogiri.h +html_element_description.o: 
$(srcdir)/nokogiri.h +html_entity_lookup.o: $(srcdir)/nokogiri.h +html_sax_parser_context.o: $(srcdir)/nokogiri.h +html_sax_push_parser.o: $(srcdir)/nokogiri.h +libxml2_backwards_compat.o: $(srcdir)/nokogiri.h +nokogiri.o: $(srcdir)/nokogiri.h +test_global_handlers.o: $(srcdir)/nokogiri.h +xml_attr.o: $(srcdir)/nokogiri.h +xml_attribute_decl.o: $(srcdir)/nokogiri.h +xml_cdata.o: $(srcdir)/nokogiri.h +xml_comment.o: $(srcdir)/nokogiri.h +xml_document.o: $(srcdir)/nokogiri.h +xml_document_fragment.o: $(srcdir)/nokogiri.h +xml_dtd.o: $(srcdir)/nokogiri.h +xml_element_content.o: $(srcdir)/nokogiri.h +xml_element_decl.o: $(srcdir)/nokogiri.h +xml_encoding_handler.o: $(srcdir)/nokogiri.h +xml_entity_decl.o: $(srcdir)/nokogiri.h +xml_entity_reference.o: $(srcdir)/nokogiri.h +xml_namespace.o: $(srcdir)/nokogiri.h +xml_node.o: $(srcdir)/nokogiri.h +xml_node_set.o: $(srcdir)/nokogiri.h +xml_processing_instruction.o: $(srcdir)/nokogiri.h +xml_reader.o: $(srcdir)/nokogiri.h +xml_relax_ng.o: $(srcdir)/nokogiri.h +xml_sax_parser.o: $(srcdir)/nokogiri.h +xml_sax_parser_context.o: $(srcdir)/nokogiri.h +xml_sax_push_parser.o: $(srcdir)/nokogiri.h +xml_schema.o: $(srcdir)/nokogiri.h +xml_syntax_error.o: $(srcdir)/nokogiri.h +xml_text.o: $(srcdir)/nokogiri.h +xml_xpath_context.o: $(srcdir)/nokogiri.h +xslt_stylesheet.o: $(srcdir)/nokogiri.h diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 2e540518b8..70763bbea9 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -1,208 +1,386 @@ -# :stopdoc: -ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/ +# frozen_string_literal: true -require 'mkmf' +# rubocop:disable Style/GlobalVars -ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..')) +ENV["RC_ARCHS"] = "" if RUBY_PLATFORM.include?("darwin") + +require "mkmf" +require "rbconfig" +require "fileutils" +require "shellwords" +require "pathname" + +# helpful constants +PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), 
"..", "..")) +REQUIRED_LIBXML_VERSION = "2.6.21" +RECOMMENDED_LIBXML_VERSION = "2.9.3" + +REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec +REQUIRED_PKG_CONFIG_VERSION = "~> 1.1" + +# Keep track of what versions of what libraries we build against +OTHER_LIBRARY_VERSIONS = {} + +NOKOGIRI_HELP_MESSAGE = <<~HELP + USAGE: ruby #{$PROGRAM_NAME} [options] + + Flags that are always valid: + + --use-system-libraries + --enable-system-libraries + Use system libraries instead of building and using the packaged libraries. + + --disable-system-libraries + Use the packaged libraries, and ignore the system libraries. This is the default on most + platforms, and overrides `--use-system-libraries` and the environment variable + `NOKOGIRI_USE_SYSTEM_LIBRARIES`. + + --disable-clean + Do not clean out intermediate files after successful build. + + --prevent-strip + Take steps to prevent stripping the symbol table and debugging info from the shared + library, potentially overriding RbConfig's CFLAGS/LDFLAGS/DLDFLAGS. + + + Flags only used when using system libraries: + + General: + + --with-opt-dir=DIRECTORY + Look for headers and libraries in DIRECTORY. + + --with-opt-lib=DIRECTORY + Look for libraries in DIRECTORY. + + --with-opt-include=DIRECTORY + Look for headers in DIRECTORY. + + + Related to zlib: + + --with-zlib-dir=DIRECTORY + Look for zlib headers and library in DIRECTORY. + + --with-zlib-lib=DIRECTORY + Look for zlib library in DIRECTORY. + + --with-zlib-include=DIRECTORY + Look for zlib headers in DIRECTORY. + + + Related to iconv: + + --with-iconv-dir=DIRECTORY + Look for iconv headers and library in DIRECTORY. + + --with-iconv-lib=DIRECTORY + Look for iconv library in DIRECTORY. + + --with-iconv-include=DIRECTORY + Look for iconv headers in DIRECTORY. + + + Related to libxml2: + + --with-xml2-dir=DIRECTORY + Look for xml2 headers and library in DIRECTORY. + + --with-xml2-lib=DIRECTORY + Look for xml2 library in DIRECTORY. 
+ + --with-xml2-include=DIRECTORY + Look for xml2 headers in DIRECTORY. + + --with-xml2-source-dir=DIRECTORY + (dev only) Build libxml2 from the source code in DIRECTORY + + + Related to libxslt: + + --with-xslt-dir=DIRECTORY + Look for xslt headers and library in DIRECTORY. + + --with-xslt-lib=DIRECTORY + Look for xslt library in DIRECTORY. + + --with-xslt-include=DIRECTORY + Look for xslt headers in DIRECTORY. + + --with-xslt-source-dir=DIRECTORY + (dev only) Build libxslt from the source code in DIRECTORY + + + Related to libexslt: + + --with-exslt-dir=DIRECTORY + Look for exslt headers and library in DIRECTORY. + + --with-exslt-lib=DIRECTORY + Look for exslt library in DIRECTORY. + + --with-exslt-include=DIRECTORY + Look for exslt headers in DIRECTORY. + + + Flags only used when building and using the packaged libraries: + + --disable-static + Do not statically link packaged libraries, instead use shared libraries. + + --enable-cross-build + Enable cross-build mode. (You probably do not want to set this manually.) + + + Environment variables used: + + NOKOGIRI_USE_SYSTEM_LIBRARIES + Equivalent to `--enable-system-libraries` when set, even if nil or blank. + + CC + Use this path to invoke the compiler instead of `RbConfig::CONFIG['CC']` + + CPPFLAGS + If this string is accepted by the C preprocessor, add it to the flags passed to the C preprocessor + + CFLAGS + If this string is accepted by the compiler, add it to the flags passed to the compiler + + LDFLAGS + If this string is accepted by the linker, add it to the flags passed to the linker + + LIBS + Add this string to the flags passed to the linker +HELP # -# functions +# utility functions # +def config_clean? + enable_config("clean", true) +end + +def config_static? + default_static = !truffle? + enable_config("static", default_static) +end + +def config_cross_build? + enable_config("cross-build") +end + +def config_system_libraries? 
+ enable_config("system-libraries", ENV.key?("NOKOGIRI_USE_SYSTEM_LIBRARIES")) do |_, default| + arg_config("--use-system-libraries", default) + end +end + def windows? - RbConfig::CONFIG['target_os'] =~ /mingw32|mswin/ + RbConfig::CONFIG["target_os"].match?(/mingw|mswin/) end def solaris? - RbConfig::CONFIG['target_os'] =~ /solaris/ + RbConfig::CONFIG["target_os"].include?("solaris") end def darwin? - RbConfig::CONFIG['target_os'] =~ /darwin/ + RbConfig::CONFIG["target_os"].include?("darwin") end def openbsd? - RbConfig::CONFIG['target_os'] =~ /openbsd/ + RbConfig::CONFIG["target_os"].include?("openbsd") end -def nix? - ! (windows? || solaris? || darwin?) +def aix? + RbConfig::CONFIG["target_os"].include?("aix") end -def sh_export_path path - # because libxslt 1.1.29 configure.in uses AC_PATH_TOOL which treats ":" - # as a $PATH separator, we need to convert windows paths from - # - # C:/path/to/foo - # - # to - # - # /C/path/to/foo - # - # which is sh-compatible, in order to find things properly during - # configuration - if windows? - match = Regexp.new("^([A-Z]):(/.*)").match(path) - if match && match.length == 3 - return File.join("/", match[1], match[2]) - end - end - path +def nix? + !(windows? || solaris? || darwin?) end -def do_help - print < - SRC - else - version_int = sprintf "%d%2.2d%2.2d", *(version.split(".")) - <<-SRC -#include -#if LIBXML_VERSION < #{version_int} -#error libxml2 is older than #{version} -#endif - SRC - end - - try_cpp source -end - -def add_cflags(flags) - print "checking if the C compiler accepts #{flags}... 
" - with_cflags("#{$CFLAGS} #{flags}") do - if nokogiri_try_compile - puts 'yes' - true - else - puts 'no' - false - end - end +def ensure_func(func, headers = nil) + have_func(func, headers) || abort_could_not_find_library(func) end def preserving_globals - values = [ - $arg_config, - $CFLAGS, $CPPFLAGS, - $LDFLAGS, $LIBPATH, $libs - ].map(&:dup) + values = [$arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs].map(&:dup) yield ensure - $arg_config, - $CFLAGS, $CPPFLAGS, - $LDFLAGS, $LIBPATH, $libs = - values + $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs = values +end + +def abort_could_not_find_library(lib) + callers = caller(1..2).join("\n") + abort("-----\n#{callers}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----") end -def asplode(lib) - abort "-----\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----" +def chdir_for_build(&block) + # When using rake-compiler-dock on Windows, the underlying Virtualbox shared + # folders don't support symlinks, but libiconv expects it for a build on + # Linux. We work around this limitation by using the temp dir for cooking. + build_dir = /mingw|mswin|cygwin/.match?(ENV["RCD_HOST_RUBY_PLATFORM"].to_s) ? "/tmp" : "." + Dir.chdir(build_dir, &block) +end + +def sh_export_path(path) + # because libxslt 1.1.29 configure.in uses AC_PATH_TOOL which treats ":" + # as a $PATH separator, we need to convert windows paths from + # + # C:/path/to/foo + # + # to + # + # /C/path/to/foo + # + # which is sh-compatible, in order to find things properly during + # configuration + return path unless windows? 
+ + match = Regexp.new("^([A-Z]):(/.*)").match(path) + if match && match.length == 3 + return File.join("/", match[1], match[2]) + end + + path +end + +def libflag_to_filename(ldflag) + case ldflag + when /\A-l(.+)/ + "lib#{Regexp.last_match(1)}.#{$LIBEXT}" + end end -def have_iconv?(using = nil) - checking_for(using ? "iconv using #{using}" : 'iconv') do - ['', '-liconv'].any? do |opt| +def have_libxml_headers?(version = nil) + source = if version.nil? + <<~SRC + #include + SRC + else + version_int = format("%d%2.2d%2.2d", *version.split(".")) + <<~SRC + #include + #if LIBXML_VERSION < #{version_int} + # error libxml2 is older than #{version} + #endif + SRC + end + + try_cpp(source) +end + +def try_link_iconv(using = nil) + checking_for(using ? "iconv using #{using}" : "iconv") do + ["", "-liconv"].any? do |opt| preserving_globals do yield if block_given? - try_link(<<-'SRC', opt) -#include -#include - -int main(void) -{ - iconv_t cd = iconv_open("", ""); - iconv(cd, NULL, NULL, NULL, NULL); - return EXIT_SUCCESS; -} + try_link(<<~'SRC', opt) + #include + #include + int main(void) + { + iconv_t cd = iconv_open("", ""); + iconv(cd, NULL, NULL, NULL, NULL); + return EXIT_SUCCESS; + } SRC end end @@ -210,67 +388,70 @@ def have_iconv?(using = nil) end def iconv_configure_flags - # If --with-iconv-dir or --with-opt-dir is given, it should be - # the first priority - %w[iconv opt].each do |name| - if (config = preserving_globals { dir_config(name) }).any? 
&& - have_iconv?("--with-#{name}-* flags") { dir_config(name) } - idirs, ldirs = config.map do |dirs| - Array(dirs).flat_map do |dir| - dir.split(File::PATH_SEPARATOR) - end if dirs - end - - return [ - '--with-iconv=yes', - *("CPPFLAGS=#{idirs.map { |dir| '-I' + dir }.join(' ')}" if idirs), - *("LDFLAGS=#{ldirs.map { |dir| '-L' + dir }.join(' ')}" if ldirs), - ] + # give --with-iconv-dir and --with-opt-dir first priority + ["iconv", "opt"].each do |target| + config = preserving_globals { dir_config(target) } + next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) } + + idirs, ldirs = config.map do |dirs| + Array(dirs).flat_map do |dir| + dir.split(File::PATH_SEPARATOR) + end if dirs end + + return [ + "--with-iconv=yes", + *("CPPFLAGS=#{idirs.map { |dir| "-I" + dir }.join(" ")}" if idirs), + *("LDFLAGS=#{ldirs.map { |dir| "-L" + dir }.join(" ")}" if ldirs), + ] end - if have_iconv? - return ['--with-iconv=yes'] + if try_link_iconv + return ["--with-iconv=yes"] end - if (config = preserving_globals { package_config('libiconv') }) && - have_iconv?('pkg-config libiconv') { package_config('libiconv') } + config = preserving_globals { pkg_config("libiconv") } + if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") } cflags, ldflags, libs = config return [ - '--with-iconv=yes', + "--with-iconv=yes", "CPPFLAGS=#{cflags}", "LDFLAGS=#{ldflags}", "LIBS=#{libs}", ] end - asplode "libiconv" + abort_could_not_find_library("libiconv") end -# When using rake-compiler-dock on Windows, the underlying Virtualbox shared -# folders don't support symlinks, but libiconv expects it for a build on -# Linux. We work around this limitation by using the temp dir for cooking. -def chdir_for_build - build_dir = ENV['RCD_HOST_RUBY_PLATFORM'].to_s =~ /mingw|mswin|cygwin/ ? '/tmp' : '.' 
- Dir.chdir(build_dir) do - yield +def process_recipe(name, version, static_p, cross_p, cacheable_p = true) + require "rubygems" + gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time + require "mini_portile2" + message("Using mini_portile version #{MiniPortile::VERSION}\n") + + unless ["libxml2", "libxslt"].include?(name) + OTHER_LIBRARY_VERSIONS[name] = version end -end -def process_recipe(name, version, static_p, cross_p) MiniPortile.new(name, version).tap do |recipe| - recipe.target = File.join(ROOT, "ports") - # Prefer host_alias over host in order to use i586-mingw32msvc as - # correct compiler prefix for cross build, but use host if not set. + def recipe.port_path + "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}" + end + + # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And + # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits). recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"] - recipe.patch_files = Dir[File.join(ROOT, "patches", name, "*.patch")].sort + recipe.host = recipe.host.gsub(/i386/, "i686") + + recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}" yield recipe env = Hash.new do |hash, key| - hash[key] = "#{ENV[key]}" # (ENV[key].dup rescue '') + hash[key] = (ENV[key]).to_s end recipe.configure_options.flatten! 
@@ -278,7 +459,11 @@ def process_recipe(name, version, static_p, cross_p) recipe.configure_options.delete_if do |option| case option when /\A(\w+)=(.*)\z/ - env[$1] = $2 + env[Regexp.last_match(1)] = if env.key?(Regexp.last_match(1)) + concat_flags(env[Regexp.last_match(1)], Regexp.last_match(2)) + else + Regexp.last_match(2) + end true else false @@ -290,7 +475,7 @@ def process_recipe(name, version, static_p, cross_p) "--disable-shared", "--enable-static", ] - env['CFLAGS'] = "-fPIC #{env['CFLAGS']}" + env["CFLAGS"] = concat_flags(env["CFLAGS"], "-fPIC") else recipe.configure_options += [ "--enable-shared", @@ -305,382 +490,595 @@ def process_recipe(name, version, static_p, cross_p) ] end - if RbConfig::CONFIG['target_cpu'] == 'universal' - %w[CFLAGS LDFLAGS].each do |key| - unless env[key].include?('-arch') - env[key] += ' ' + RbConfig::CONFIG['ARCH_FLAG'] + if RbConfig::CONFIG["target_cpu"] == "universal" + ["CFLAGS", "LDFLAGS"].each do |key| + unless env[key].include?("-arch") + env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"]) end end end recipe.configure_options += env.map do |key, value| - "#{key}=#{value}" + "#{key}=#{value.strip}" end - message <<-"EOS" -************************************************************************ -IMPORTANT NOTICE: - -Building Nokogiri with a packaged version of #{name}-#{version}#{'.' if recipe.patch_files.empty?} - EOS + checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed" + if File.exist?(checkpoint) && !recipe.source_directory + message("Building Nokogiri with a packaged version of #{name}-#{version}.\n") + else + message(<<~EOM) + ---------- IMPORTANT NOTICE ---------- + Building Nokogiri with a packaged version of #{name}-#{version}. + Configuration options: #{recipe.configure_options.shelljoin} + EOM - unless recipe.patch_files.empty? - message "with the following patches applied:\n" + unless recipe.patch_files.empty? 
+ message("The following patches are being applied:\n") - recipe.patch_files.each do |patch| - message "\t- %s\n" % File.basename(patch) + recipe.patch_files.each do |patch| + message(format(" - %s\n", File.basename(patch))) + end end - end - - message <<-"EOS" -Team Nokogiri will keep on doing their best to provide security -updates in a timely manner, but if this is a concern for you and want -to use the system library instead; abort this installation process and -reinstall nokogiri as follows: + message(<<~EOM) if name != "libgumbo" - gem install nokogiri -- --use-system-libraries - [--with-xml2-config=/path/to/xml2-config] - [--with-xslt-config=/path/to/xslt-config] + The Nokogiri maintainers intend to provide timely security updates, but if + this is a concern for you and want to use your OS/distro system library + instead, then abort this installation process and install nokogiri as + instructed at: -If you are using Bundler, tell it to use the option: + https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries - bundle config build.nokogiri --use-system-libraries - bundle install - EOS + EOM - message <<-"EOS" if name == 'libxml2' + message(<<~EOM) if name == "libxml2" + Note, however, that nokogiri cannot guarantee compatibility with every + version of libxml2 that may be provided by OS/package vendors. -Note, however, that nokogiri is not fully compatible with arbitrary -versions of libxml2 provided by OS/package vendors. 
- EOS + EOM - message <<-"EOS" -************************************************************************ - EOS - - checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed" - unless File.exist?(checkpoint) - chdir_for_build do - recipe.cook - end - FileUtils.touch checkpoint + chdir_for_build { recipe.cook } + FileUtils.touch(checkpoint) end recipe.activate end end -def lib_a(ldflag) - case ldflag - when /\A-l(.+)/ - "lib#{$1}.#{$LIBEXT}" +def copy_packaged_libraries_headers(to_path:, from_recipes:) + FileUtils.rm_rf(to_path, secure: true) + FileUtils.mkdir(to_path) + from_recipes.each do |recipe| + FileUtils.cp_r(Dir[File.join(recipe.path, "include/*")], to_path) end end -def using_system_libraries? - arg_config('--use-system-libraries', !!ENV['NOKOGIRI_USE_SYSTEM_LIBRARIES']) +def do_help + print(NOKOGIRI_HELP_MESSAGE) + exit!(0) end -# -# main -# +def do_clean + root = Pathname(PACKAGE_ROOT_DIR) + pwd = Pathname(Dir.pwd) + + # Skip if this is a development work tree + unless (root + ".git").exist? + message("Cleaning files only used during build.\n") + + # (root + 'tmp') cannot be removed at this stage because + # nokogiri.so is yet to be copied to lib. + + # clean the ports build directory + Pathname.glob(pwd.join("tmp", "*", "ports")) do |dir| + FileUtils.rm_rf(dir, verbose: true) + end + + if config_static? + # ports installation can be safely removed if statically linked. + FileUtils.rm_rf(root + "ports", verbose: true) + else + FileUtils.rm_rf(root + "ports" + "archives", verbose: true) + end + end -case -when arg_config('--help') - do_help -when arg_config('--clean') - do_clean + exit!(0) end -if darwin? - ENV['CFLAGS'] = "#{ENV['CFLAGS']} -I /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/libxml2" +# In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to +# resolve symbols against the ruby binary. 
+# +# This makes it challenging to build a single extension that works with both a ruby with +# `--enable-shared` and one with `--disable-shared. To work around that, we choose to add +# `-flat_namespace` to the link line (later in this file). +# +# The `-flat_namespace` line introduces its own behavior change, which is that (similar to on +# Linux), any symbols in the extension that are exported may now be resolved by shared libraries +# loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are +# statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded +# by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2, +# and that messes with our assumptions about whether we're running with a patched libxml2 or a +# vanilla libxml2. +# +# We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and +# libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other +# words, when we use `load_hidden`, what happens in the extension stays in the extension. +# +# See https://github.com/rake-compiler/rake-compiler-dock/issues/87 for more info. +# +# Anyway, this method is the logical bit to tell us when to turn on these workarounds. +def needs_darwin_linker_hack + config_cross_build? && + darwin? && + Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first)) end -if openbsd? && !using_system_libraries? - if `#{ENV['CC'] || '/usr/bin/cc'} -v 2>&1` !~ /clang/ - ENV['CC'] ||= find_executable('egcc') or - abort "Please install gcc 4.9+ from ports using `pkg_add -v gcc`" +# +# main +# +do_help if arg_config("--help") +do_clean if arg_config("--clean") + +if openbsd? && !config_system_libraries? 
+ unless %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1).include?("clang") + (ENV["CC"] ||= find_executable("egcc")) || + abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`") end - ENV['CFLAGS'] = "#{ENV['CFLAGS']} -I /usr/local/include" + append_cppflags "-I/usr/local/include" end -if ENV['CC'] - RbConfig::CONFIG['CC'] = RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] +if ENV["CC"] + RbConfig::CONFIG["CC"] = RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"] end + # use same c compiler for libxml and libxslt -ENV['CC'] = RbConfig::CONFIG['CC'] +ENV["CC"] = RbConfig::CONFIG["CC"] + +if arg_config("--prevent-strip") + old_cflags = $CFLAGS.split.join(" ") + old_ldflags = $LDFLAGS.split.join(" ") + old_dldflags = $DLDFLAGS.split.join(" ") + $CFLAGS = $CFLAGS.split.reject { |flag| flag == "-s" }.join(" ") + $LDFLAGS = $LDFLAGS.split.reject { |flag| flag == "-s" }.join(" ") + $DLDFLAGS = $DLDFLAGS.split.reject { |flag| flag == "-s" }.join(" ") + puts "Prevent stripping by removing '-s' from $CFLAGS" if old_cflags != $CFLAGS + puts "Prevent stripping by removing '-s' from $LDFLAGS" if old_ldflags != $LDFLAGS + puts "Prevent stripping by removing '-s' from $DLDFLAGS" if old_dldflags != $DLDFLAGS +end -$LIBS << " #{ENV["LIBS"]}" +# adopt environment config +append_cflags(ENV["CFLAGS"].split) unless ENV["CFLAGS"].nil? +append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil? +append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil? +$LIBS = concat_flags($LIBS, ENV["LIBS"]) -# Read CFLAGS from ENV and make sure compiling works. -add_cflags(ENV["CFLAGS"]) +# nokogumbo code uses C90/C99 features, let's make sure older compilers won't give +# errors/warnings. see #2302 +append_cflags(["-std=c99", "-Wno-declaration-after-statement"]) -if windows? - $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF" -end +# gumbo html5 serialization is slower with O3, let's make sure we use O2 +append_cflags("-O2") -if solaris? 
- $CFLAGS << " -DUSE_INCLUDED_VASPRINTF" -end +# always include debugging information +append_cflags("-g") -if darwin? - # Let Apple LLVM/clang 5.1 ignore unknown compiler flags - add_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future") -end +# we use at least one inline function in the C extension +append_cflags("-Winline") -if nix? - $CFLAGS << " -g -DXP_UNIX" -end +# good to have no matter what Ruby was compiled with +append_cflags("-Wmissing-noreturn") -if RUBY_PLATFORM =~ /mingw/i - # Work around a character escaping bug in MSYS by passing an arbitrary - # double quoted parameter to gcc. See https://sourceforge.net/p/mingw/bugs/2142 - $CPPFLAGS << ' "-Idummypath"' +# check integer loss of precision +if darwin? + append_cflags("-Wshorten-64-to-32") +else + append_cflags("-Wconversion -Wno-sign-conversion") end -if RbConfig::CONFIG['CC'] =~ /gcc/ - $CFLAGS << " -O3" unless $CFLAGS[/-O\d/] - $CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wmissing-noreturn -Winline" +# handle clang variations, see #1101 +if darwin? + append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future") + append_cflags("-Wno-unknown-warning-option") end -case -when using_system_libraries? - message "Building nokogiri using system libraries.\n" - - dir_config('zlib') +# these tend to be noisy, but on occasion useful during development +# append_cflags(["-Wcast-qual", "-Wwrite-strings"]) - # Using system libraries means we rely on the system libxml2 with - # regard to the iconv support. +# Add SDK-specific include path for macOS and brew versions before v2.2.12 (2020-04-08) [#1851, #1801] +macos_mojave_sdk_include_path = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/libxml2" +if config_system_libraries? && darwin? && Dir.exist?(macos_mojave_sdk_include_path) + append_cppflags("-I#{macos_mojave_sdk_include_path}") +end - dir_config('xml2').any? or package_config('libxml-2.0') - dir_config('xslt').any? 
or package_config('libxslt') - dir_config('exslt').any? or package_config('libexslt') +# Work around a character escaping bug in MSYS by passing an arbitrary double-quoted parameter to gcc. +# See https://sourceforge.net/p/mingw/bugs/2142 +append_cppflags(' "-Idummypath"') if windows? - check_libxml_version or abort "ERROR: cannot discover where libxml2 is located on your system. please make sure `pkg-config` is installed." - check_libxml_version("2.6.21") or abort "ERROR: libxml2 version 2.6.21 or later is required!" - check_libxml_version("2.9.3") or warn "WARNING: libxml2 version 2.9.3 or later is highly recommended, but proceeding anyway." +if config_system_libraries? + message "Building nokogiri using system libraries.\n" + ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z", + headers: "zlib.h", func: "gzdopen") + ensure_package_configuration(opt: "xml2", pc: "libxml-2.0", lib: "xml2", + headers: "libxml/parser.h", func: "xmlParseDoc") + ensure_package_configuration(opt: "xslt", pc: "libxslt", lib: "xslt", + headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc") + ensure_package_configuration(opt: "exslt", pc: "libexslt", lib: "exslt", + headers: "libexslt/exslt.h", func: "exsltFuncRegister") + + have_libxml_headers?(REQUIRED_LIBXML_VERSION) || + abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!") + have_libxml_headers?(RECOMMENDED_LIBXML_VERSION) || + warn("WARNING: libxml2 version #{RECOMMENDED_LIBXML_VERSION} or later is highly recommended, but proceeding anyway.") else message "Building nokogiri using packaged libraries.\n" - # The gem version constraint in the Rakefile is not respected at install time. - # Keep this version in sync with the one in the Rakefile ! - require 'rubygems' - gem 'mini_portile2', '~> 2.4.0' - require 'mini_portile2' - message "Using mini_portile version #{MiniPortile::VERSION}\n" + static_p = config_static? + message "Static linking is #{static_p ? 
"enabled" : "disabled"}.\n" - require 'yaml' + cross_build_p = config_cross_build? + message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n" - static_p = enable_config('static', true) or - message "Static linking is disabled.\n" + if needs_darwin_linker_hack + append_ldflags("-Wl,-flat_namespace") + end - dir_config('zlib') + require "yaml" + dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml")) - dependencies = YAML.load_file(File.join(ROOT, "dependencies.yml")) + dir_config("zlib") - cross_build_p = enable_config("cross-build") if cross_build_p || windows? zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe| recipe.files = [{ - url: "http://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["zlib"]["sha256"] - }] - class << recipe - attr_accessor :cross_build_p - - def configure - Dir.chdir work_path do - mk = File.read 'win32/Makefile.gcc' - File.open 'win32/Makefile.gcc', 'wb' do |f| - f.puts "BINARY_PATH = #{path}/bin" - f.puts "LIBRARY_PATH = #{path}/lib" - f.puts "INCLUDE_PATH = #{path}/include" - mk.sub!(/^PREFIX\s*=\s*$/, "PREFIX = #{host}-") if cross_build_p - f.puts mk + url: zlib_source(recipe.version), + sha256: dependencies["zlib"]["sha256"], + }] + if windows? + class << recipe + attr_accessor :cross_build_p + + def configure + Dir.chdir(work_path) do + mk = File.read("win32/Makefile.gcc") + File.open("win32/Makefile.gcc", "wb") do |f| + f.puts "BINARY_PATH = #{path}/bin" + f.puts "LIBRARY_PATH = #{path}/lib" + f.puts "INCLUDE_PATH = #{path}/include" + mk.sub!(/^PREFIX\s*=\s*$/, "PREFIX = #{host}-") if cross_build_p + f.puts mk + end end end - end - def configured? - Dir.chdir work_path do - !! (File.read('win32/Makefile.gcc') =~ /^BINARY_PATH/) + def configured? 
+ Dir.chdir(work_path) do + !!(File.read("win32/Makefile.gcc") =~ /^BINARY_PATH/) + end end - end - def compile - execute "compile", "make -f win32/Makefile.gcc" - end + def compile + execute("compile", "make -f win32/Makefile.gcc") + end - def install - execute "install", "make -f win32/Makefile.gcc install" + def install + execute("install", "make -f win32/Makefile.gcc install") + end + end + recipe.cross_build_p = cross_build_p + else + class << recipe + def configure + env = {} + env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g") + env["CHOST"] = host + execute("configure", ["./configure", "--static", configure_prefix], { env: env }) + if darwin? + # needed as of zlib 1.2.13 + Dir.chdir(work_path) do + makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool") + File.open("Makefile", "w") { |m| m.write(makefile) } + end + end + end end end - recipe.cross_build_p = cross_build_p end - libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p, cross_build_p) do |recipe| - recipe.files = [{ - url: "http://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["libiconv"]["sha256"] + unless nix? + libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p, + cross_build_p) do |recipe| + recipe.files = [{ + url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz", + sha256: dependencies["libiconv"]["sha256"], }] - recipe.configure_options += [ - "CPPFLAGS=-Wall", - "CFLAGS=-O2 -g", - "CXXFLAGS=-O2 -g", - "LDFLAGS=" - ] - end - else - if darwin? && !have_header('iconv.h') - abort <<'EOM'.chomp ------ -The file "iconv.h" is missing in your build environment, -which means you haven't installed Xcode Command Line Tools properly. - -To install Command Line Tools, try running `xcode-select --install` on -terminal and follow the instructions. 
If it fails, open Xcode.app, -select from the menu "Xcode" - "Open Developer Tool" - "More Developer -Tools" to open the developer site, download the installer for your OS -version and run it. ------ -EOM + + # The libiconv configure script doesn't accept "arm64" host string but "aarch64" + recipe.host = recipe.host.gsub("arm64-apple-darwin", "aarch64-apple-darwin") + + cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") + + recipe.configure_options += [ + "--disable-dependency-tracking", + "CPPFLAGS=-Wall", + "CFLAGS=#{cflags}", + "CXXFLAGS=#{cflags}", + "LDFLAGS=", + ] + end end + elsif darwin? && !have_header("iconv.h") + abort(<<~EOM.chomp) + ----- + The file "iconv.h" is missing in your build environment, + which means you haven't installed Xcode Command Line Tools properly. + + To install Command Line Tools, try running `xcode-select --install` on + terminal and follow the instructions. If it fails, open Xcode.app, + select from the menu "Xcode" - "Open Developer Tool" - "More Developer + Tools" to open the developer site, download the installer for your OS + version and run it. + ----- + EOM end - unless windows? 
- preserving_globals { - have_library('z', 'gzdopen', 'zlib.h') - } or abort 'zlib is missing; necessary for building libxml2' + if zlib_recipe + append_cppflags("-I#{zlib_recipe.path}/include") + $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH + ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z", + headers: "zlib.h", func: "gzdopen") + end + + if libiconv_recipe + append_cppflags("-I#{libiconv_recipe.path}/include") + $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH + ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv", + headers: "iconv.h", func: "iconv_open") end libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe| - recipe.files = [{ - url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["libxml2"]["sha256"] + source_dir = arg_config("--with-xml2-source-dir") + if source_dir + recipe.source_directory = source_dir + else + minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".") + recipe.files = [{ + url: "#{gnome_source}/sources/libxml2/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz", + sha256: dependencies["libxml2"]["sha256"], }] + recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort + end + + cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") + + if zlib_recipe + recipe.configure_options << "--with-zlib=#{zlib_recipe.path}" + end + + if libiconv_recipe + recipe.configure_options << "--with-iconv=#{libiconv_recipe.path}" + else + recipe.configure_options += iconv_configure_flags + end + + if darwin? && !cross_build_p + recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"] + end + + if windows? 
+ cflags = concat_flags(cflags, "-ULIBXML_STATIC", "-DIN_LIBXML") + end + + recipe.configure_options << if source_dir + "--config-cache" + else + "--disable-dependency-tracking" + end + recipe.configure_options += [ "--without-python", "--without-readline", - *(zlib_recipe ? ["--with-zlib=#{zlib_recipe.path}", "CFLAGS=-I#{zlib_recipe.path}/include"] : []), - *(libiconv_recipe ? "--with-iconv=#{libiconv_recipe.path}" : iconv_configure_flags), "--with-c14n", "--with-debug", "--with-threads", - *(darwin? ? ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"] : "") + "CFLAGS=#{cflags}", ] end libxslt_recipe = process_recipe("libxslt", dependencies["libxslt"]["version"], static_p, cross_build_p) do |recipe| - recipe.files = [{ - url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["libxslt"]["sha256"] + source_dir = arg_config("--with-xslt-source-dir") + if source_dir + recipe.source_directory = source_dir + else + minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".") + recipe.files = [{ + url: "#{gnome_source}/sources/libxslt/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz", + sha256: dependencies["libxslt"]["sha256"], }] + recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxslt", "*.patch")].sort + end + + cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") + + if darwin? && !cross_build_p + recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"] + end + + if windows? + cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT") + cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT") + end + + recipe.configure_options << if source_dir + "--config-cache" + else + "--disable-dependency-tracking" + end + recipe.configure_options += [ "--without-python", "--without-crypto", "--with-debug", "--with-libxml-prefix=#{sh_export_path(libxml2_recipe.path)}", - *(darwin? ? 
["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"] : "") + "CFLAGS=#{cflags}", ] end - $CFLAGS << ' ' << '-DNOKOGIRI_USE_PACKAGED_LIBRARIES' - $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH if zlib_recipe - $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH if libiconv_recipe - - have_lzma = preserving_globals { - have_library('lzma') - } + append_cppflags("-DNOKOGIRI_PACKAGED_LIBRARIES") + append_cppflags("-DNOKOGIRI_PRECOMPILED_LIBRARIES") if cross_build_p $libs = $libs.shellsplit.tap do |libs| [libxml2_recipe, libxslt_recipe].each do |recipe| libname = recipe.name[/\Alib(.+)\z/, 1] - File.join(recipe.path, "bin", "#{libname}-config").tap do |config| + config_basename = "#{libname}-config" + File.join(recipe.path, "bin", config_basename).tap do |config| # call config scripts explicit with 'sh' for compat with Windows - $CPPFLAGS = `sh #{config} --cflags`.strip << ' ' << $CPPFLAGS - `sh #{config} --libs`.strip.shellsplit.each do |arg| + cflags = %x(sh #{config} --cflags).strip + message("#{config_basename} cflags: #{cflags}\n") + $CPPFLAGS = concat_flags(cflags, $CPPFLAGS) # prepend + + %x(sh #{config} --libs).strip.shellsplit.each do |arg| case arg when /\A-L(.+)\z/ # Prioritize ports' directories - if $1.start_with?(ROOT + '/') - $LIBPATH = [$1] | $LIBPATH + $LIBPATH = if Regexp.last_match(1).start_with?(PACKAGE_ROOT_DIR + "/") + [Regexp.last_match(1)] | $LIBPATH else - $LIBPATH = $LIBPATH | [$1] + $LIBPATH | [Regexp.last_match(1)] end when /\A-l./ libs.unshift(arg) else - $LDFLAGS << ' ' << arg.shellescape + $LDFLAGS << " " << arg.shellescape end end end - # Defining a macro that expands to a C string; double quotes are significant. 
- $CPPFLAGS << ' ' << "-DNOKOGIRI_#{recipe.name.upcase}_PATH=\"#{recipe.path}\"".inspect - $CPPFLAGS << ' ' << "-DNOKOGIRI_#{recipe.name.upcase}_PATCHES=\"#{recipe.patch_files.map { |path| File.basename(path) }.join(' ')}\"".inspect + patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ") + append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\"#{patches_string}\\""]) case libname - when 'xml2' + when "xml2" # xslt-config --libs or pkg-config libxslt --libs does not include # -llzma, so we need to add it manually when linking statically. - if static_p && have_lzma + if static_p && preserving_globals { local_have_library("lzma") } # Add it at the end; GH #988 - libs << '-llzma' + libs << "-llzma" end - when 'xslt' + when "xslt" # xslt-config does not have a flag to emit options including # -lexslt, so add it manually. - libs.unshift('-lexslt') + libs.unshift("-lexslt") end end end.shelljoin if static_p + static_archive_ld_flag = needs_darwin_linker_hack ? 
["-load_hidden"] : [] $libs = $libs.shellsplit.map do |arg| case arg - when '-lxml2' - File.join(libxml2_recipe.path, 'lib', lib_a(arg)) - when '-lxslt', '-lexslt' - File.join(libxslt_recipe.path, 'lib', lib_a(arg)) + when "-lxml2" + static_archive_ld_flag + [File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))] + when "-lxslt", "-lexslt" + static_archive_ld_flag + [File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))] else arg end - end.shelljoin + end.flatten.shelljoin end -end -{ - "xml2" => ['xmlParseDoc', 'libxml/parser.h'], - "xslt" => ['xsltParseStylesheetDoc', 'libxslt/xslt.h'], - "exslt" => ['exsltFuncRegister', 'libexslt/exslt.h'], -}.each do |lib, (func, header)| - have_func(func, header) || - have_library(lib, func, header) || - have_library("lib#{lib}", func, header) or - asplode("lib#{lib}") + ensure_func("xmlParseDoc", "libxml/parser.h") + ensure_func("xsltParseStylesheetDoc", "libxslt/xslt.h") + ensure_func("exsltFuncRegister", "libexslt/exslt.h") end -have_func('xmlHasFeature') or abort "xmlHasFeature() is missing." -have_func('xmlFirstElementChild') -have_func('xmlRelaxNGSetParserStructuredErrors') -have_func('xmlRelaxNGSetParserStructuredErrors') -have_func('xmlRelaxNGSetValidStructuredErrors') -have_func('xmlSchemaSetValidStructuredErrors') -have_func('xmlSchemaSetParserStructuredErrors') +libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p, false) do |recipe| + recipe.configure_options = [] + + class << recipe + def downloaded? + true + end + + def extract + target = File.join(tmp_path, "gumbo-parser") + output("Copying gumbo-parser files into #{target}...") + FileUtils.mkdir_p(target) + FileUtils.cp(Dir.glob(File.join(PACKAGE_ROOT_DIR, "gumbo-parser/src/*")), target) + end + + def configured? 
+ true + end + + def install + lib_dir = File.join(port_path, "lib") + inc_dir = File.join(port_path, "include") + FileUtils.mkdir_p([lib_dir, inc_dir]) + FileUtils.cp(File.join(work_path, "libgumbo.a"), lib_dir) + FileUtils.cp(Dir.glob(File.join(work_path, "*.h")), inc_dir) + end + + def compile + cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g") + + env = { "CC" => gcc_cmd, "CFLAGS" => cflags } + if config_cross_build? + if /darwin/.match?(host) + env["AR"] = "#{host}-libtool" + env["ARFLAGS"] = "-o" + else + env["AR"] = "#{host}-ar" + end + env["RANLIB"] = "#{host}-ranlib" + end + + execute("compile", make_cmd, { env: env }) + end + end +end +append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}") +$libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a") +$LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")] +ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h") + +have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21 +have_func("xmlFirstElementChild") # introduced in libxml 2.7.3 +have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24 +have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 +have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 +have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 +have_func("rb_gc_location") # introduced in Ruby 2.7 +have_func("rb_category_warning") # introduced in Ruby 3.0 + +other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",") +append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\"#{other_library_versions_string}\\""]) + +unless config_system_libraries? 
+ if cross_build_p + # When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include + # These are packaged up by the cross-compiling callback in the ExtensionTask + copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"), + from_recipes: [libxml2_recipe, libxslt_recipe]) + else + # When compiling during installation, install packaged libraries' header files into ext/nokogiri/include + copy_packaged_libraries_headers(to_path: "include", + from_recipes: [libxml2_recipe, libxslt_recipe]) + $INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"] + end +end -create_makefile('nokogiri/nokogiri') +create_makefile("nokogiri/nokogiri") -if enable_config('clean', true) +if config_clean? # Do not clean if run in a development work tree. - File.open('Makefile', 'at') do |mk| - mk.print < + +#include "nokogiri_gumbo.h" + +VALUE cNokogiriHtml5Document; + +// Interned symbols +static ID internal_subset; +static ID parent; + +/* Backwards compatibility to Ruby 2.1.0 */ +#if RUBY_API_VERSION_CODE < 20200 +#define ONIG_ESCAPE_UCHAR_COLLISION 1 +#include + +static VALUE +rb_utf8_str_new(const char *str, long length) +{ + return rb_enc_str_new(str, length, rb_utf8_encoding()); +} + +static VALUE +rb_utf8_str_new_cstr(const char *str) +{ + return rb_enc_str_new_cstr(str, rb_utf8_encoding()); +} + +static VALUE +rb_utf8_str_new_static(const char *str, long length) +{ + return rb_enc_str_new(str, length, rb_utf8_encoding()); +} +#endif + +#include +#include +#include + +// URI = system id +// external id = public id +static xmlDocPtr +new_html_doc(const char *dtd_name, const char *system, const char *public) +{ + // These two libxml2 functions take the public and system ids in + // opposite orders. 
+ htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL); + assert(doc); + if (dtd_name) { + xmlCreateIntSubset(doc, (const xmlChar *)dtd_name, (const xmlChar *)public, (const xmlChar *)system); + } + return doc; +} + +static xmlNodePtr +get_parent(xmlNodePtr node) +{ + return node->parent; +} + +static GumboOutput * +perform_parse(const GumboOptions *options, VALUE input) +{ + assert(RTEST(input)); + Check_Type(input, T_STRING); + GumboOutput *output = gumbo_parse_with_options( + options, + RSTRING_PTR(input), + RSTRING_LEN(input) + ); + + const char *status_string = gumbo_status_to_string(output->status); + switch (output->status) { + case GUMBO_STATUS_OK: + break; + case GUMBO_STATUS_TOO_MANY_ATTRIBUTES: + case GUMBO_STATUS_TREE_TOO_DEEP: + gumbo_destroy_output(output); + rb_raise(rb_eArgError, "%s", status_string); + case GUMBO_STATUS_OUT_OF_MEMORY: + gumbo_destroy_output(output); + rb_raise(rb_eNoMemError, "%s", status_string); + } + return output; +} + +static xmlNsPtr +lookup_or_add_ns( + xmlDocPtr doc, + xmlNodePtr root, + const char *href, + const char *prefix +) +{ + xmlNsPtr ns = xmlSearchNs(doc, root, (const xmlChar *)prefix); + if (ns) { + return ns; + } + return xmlNewNs(root, (const xmlChar *)href, (const xmlChar *)prefix); +} + +static void +set_line(xmlNodePtr node, size_t line) +{ + // libxml2 uses 65535 to mean look elsewhere for the line number on some + // nodes. + if (line < 65535) { + node->line = (unsigned short)line; + } +} + +// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted +// at gumbo_node. +static void +build_tree( + xmlDocPtr doc, + xmlNodePtr xml_output_node, + const GumboNode *gumbo_node +) +{ + xmlNodePtr xml_root = NULL; + xmlNodePtr xml_node = xml_output_node; + size_t child_index = 0; + + while (true) { + assert(gumbo_node != NULL); + const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ? 
+ &gumbo_node->v.document.children : &gumbo_node->v.element.children; + if (child_index >= children->length) { + // Move up the tree and to the next child. + if (xml_node == xml_output_node) { + // We've built as much of the tree as we can. + return; + } + child_index = gumbo_node->index_within_parent + 1; + gumbo_node = gumbo_node->parent; + xml_node = get_parent(xml_node); + // Children of fragments don't share the same root, so reset it and + // it'll be set below. In the non-fragment case, this will only happen + // after the html element has been finished at which point there are no + // further elements. + if (xml_node == xml_output_node) { + xml_root = NULL; + } + continue; + } + const GumboNode *gumbo_child = children->data[child_index++]; + xmlNodePtr xml_child; + + switch (gumbo_child->type) { + case GUMBO_NODE_DOCUMENT: + abort(); // Bug in Gumbo. + + case GUMBO_NODE_TEXT: + case GUMBO_NODE_WHITESPACE: + xml_child = xmlNewDocText(doc, (const xmlChar *)gumbo_child->v.text.text); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_CDATA: + xml_child = xmlNewCDataBlock(doc, (const xmlChar *)gumbo_child->v.text.text, + (int) strlen(gumbo_child->v.text.text)); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_COMMENT: + xml_child = xmlNewDocComment(doc, (const xmlChar *)gumbo_child->v.text.text); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_TEMPLATE: + // XXX: Should create a template element and a new DocumentFragment + case GUMBO_NODE_ELEMENT: { + xml_child = xmlNewDocNode(doc, NULL, (const xmlChar *)gumbo_child->v.element.name, NULL); + set_line(xml_child, gumbo_child->v.element.start_pos.line); + if (xml_root == NULL) { + xml_root = xml_child; + } + xmlNsPtr ns = NULL; + switch (gumbo_child->v.element.tag_namespace) { + case 
GUMBO_NAMESPACE_HTML: + break; + case GUMBO_NAMESPACE_SVG: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg"); + break; + case GUMBO_NAMESPACE_MATHML: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math"); + break; + } + if (ns != NULL) { + xmlSetNs(xml_child, ns); + } + xmlAddChild(xml_node, xml_child); + + // Add the attributes. + const GumboVector *attrs = &gumbo_child->v.element.attributes; + for (size_t i = 0; i < attrs->length; i++) { + const GumboAttribute *attr = attrs->data[i]; + + switch (attr->attr_namespace) { + case GUMBO_ATTR_NAMESPACE_XLINK: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink"); + break; + + case GUMBO_ATTR_NAMESPACE_XML: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml"); + break; + + case GUMBO_ATTR_NAMESPACE_XMLNS: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns"); + break; + + default: + ns = NULL; + } + xmlNewNsProp(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value); + } + + // Add children for this element. + child_index = 0; + gumbo_node = gumbo_child; + xml_node = xml_child; + } + } + } +} + +static void +add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) +{ + const char *input_str = RSTRING_PTR(input); + size_t input_len = RSTRING_LEN(input); + + // Add parse errors to rdoc. 
+ if (output->errors.length) { + const GumboVector *errors = &output->errors; + VALUE rerrors = rb_ary_new2(errors->length); + + for (size_t i = 0; i < errors->length; i++) { + GumboError *err = errors->data[i]; + GumboSourcePosition position = gumbo_error_position(err); + char *msg; + size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg); + VALUE err_str = rb_utf8_str_new(msg, size); + free(msg); + VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError); + const char *error_code = gumbo_error_code(err); + VALUE str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil; + rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER + rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR + rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR + rb_iv_set(syntax_error, "@file", url); + rb_iv_set(syntax_error, "@line", SIZET2NUM(position.line)); + rb_iv_set(syntax_error, "@str1", str1); + rb_iv_set(syntax_error, "@str2", Qnil); + rb_iv_set(syntax_error, "@str3", Qnil); + rb_iv_set(syntax_error, "@int1", INT2NUM(0)); + rb_iv_set(syntax_error, "@column", SIZET2NUM(position.column)); + rb_ary_push(rerrors, syntax_error); + } + rb_iv_set(rdoc, "@errors", rerrors); + } +} + +typedef struct { + GumboOutput *output; + VALUE input; + VALUE url_or_frag; + VALUE klass; + xmlDocPtr doc; +} ParseArgs; + +static VALUE +parse_cleanup(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + gumbo_destroy_output(args->output); + // Make sure garbage collection doesn't mark the objects as being live based + // on references from the ParseArgs. This may be unnecessary. 
+ args->input = Qnil; + args->url_or_frag = Qnil; + if (args->doc != NULL) { + xmlFreeDoc(args->doc); + } + return Qnil; +} + +static VALUE parse_continue(VALUE parse_args); + +/* + * @!visibility protected + */ +static VALUE +parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass) +{ + GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); + options.max_errors = NUM2INT(max_errors); + options.max_tree_depth = NUM2INT(max_depth); + + GumboOutput *output = perform_parse(&options, input); + ParseArgs args = { + .output = output, + .input = input, + .url_or_frag = url, + .klass = klass, + .doc = NULL, + }; + + return rb_ensure(parse_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); +} + +static VALUE +parse_continue(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + GumboOutput *output = args->output; + xmlDocPtr doc; + if (output->document->v.document.has_doctype) { + const char *name = output->document->v.document.name; + const char *public = output->document->v.document.public_identifier; + const char *system = output->document->v.document.system_identifier; + public = public[0] ? public : NULL; + system = system[0] ? system : NULL; + doc = new_html_doc(name, system, public); + } else { + doc = new_html_doc(NULL, NULL, NULL); + } + args->doc = doc; // Make sure doc gets cleaned up if an error is thrown. + build_tree(doc, (xmlNodePtr)doc, output->document); + VALUE rdoc = noko_xml_document_wrap(args->klass, doc); + rb_iv_set(rdoc, "@url", args->url_or_frag); + rb_iv_set(rdoc, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode)); + args->doc = NULL; // The Ruby runtime now owns doc so don't delete it. 
+ add_errors(output, rdoc, args->input, args->url_or_frag); + return rdoc; +} + +static int +lookup_namespace(VALUE node, bool require_known_ns) +{ + ID namespace, href; + CONST_ID(namespace, "namespace"); + CONST_ID(href, "href"); + VALUE ns = rb_funcall(node, namespace, 0); + + if (NIL_P(ns)) { + return GUMBO_NAMESPACE_HTML; + } + ns = rb_funcall(ns, href, 0); + assert(RTEST(ns)); + Check_Type(ns, T_STRING); + + const char *href_ptr = RSTRING_PTR(ns); + size_t href_len = RSTRING_LEN(ns); +#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len)) + if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) { + return GUMBO_NAMESPACE_HTML; + } + if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML")) { + return GUMBO_NAMESPACE_MATHML; + } + if (NAMESPACE_P("http://www.w3.org/2000/svg")) { + return GUMBO_NAMESPACE_SVG; + } +#undef NAMESPACE_P + if (require_known_ns) { + rb_raise(rb_eArgError, "Unexpected namespace URI \"%*s\"", (int)href_len, href_ptr); + } + return -1; +} + +static xmlNodePtr +extract_xml_node(VALUE node) +{ + xmlNodePtr xml_node; + Noko_Node_Get_Struct(node, xmlNode, xml_node); + return xml_node; +} + +static VALUE fragment_continue(VALUE parse_args); + +/* + * @!visibility protected + */ +static VALUE +fragment( + VALUE self, + VALUE doc_fragment, + VALUE tags, + VALUE ctx, + VALUE max_attributes, + VALUE max_errors, + VALUE max_depth +) +{ + ID name = rb_intern_const("name"); + const char *ctx_tag; + GumboNamespaceEnum ctx_ns; + GumboQuirksModeEnum quirks_mode; + bool form = false; + const char *encoding = NULL; + + if (NIL_P(ctx)) { + ctx_tag = "body"; + ctx_ns = GUMBO_NAMESPACE_HTML; + } else if (TYPE(ctx) == T_STRING) { + ctx_tag = StringValueCStr(ctx); + ctx_ns = GUMBO_NAMESPACE_HTML; + size_t len = RSTRING_LEN(ctx); + const char *colon = memchr(ctx_tag, ':', len); + if (colon) { + switch (colon - ctx_tag) { + case 3: + if (st_strncasecmp(ctx_tag, "svg", 3) != 0) { + goto error; + } + ctx_ns = GUMBO_NAMESPACE_SVG; + 
break; + case 4: + if (st_strncasecmp(ctx_tag, "html", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_HTML; + } else if (st_strncasecmp(ctx_tag, "math", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_MATHML; + } else { + goto error; + } + break; + default: +error: + rb_raise(rb_eArgError, "Invalid context namespace '%*s'", (int)(colon - ctx_tag), ctx_tag); + } + ctx_tag = colon + 1; + } else { + // For convenience, put 'svg' and 'math' in their namespaces. + if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0) { + ctx_ns = GUMBO_NAMESPACE_SVG; + } else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_MATHML; + } + } + + // Check if it's a form. + form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0; + } else { + ID element_ = rb_intern_const("element?"); + + // Context fragment name. + VALUE tag_name = rb_funcall(ctx, name, 0); + assert(RTEST(tag_name)); + Check_Type(tag_name, T_STRING); + ctx_tag = StringValueCStr(tag_name); + + // Context fragment namespace. + ctx_ns = lookup_namespace(ctx, true); + + // Check for a form ancestor, including self. + for (VALUE node = ctx; + !NIL_P(node); + node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) { + if (!RTEST(rb_funcall(node, element_, 0))) { + continue; + } + VALUE element_name = rb_funcall(node, name, 0); + if (RSTRING_LEN(element_name) == 4 + && !st_strcasecmp(RSTRING_PTR(element_name), "form") + && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) { + form = true; + break; + } + } + + // Encoding. + if (ctx_ns == GUMBO_NAMESPACE_MATHML + && RSTRING_LEN(tag_name) == 14 + && !st_strcasecmp(ctx_tag, "annotation-xml")) { + VALUE enc = rb_funcall(ctx, rb_intern_const("[]"), + 1, + rb_utf8_str_new_static("encoding", 8)); + if (RTEST(enc)) { + Check_Type(enc, T_STRING); + encoding = StringValueCStr(enc); + } + } + } + + // Quirks mode. 
+ VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0); + VALUE dtd = rb_funcall(doc, internal_subset, 0); + VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode"); + if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) { + quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS; + } else if (NIL_P(dtd)) { + quirks_mode = GUMBO_DOCTYPE_QUIRKS; + } else { + VALUE dtd_name = rb_funcall(dtd, name, 0); + VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0); + VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0); + quirks_mode = gumbo_compute_quirks_mode( + NIL_P(dtd_name) ? NULL : StringValueCStr(dtd_name), + NIL_P(pubid) ? NULL : StringValueCStr(pubid), + NIL_P(sysid) ? NULL : StringValueCStr(sysid) + ); + } + + // Perform a fragment parse. + int depth = NUM2INT(max_depth); + GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); + options.max_errors = NUM2INT(max_errors); + // Add one to account for the HTML element. + options.max_tree_depth = depth < 0 ? -1 : (depth + 1); + options.fragment_context = ctx_tag; + options.fragment_namespace = ctx_ns; + options.fragment_encoding = encoding; + options.quirks_mode = quirks_mode; + options.fragment_context_has_form_ancestor = form; + + GumboOutput *output = perform_parse(&options, tags); + ParseArgs args = { + .output = output, + .input = tags, + .url_or_frag = doc_fragment, + .doc = (xmlDocPtr)extract_xml_node(doc), + }; + rb_ensure(fragment_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); + return Qnil; +} + +static VALUE +fragment_continue(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + GumboOutput *output = args->output; + VALUE doc_fragment = args->url_or_frag; + xmlDocPtr xml_doc = args->doc; + + args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it. 
+ xmlNodePtr xml_frag = extract_xml_node(doc_fragment); + build_tree(xml_doc, xml_frag, output->root); + rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode)); + add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9)); + return Qnil; +} + +// Initialize the Nokogumbo class and fetch constants we will use later. +void +noko_init_gumbo(void) +{ + // Class constants. + cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document); + rb_gc_register_mark_object(cNokogiriHtml5Document); + + // Interned symbols. + internal_subset = rb_intern_const("internal_subset"); + parent = rb_intern_const("parent"); + + // Define Nokogumbo module with parse and fragment methods. + rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6); + rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6); +} + +// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab: diff --git a/ext/nokogiri/html4_document.c b/ext/nokogiri/html4_document.c new file mode 100644 index 0000000000..939321e3bd --- /dev/null +++ b/ext/nokogiri/html4_document.c @@ -0,0 +1,166 @@ +#include + +VALUE cNokogiriHtml4Document ; + +static ID id_encoding_found; +static ID id_to_s; + +/* + * call-seq: + * new + * + * Create a new document + */ +static VALUE +rb_html_document_s_new(int argc, VALUE *argv, VALUE klass) +{ + VALUE uri, external_id, rest, rb_doc; + htmlDocPtr doc; + + rb_scan_args(argc, argv, "0*", &rest); + uri = rb_ary_entry(rest, (long)0); + external_id = rb_ary_entry(rest, (long)1); + + doc = htmlNewDoc( + RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL, + RTEST(external_id) ? 
(const xmlChar *)StringValueCStr(external_id) : NULL + ); + rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv); + return rb_doc ; +} + +/* + * call-seq: + * read_io(io, url, encoding, options) + * + * Read the HTML document from +io+ with given +url+, +encoding+, + * and +options+. See Nokogiri::HTML4.parse + */ +static VALUE +rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options) +{ + VALUE rb_doc; + VALUE rb_error_list = rb_ary_new(); + htmlDocPtr c_doc; + const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url); + const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding); + int options = NUM2INT(rb_options); + + xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher); + + c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options); + + xmlSetStructuredErrorFunc(NULL, NULL); + + /* + * If EncodingFound has occurred in EncodingReader, make sure to do + * a cleanup and propagate the error. 
+ */ + if (rb_respond_to(rb_io, id_encoding_found)) { + VALUE encoding_found = rb_funcall(rb_io, id_encoding_found, 0); + if (!NIL_P(encoding_found)) { + xmlFreeDoc(c_doc); + rb_exc_raise(encoding_found); + } + } + + if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) { + VALUE rb_error ; + + xmlFreeDoc(c_doc); + + rb_error = rb_ary_entry(rb_error_list, 0); + if (rb_error == Qnil) { + rb_raise(rb_eRuntimeError, "Could not parse document"); + } else { + VALUE exception_message = rb_funcall(rb_error, id_to_s, 0); + exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "), + exception_message); + rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError)); + } + + return Qnil; + } + + rb_doc = noko_xml_document_wrap(klass, c_doc); + rb_iv_set(rb_doc, "@errors", rb_error_list); + return rb_doc; +} + +/* + * call-seq: + * read_memory(string, url, encoding, options) + * + * Read the HTML document contained in +string+ with given +url+, +encoding+, + * and +options+. See Nokogiri::HTML4.parse + */ +static VALUE +rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options) +{ + VALUE rb_doc; + VALUE rb_error_list = rb_ary_new(); + htmlDocPtr c_doc; + const char *c_buffer = StringValuePtr(rb_html); + const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url); + const char *c_encoding = NIL_P(rb_encoding) ? 
NULL : StringValueCStr(rb_encoding); + int html_len = (int)RSTRING_LEN(rb_html); + int options = NUM2INT(rb_options); + + xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher); + + c_doc = htmlReadMemory(c_buffer, html_len, c_url, c_encoding, options); + + xmlSetStructuredErrorFunc(NULL, NULL); + + if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) { + VALUE rb_error ; + + xmlFreeDoc(c_doc); + + rb_error = rb_ary_entry(rb_error_list, 0); + if (rb_error == Qnil) { + rb_raise(rb_eRuntimeError, "Could not parse document"); + } else { + VALUE exception_message = rb_funcall(rb_error, id_to_s, 0); + exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "), + exception_message); + rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError)); + } + + return Qnil; + } + + rb_doc = noko_xml_document_wrap(klass, c_doc); + rb_iv_set(rb_doc, "@errors", rb_error_list); + return rb_doc; +} + +/* + * call-seq: + * type + * + * The type for this document + */ +static VALUE +rb_html_document_type(VALUE self) +{ + htmlDocPtr doc; + Data_Get_Struct(self, xmlDoc, doc); + return INT2NUM(doc->type); +} + +void +noko_init_html_document(void) +{ + assert(cNokogiriXmlDocument); + cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument); + + rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4); + rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4); + rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1); + + rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0); + + id_encoding_found = rb_intern("encoding_found"); + id_to_s = rb_intern("to_s"); +} diff --git a/ext/nokogiri/html4_element_description.c b/ext/nokogiri/html4_element_description.c new file mode 100644 index 
0000000000..0d88c16ca3 --- /dev/null +++ b/ext/nokogiri/html4_element_description.c @@ -0,0 +1,294 @@ +#include + +VALUE cNokogiriHtml4ElementDescription ; + +/* + * call-seq: + * required_attributes + * + * A list of required attributes for this element (empty when it has none) + */ +static VALUE +required_attributes(VALUE self) +{ + const htmlElemDesc *description; + VALUE list; + int i; + + Data_Get_Struct(self, htmlElemDesc, description); + + list = rb_ary_new(); + + if (NULL == description->attrs_req) { return list; } + + for (i = 0; description->attrs_req[i]; i++) { /* walk attrs_req itself until its NULL entry; the old attrs_depr terminator could be NULL or a different length */ + rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i])); + } + + return list; +} + +/* + * call-seq: + * deprecated_attributes + * + * A list of deprecated attributes for this element + */ +static VALUE +deprecated_attributes(VALUE self) +{ + const htmlElemDesc *description; + VALUE list; + int i; + + Data_Get_Struct(self, htmlElemDesc, description); + + list = rb_ary_new(); + + if (NULL == description->attrs_depr) { return list; } + + for (i = 0; description->attrs_depr[i]; i++) { + rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i])); + } + + return list; +} + +/* + * call-seq: + * optional_attributes + * + * A list of optional attributes for this element + */ +static VALUE +optional_attributes(VALUE self) +{ + const htmlElemDesc *description; + VALUE list; + int i; + + Data_Get_Struct(self, htmlElemDesc, description); + + list = rb_ary_new(); + + if (NULL == description->attrs_opt) { return list; } + + for (i = 0; description->attrs_opt[i]; i++) { + rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i])); + } + + return list; +} + +/* + * call-seq: + * default_sub_element + * + * The default sub element for this element + */ +static VALUE +default_sub_element(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->defaultsubelt) { + return NOKOGIRI_STR_NEW2(description->defaultsubelt); + } + + return Qnil; +} + +/* + *
call-seq: + * sub_elements + * + * A list of allowed sub elements for this element. + */ +static VALUE +sub_elements(VALUE self) +{ + const htmlElemDesc *description; + VALUE list; + int i; + + Data_Get_Struct(self, htmlElemDesc, description); + + list = rb_ary_new(); + + if (NULL == description->subelts) { return list; } /* no sub-element table: report an empty list */ + + for (i = 0; description->subelts[i]; i++) { /* collect names until the first NULL entry */ + rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i])); + } + + return list; +} + +/* + * call-seq: + * description + * + * The description for this element (the +desc+ field of the wrapped htmlElemDesc) + */ +static VALUE +description(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + return NOKOGIRI_STR_NEW2(description->desc); +} + +/* + * call-seq: + * inline? + * + * Is this element an inline element? True when the wrapped +isinline+ field is non-zero. + */ +static VALUE +inline_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->isinline) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * deprecated? + * + * Is this element deprecated? True when the wrapped +depr+ field is non-zero. + */ +static VALUE +deprecated_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->depr) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * empty? + * + * Is this an empty element? True when the wrapped +empty+ field is non-zero. + */ +static VALUE +empty_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->empty) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * save_end_tag? + * + * Should the end tag be saved? True when the wrapped +saveEndTag+ field is non-zero. + */ +static VALUE +save_end_tag_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->saveEndTag) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * implied_end_tag? + * + * Can the end tag be implied for this tag?
+ */ +static VALUE +implied_end_tag_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->endTag) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * implied_start_tag? + * + * Can the start tag be implied for this tag? True when the wrapped +startTag+ field is non-zero. + */ +static VALUE +implied_start_tag_eh(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (description->startTag) { return Qtrue; } + return Qfalse; +} + +/* + * call-seq: + * name + * + * Get the tag name for this ElementDescription, or nil when the wrapped description has no name + */ +static VALUE +name(VALUE self) +{ + const htmlElemDesc *description; + Data_Get_Struct(self, htmlElemDesc, description); + + if (NULL == description->name) { return Qnil; } + return NOKOGIRI_STR_NEW2(description->name); +} + +/* + * call-seq: + * [](tag_name) + * + * Get ElementDescription for +tag_name+, or nil when htmlTagLookup finds no entry for it + */ +static VALUE +get_description(VALUE klass, VALUE tag_name) +{ + const htmlElemDesc *description = htmlTagLookup( + (const xmlChar *)StringValueCStr(tag_name) + ); + + if (NULL == description) { return Qnil; } + return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description)); /* wrapped with no mark and no free function */ +} + +void +noko_init_html_element_description(void) +{ + cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject); + + rb_undef_alloc_func(cNokogiriHtml4ElementDescription); + + rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1); + + rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "save_end_tag?", save_end_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0); +
rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0); +} diff --git a/ext/nokogiri/html4_entity_lookup.c b/ext/nokogiri/html4_entity_lookup.c new file mode 100644 index 0000000000..85ad384253 --- /dev/null +++ b/ext/nokogiri/html4_entity_lookup.c @@ -0,0 +1,37 @@ +#include + +static VALUE cNokogiriHtml4EntityLookup; + +/* + * call-seq: + * get(key) + * + * Get the HTML4::EntityDescription for +key+ + */ +static VALUE +get(VALUE _, VALUE rb_entity_name) +{ + VALUE cNokogiriHtml4EntityDescription; + const htmlEntityDesc *c_entity_desc; + VALUE rb_constructor_args[3]; + + c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name)); + if (NULL == c_entity_desc) { + return Qnil; + } + + rb_constructor_args[0] = UINT2NUM(c_entity_desc->value); + rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name); + rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc); + + cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription")); + return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription); +} + +void +noko_init_html_entity_lookup(void) +{ + cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject); + + rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1); 
+} diff --git a/ext/nokogiri/html4_sax_parser_context.c b/ext/nokogiri/html4_sax_parser_context.c new file mode 100644 index 0000000000..6615bb2684 --- /dev/null +++ b/ext/nokogiri/html4_sax_parser_context.c @@ -0,0 +1,114 @@ +#include + +VALUE cNokogiriHtml4SaxParserContext ; + +static void +deallocate(xmlParserCtxtPtr ctxt) +{ + ctxt->sax = NULL; + htmlFreeParserCtxt(ctxt); +} + +static VALUE +parse_memory(VALUE klass, VALUE data, VALUE encoding) +{ + htmlParserCtxtPtr ctxt; + + Check_Type(data, T_STRING); + + if (!(int)RSTRING_LEN(data)) { + rb_raise(rb_eRuntimeError, "data cannot be empty"); + } + + ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data), + (int)RSTRING_LEN(data)); + if (ctxt->sax) { + xmlFree(ctxt->sax); + ctxt->sax = NULL; + } + + if (RTEST(encoding)) { + xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding)); + if (enc != NULL) { + xmlSwitchToEncoding(ctxt, enc); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + rb_raise(rb_eRuntimeError, "Unsupported encoding %s", + StringValueCStr(encoding)); + } + } + } + + return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); +} + +static VALUE +parse_file(VALUE klass, VALUE filename, VALUE encoding) +{ + htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt( + StringValueCStr(filename), + StringValueCStr(encoding) + ); + return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); +} + +static VALUE +parse_doc(VALUE ctxt_val) +{ + htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val; + htmlParseDocument(ctxt); + return Qnil; +} + +static VALUE +parse_doc_finalize(VALUE ctxt_val) +{ + htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val; + + if (ctxt->myDoc) { + xmlFreeDoc(ctxt->myDoc); + } + + NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData); + return Qnil; +} + +static VALUE +parse_with(VALUE self, VALUE sax_handler) +{ + htmlParserCtxtPtr ctxt; + htmlSAXHandlerPtr sax; + + if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) { + rb_raise(rb_eArgError, "argument must be a 
Nokogiri::XML::SAX::Parser"); + } + + Data_Get_Struct(self, htmlParserCtxt, ctxt); + Data_Get_Struct(sax_handler, htmlSAXHandler, sax); + + /* Free the sax handler since we'll assign our own */ + if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) { + xmlFree(ctxt->sax); + } + + ctxt->sax = sax; + ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + + xmlSetStructuredErrorFunc(NULL, NULL); + + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); + + return self; +} + +void +noko_init_html_sax_parser_context(void) +{ + assert(cNokogiriXmlSaxParserContext); + cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext", + cNokogiriXmlSaxParserContext); + + rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2); + rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2); + + rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1); +} diff --git a/ext/nokogiri/html4_sax_push_parser.c b/ext/nokogiri/html4_sax_push_parser.c new file mode 100644 index 0000000000..61d72a3ee3 --- /dev/null +++ b/ext/nokogiri/html4_sax_push_parser.c @@ -0,0 +1,95 @@ +#include + +VALUE cNokogiriHtml4SaxPushParser; + +/* + * call-seq: + * native_write(chunk, last_chunk) + * + * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle + */ +static VALUE +native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) +{ + xmlParserCtxtPtr ctx; + const char *chunk = NULL; + int size = 0; + int status = 0; + libxmlStructuredErrorHandlerState handler_state; + + Data_Get_Struct(self, xmlParserCtxt, ctx); + + if (Qnil != _chunk) { + chunk = StringValuePtr(_chunk); + size = (int)RSTRING_LEN(_chunk); + } + + Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL); + + status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 
1 : 0); + + Nokogiri_structured_error_func_restore(&handler_state); + + if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) { + // TODO: there appear to be no tests for this block + xmlErrorPtr e = xmlCtxtGetLastError(ctx); + Nokogiri_error_raise(NULL, e); + } + + return self; +} + +/* + * call-seq: + * initialize_native(xml_sax, filename) + * + * Initialize the push parser with +xml_sax+ using +filename+ + */ +static VALUE +initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename, + VALUE encoding) +{ + htmlSAXHandlerPtr sax; + const char *filename = NULL; + htmlParserCtxtPtr ctx; + xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; + + Data_Get_Struct(_xml_sax, xmlSAXHandler, sax); + + if (_filename != Qnil) { filename = StringValueCStr(_filename); } + + if (!NIL_P(encoding)) { + enc = xmlParseCharEncoding(StringValueCStr(encoding)); + if (enc == XML_CHAR_ENCODING_ERROR) { + rb_raise(rb_eArgError, "Unsupported Encoding"); + } + } + + ctx = htmlCreatePushParserCtxt( + sax, + NULL, + NULL, + 0, + filename, + enc + ); + if (ctx == NULL) { + rb_raise(rb_eRuntimeError, "Could not create a parser context"); + } + + ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self); + + ctx->sax2 = 1; + DATA_PTR(self) = ctx; + return self; +} + +void +noko_init_html_sax_push_parser(void) +{ + assert(cNokogiriXmlSaxPushParser); + cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser); + + rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3); + rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2); +} diff --git a/ext/nokogiri/html_document.c b/ext/nokogiri/html_document.c deleted file mode 100644 index 5cf93a8413..0000000000 --- a/ext/nokogiri/html_document.c +++ /dev/null @@ -1,170 +0,0 @@ -#include - -static ID id_encoding_found; - -/* - * call-seq: - * new - * - * Create a new document - */ -static VALUE new(int argc, VALUE *argv, VALUE klass) -{ 
- VALUE uri, external_id, rest, rb_doc; - htmlDocPtr doc; - - rb_scan_args(argc, argv, "0*", &rest); - uri = rb_ary_entry(rest, (long)0); - external_id = rb_ary_entry(rest, (long)1); - - doc = htmlNewDoc( - RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL, - RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL - ); - rb_doc = Nokogiri_wrap_xml_document(klass, doc); - rb_obj_call_init(rb_doc, argc, argv); - return rb_doc ; -} - -/* - * call-seq: - * read_io(io, url, encoding, options) - * - * Read the HTML document from +io+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse - */ -static VALUE read_io( VALUE klass, - VALUE io, - VALUE url, - VALUE encoding, - VALUE options ) -{ - const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url); - const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding); - VALUE error_list = rb_ary_new(); - VALUE document; - htmlDocPtr doc; - - xmlResetLastError(); - xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); - - doc = htmlReadIO( - io_read_callback, - io_close_callback, - (void *)io, - c_url, - c_enc, - (int)NUM2INT(options) - ); - xmlSetStructuredErrorFunc(NULL, NULL); - - /* - * If EncodingFound has occurred in EncodingReader, make sure to do - * a cleanup and propagate the error. 
- */ - if (rb_respond_to(io, id_encoding_found)) { - VALUE encoding_found = rb_funcall(io, id_encoding_found, 0); - if (!NIL_P(encoding_found)) { - xmlFreeDoc(doc); - rb_exc_raise(encoding_found); - } - } - - if(doc == NULL) { - xmlErrorPtr error; - - xmlFreeDoc(doc); - - error = xmlGetLastError(); - if(error) - rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else - rb_raise(rb_eRuntimeError, "Could not parse document"); - - return Qnil; - } - - document = Nokogiri_wrap_xml_document(klass, doc); - rb_iv_set(document, "@errors", error_list); - return document; -} - -/* - * call-seq: - * read_memory(string, url, encoding, options) - * - * Read the HTML document contained in +string+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse - */ -static VALUE read_memory( VALUE klass, - VALUE string, - VALUE url, - VALUE encoding, - VALUE options ) -{ - const char * c_buffer = StringValuePtr(string); - const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url); - const char * c_enc = NIL_P(encoding) ? 
NULL : StringValueCStr(encoding); - int len = (int)RSTRING_LEN(string); - VALUE error_list = rb_ary_new(); - VALUE document; - htmlDocPtr doc; - - xmlResetLastError(); - xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); - - doc = htmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options)); - xmlSetStructuredErrorFunc(NULL, NULL); - - if(doc == NULL) { - xmlErrorPtr error; - - xmlFreeDoc(doc); - - error = xmlGetLastError(); - if(error) - rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else - rb_raise(rb_eRuntimeError, "Could not parse document"); - - return Qnil; - } - - document = Nokogiri_wrap_xml_document(klass, doc); - rb_iv_set(document, "@errors", error_list); - return document; -} - -/* - * call-seq: - * type - * - * The type for this document - */ -static VALUE type(VALUE self) -{ - htmlDocPtr doc; - Data_Get_Struct(self, xmlDoc, doc); - return INT2NUM((long)doc->type); -} - -VALUE cNokogiriHtmlDocument ; -void init_html_document() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE html = rb_define_module_under(nokogiri, "HTML"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE xml_doc = rb_define_class_under(xml, "Document", node); - VALUE klass = rb_define_class_under(html, "Document", xml_doc); - - cNokogiriHtmlDocument = klass; - - rb_define_singleton_method(klass, "read_memory", read_memory, 4); - rb_define_singleton_method(klass, "read_io", read_io, 4); - rb_define_singleton_method(klass, "new", new, -1); - - rb_define_method(klass, "type", type, 0); - - id_encoding_found = rb_intern("encoding_found"); -} diff --git a/ext/nokogiri/html_document.h b/ext/nokogiri/html_document.h deleted file mode 100644 index 42271875d6..0000000000 --- a/ext/nokogiri/html_document.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_HTML_DOCUMENT -#define NOKOGIRI_HTML_DOCUMENT - -#include - -void init_html_document(); - -extern VALUE 
cNokogiriHtmlDocument ; - -#endif diff --git a/ext/nokogiri/html_element_description.c b/ext/nokogiri/html_element_description.c deleted file mode 100644 index f5e9270d34..0000000000 --- a/ext/nokogiri/html_element_description.c +++ /dev/null @@ -1,279 +0,0 @@ -#include - -/* - * call-seq: - * required_attributes - * - * A list of required attributes for this element - */ -static VALUE required_attributes(VALUE self) -{ - const htmlElemDesc * description; - VALUE list; - int i; - - Data_Get_Struct(self, htmlElemDesc, description); - - list = rb_ary_new(); - - if(NULL == description->attrs_req) return list; - - for(i = 0; description->attrs_depr[i]; i++) { - rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i])); - } - - return list; -} - -/* - * call-seq: - * deprecated_attributes - * - * A list of deprecated attributes for this element - */ -static VALUE deprecated_attributes(VALUE self) -{ - const htmlElemDesc * description; - VALUE list; - int i; - - Data_Get_Struct(self, htmlElemDesc, description); - - list = rb_ary_new(); - - if(NULL == description->attrs_depr) return list; - - for(i = 0; description->attrs_depr[i]; i++) { - rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i])); - } - - return list; -} - -/* - * call-seq: - * optional_attributes - * - * A list of optional attributes for this element - */ -static VALUE optional_attributes(VALUE self) -{ - const htmlElemDesc * description; - VALUE list; - int i; - - Data_Get_Struct(self, htmlElemDesc, description); - - list = rb_ary_new(); - - if(NULL == description->attrs_opt) return list; - - for(i = 0; description->attrs_opt[i]; i++) { - rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i])); - } - - return list; -} - -/* - * call-seq: - * default_sub_element - * - * The default sub element for this element - */ -static VALUE default_sub_element(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if 
(description->defaultsubelt) - return NOKOGIRI_STR_NEW2(description->defaultsubelt); - - return Qnil; -} - -/* - * call-seq: - * sub_elements - * - * A list of allowed sub elements for this element. - */ -static VALUE sub_elements(VALUE self) -{ - const htmlElemDesc * description; - VALUE list; - int i; - - Data_Get_Struct(self, htmlElemDesc, description); - - list = rb_ary_new(); - - if(NULL == description->subelts) return list; - - for(i = 0; description->subelts[i]; i++) { - rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i])); - } - - return list; -} - -/* - * call-seq: - * description - * - * The description for this element - */ -static VALUE description(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - return NOKOGIRI_STR_NEW2(description->desc); -} - -/* - * call-seq: - * inline? - * - * Is this element an inline element? - */ -static VALUE inline_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->isinline) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * deprecated? - * - * Is this element deprecated? - */ -static VALUE deprecated_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->depr) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * empty? - * - * Is this an empty element? - */ -static VALUE empty_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->empty) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * save_end_tag? - * - * Should the end tag be saved? - */ -static VALUE save_end_tag_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->saveEndTag) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * implied_end_tag? - * - * Can the end tag be implied for this tag? 
- */ -static VALUE implied_end_tag_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->endTag) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * implied_start_tag? - * - * Can the start tag be implied for this tag? - */ -static VALUE implied_start_tag_eh(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(description->startTag) return Qtrue; - return Qfalse; -} - -/* - * call-seq: - * name - * - * Get the tag name for this ElemementDescription - */ -static VALUE name(VALUE self) -{ - const htmlElemDesc * description; - Data_Get_Struct(self, htmlElemDesc, description); - - if(NULL == description->name) return Qnil; - return NOKOGIRI_STR_NEW2(description->name); -} - -/* - * call-seq: - * [](tag_name) - * - * Get ElemementDescription for +tag_name+ - */ -static VALUE get_description(VALUE klass, VALUE tag_name) -{ - const htmlElemDesc * description = htmlTagLookup( - (const xmlChar *)StringValueCStr(tag_name) - ); - - if(NULL == description) return Qnil; - return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description); -} - -VALUE cNokogiriHtmlElementDescription ; -void init_html_element_description() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE html = rb_define_module_under(nokogiri, "HTML"); - VALUE klass = rb_define_class_under(html, "ElementDescription",rb_cObject); - - cNokogiriHtmlElementDescription = klass; - - rb_define_singleton_method(klass, "[]", get_description, 1); - - rb_define_method(klass, "name", name, 0); - rb_define_method(klass, "implied_start_tag?", implied_start_tag_eh, 0); - rb_define_method(klass, "implied_end_tag?", implied_end_tag_eh, 0); - rb_define_method(klass, "save_end_tag?", save_end_tag_eh, 0); - rb_define_method(klass, "empty?", empty_eh, 0); - rb_define_method(klass, "deprecated?", deprecated_eh, 0); - rb_define_method(klass, "inline?", inline_eh, 0); - 
rb_define_method(klass, "description", description, 0); - rb_define_method(klass, "sub_elements", sub_elements, 0); - rb_define_method(klass, "default_sub_element", default_sub_element, 0); - rb_define_method(klass, "optional_attributes", optional_attributes, 0); - rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0); - rb_define_method(klass, "required_attributes", required_attributes, 0); -} diff --git a/ext/nokogiri/html_element_description.h b/ext/nokogiri/html_element_description.h deleted file mode 100644 index ac058262cf..0000000000 --- a/ext/nokogiri/html_element_description.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_HTML_ELEMENT_DESCRIPTION -#define NOKOGIRI_HTML_ELEMENT_DESCRIPTION - -#include - -void init_html_element_description(); - -extern VALUE cNokogiriHtmlElementDescription ; - -#endif diff --git a/ext/nokogiri/html_entity_lookup.c b/ext/nokogiri/html_entity_lookup.c deleted file mode 100644 index c9f745550e..0000000000 --- a/ext/nokogiri/html_entity_lookup.c +++ /dev/null @@ -1,32 +0,0 @@ -#include - -/* - * call-seq: - * get(key) - * - * Get the HTML::EntityDescription for +key+ - */ -static VALUE get(VALUE self, VALUE key) -{ - const htmlEntityDesc * desc = - htmlEntityLookup((const xmlChar *)StringValueCStr(key)); - VALUE klass, args[3]; - - if(NULL == desc) return Qnil; - klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription")); - - args[0] = INT2NUM((long)desc->value); - args[1] = NOKOGIRI_STR_NEW2(desc->name); - args[2] = NOKOGIRI_STR_NEW2(desc->desc); - - return rb_class_new_instance(3, args, klass); -} - -void init_html_entity_lookup() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE html = rb_define_module_under(nokogiri, "HTML"); - VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject); - - rb_define_method(klass, "get", get, 1); -} diff --git a/ext/nokogiri/html_entity_lookup.h b/ext/nokogiri/html_entity_lookup.h deleted file mode 100644 index 1c75966d90..0000000000 --- 
a/ext/nokogiri/html_entity_lookup.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef NOKOGIRI_HTML_ENTITY_LOOKUP -#define NOKOGIRI_HTML_ENTITY_LOOKUP - -#include - -void init_html_entity_lookup(); - -#endif diff --git a/ext/nokogiri/html_sax_parser_context.c b/ext/nokogiri/html_sax_parser_context.c deleted file mode 100644 index c75c5cb1f1..0000000000 --- a/ext/nokogiri/html_sax_parser_context.c +++ /dev/null @@ -1,116 +0,0 @@ -#include - -VALUE cNokogiriHtmlSaxParserContext ; - -static void deallocate(xmlParserCtxtPtr ctxt) -{ - NOKOGIRI_DEBUG_START(handler); - - ctxt->sax = NULL; - - htmlFreeParserCtxt(ctxt); - - NOKOGIRI_DEBUG_END(handler); -} - -static VALUE -parse_memory(VALUE klass, VALUE data, VALUE encoding) -{ - htmlParserCtxtPtr ctxt; - - if (NIL_P(data)) - rb_raise(rb_eArgError, "data cannot be nil"); - if (!(int)RSTRING_LEN(data)) - rb_raise(rb_eRuntimeError, "data cannot be empty"); - - ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data), - (int)RSTRING_LEN(data)); - if (ctxt->sax) { - xmlFree(ctxt->sax); - ctxt->sax = NULL; - } - - if (RTEST(encoding)) { - xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding)); - if (enc != NULL) { - xmlSwitchToEncoding(ctxt, enc); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - rb_raise(rb_eRuntimeError, "Unsupported encoding %s", - StringValueCStr(encoding)); - } - } - } - - return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); -} - -static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding) -{ - htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt( - StringValueCStr(filename), - StringValueCStr(encoding) - ); - return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); -} - -static VALUE -parse_doc(VALUE ctxt_val) -{ - htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val; - htmlParseDocument(ctxt); - return Qnil; -} - -static VALUE -parse_doc_finalize(VALUE ctxt_val) -{ - htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val; - - if (ctxt->myDoc) - xmlFreeDoc(ctxt->myDoc); - - 
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData); - return Qnil; -} - -static VALUE -parse_with(VALUE self, VALUE sax_handler) -{ - htmlParserCtxtPtr ctxt; - htmlSAXHandlerPtr sax; - - if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) - rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser"); - - Data_Get_Struct(self, htmlParserCtxt, ctxt); - Data_Get_Struct(sax_handler, htmlSAXHandler, sax); - - /* Free the sax handler since we'll assign our own */ - if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) - xmlFree(ctxt->sax); - - ctxt->sax = sax; - ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); - - rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); - - return self; -} - -void init_html_sax_parser_context() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE html = rb_define_module_under(nokogiri, "HTML"); - VALUE sax = rb_define_module_under(xml, "SAX"); - VALUE hsax = rb_define_module_under(html, "SAX"); - VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject); - VALUE klass = rb_define_class_under(hsax, "ParserContext", pc); - - cNokogiriHtmlSaxParserContext = klass; - - rb_define_singleton_method(klass, "memory", parse_memory, 2); - rb_define_singleton_method(klass, "file", parse_file, 2); - - rb_define_method(klass, "parse_with", parse_with, 1); -} diff --git a/ext/nokogiri/html_sax_parser_context.h b/ext/nokogiri/html_sax_parser_context.h deleted file mode 100644 index 6e08dcf303..0000000000 --- a/ext/nokogiri/html_sax_parser_context.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef NOKOGIRI_HTML_SAX_PARSER_CONTEXT -#define NOKOGIRI_HTML_SAX_PARSER_CONTEXT - -#include - -extern VALUE cNokogiriHtmlSaxParserContext; - -void init_html_sax_parser_context(); - -#endif - diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html_sax_push_parser.c deleted file mode 100644 index 2df4532f10..0000000000 --- 
a/ext/nokogiri/html_sax_push_parser.c +++ /dev/null @@ -1,87 +0,0 @@ -#include - -/* - * call-seq: - * native_write(chunk, last_chunk) - * - * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle - */ -static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) -{ - xmlParserCtxtPtr ctx; - const char * chunk = NULL; - int size = 0; - - - Data_Get_Struct(self, xmlParserCtxt, ctx); - - if(Qnil != _chunk) { - chunk = StringValuePtr(_chunk); - size = (int)RSTRING_LEN(_chunk); - } - - if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { - if (!(ctx->options & XML_PARSE_RECOVER)) { - xmlErrorPtr e = xmlCtxtGetLastError(ctx); - Nokogiri_error_raise(NULL, e); - } - } - - return self; -} - -/* - * call-seq: - * initialize_native(xml_sax, filename) - * - * Initialize the push parser with +xml_sax+ using +filename+ - */ -static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename, - VALUE encoding) -{ - htmlSAXHandlerPtr sax; - const char * filename = NULL; - htmlParserCtxtPtr ctx; - xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; - - Data_Get_Struct(_xml_sax, xmlSAXHandler, sax); - - if(_filename != Qnil) filename = StringValueCStr(_filename); - - if (!NIL_P(encoding)) { - enc = xmlParseCharEncoding(StringValueCStr(encoding)); - if (enc == XML_CHAR_ENCODING_ERROR) - rb_raise(rb_eArgError, "Unsupported Encoding"); - } - - ctx = htmlCreatePushParserCtxt( - sax, - NULL, - NULL, - 0, - filename, - enc - ); - if(ctx == NULL) - rb_raise(rb_eRuntimeError, "Could not create a parser context"); - - ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self); - - ctx->sax2 = 1; - DATA_PTR(self) = ctx; - return self; -} - -VALUE cNokogiriHtmlSaxPushParser; -void init_html_sax_push_parser() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE html = rb_define_module_under(nokogiri, "HTML"); - VALUE sax = rb_define_module_under(html, "SAX"); - VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser); - - 
cNokogiriHtmlSaxPushParser = klass; - - rb_define_private_method(klass, "initialize_native", initialize_native, 3); - rb_define_private_method(klass, "native_write", native_write, 2); -} diff --git a/ext/nokogiri/html_sax_push_parser.h b/ext/nokogiri/html_sax_push_parser.h deleted file mode 100644 index 1c5f239904..0000000000 --- a/ext/nokogiri/html_sax_push_parser.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER -#define NOKOGIRI_HTML_SAX_PUSH_PARSER - -#include - -void init_html_sax_push_parser(); - -extern VALUE cNokogiriHtmlSaxPushParser ; -#endif diff --git a/ext/nokogiri/libxml2_backwards_compat.c b/ext/nokogiri/libxml2_backwards_compat.c new file mode 100644 index 0000000000..f5255cb989 --- /dev/null +++ b/ext/nokogiri/libxml2_backwards_compat.c @@ -0,0 +1,121 @@ +#ifndef HAVE_XMLFIRSTELEMENTCHILD +#include +/** + * xmlFirstElementChild: + * @parent: the parent node + * + * Finds the first child node of that element which is a Element node + * Note the handling of entities references is different than in + * the W3C DOM element traversal spec since we don't have back reference + * from entities content to entities references. + * + * Returns the first element child or NULL if not available + */ +xmlNodePtr +xmlFirstElementChild(xmlNodePtr parent) +{ + xmlNodePtr cur = NULL; + + if (parent == NULL) { + return (NULL); + } + switch (parent->type) { + case XML_ELEMENT_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + cur = parent->children; + break; + default: + return (NULL); + } + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + return (cur); + } + cur = cur->next; + } + return (NULL); +} + +/** + * xmlNextElementSibling: + * @node: the current node + * + * Finds the first closest next sibling of the node which is an + * element node. 
+ * Note the handling of entities references is different than in + * the W3C DOM element traversal spec since we don't have back reference + * from entities content to entities references. + * + * Returns the next element sibling or NULL if not available + */ +xmlNodePtr +xmlNextElementSibling(xmlNodePtr node) +{ + if (node == NULL) { + return (NULL); + } + switch (node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DTD_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + node = node->next; + break; + default: + return (NULL); + } + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) { + return (node); + } + node = node->next; + } + return (NULL); +} + +/** + * xmlLastElementChild: + * @parent: the parent node + * + * Finds the last child node of that element which is a Element node + * Note the handling of entities references is different than in + * the W3C DOM element traversal spec since we don't have back reference + * from entities content to entities references. 
+ * + * Returns the last element child or NULL if not available + */ +xmlNodePtr +xmlLastElementChild(xmlNodePtr parent) +{ + xmlNodePtr cur = NULL; + + if (parent == NULL) { + return (NULL); + } + switch (parent->type) { + case XML_ELEMENT_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + cur = parent->last; + break; + default: + return (NULL); + } + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + return (cur); + } + cur = cur->prev; + } + return (NULL); +} +#endif diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index d4dedee5b8..3b6ef7625b 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -1,101 +1,175 @@ #include VALUE mNokogiri ; +VALUE mNokogiriGumbo ; +VALUE mNokogiriHtml4 ; +VALUE mNokogiriHtml4Sax ; +VALUE mNokogiriHtml5 ; VALUE mNokogiriXml ; -VALUE mNokogiriHtml ; -VALUE mNokogiriXslt ; VALUE mNokogiriXmlSax ; -VALUE mNokogiriHtmlSax ; - -#ifdef USE_INCLUDED_VASPRINTF -/* - * I srsly hate windows. it doesn't have vasprintf. - * Thank you Geoffroy Couprie for this implementation of vasprintf! 
- */ -int vasprintf (char **strp, const char *fmt, va_list ap) +VALUE mNokogiriXmlXpath ; +VALUE mNokogiriXslt ; + +VALUE cNokogiriSyntaxError; +VALUE cNokogiriXmlCharacterData; +VALUE cNokogiriXmlElement; +VALUE cNokogiriXmlXpathSyntaxError; + +void noko_init_xml_attr(void); +void noko_init_xml_attribute_decl(void); +void noko_init_xml_cdata(void); +void noko_init_xml_comment(void); +void noko_init_xml_document(void); +void noko_init_xml_document_fragment(void); +void noko_init_xml_dtd(void); +void noko_init_xml_element_content(void); +void noko_init_xml_element_decl(void); +void noko_init_xml_encoding_handler(void); +void noko_init_xml_entity_decl(void); +void noko_init_xml_entity_reference(void); +void noko_init_xml_namespace(void); +void noko_init_xml_node(void); +void noko_init_xml_node_set(void); +void noko_init_xml_processing_instruction(void); +void noko_init_xml_reader(void); +void noko_init_xml_relax_ng(void); +void noko_init_xml_sax_parser(void); +void noko_init_xml_sax_parser_context(void); +void noko_init_xml_sax_push_parser(void); +void noko_init_xml_schema(void); +void noko_init_xml_syntax_error(void); +void noko_init_xml_text(void); +void noko_init_xml_xpath_context(void); +void noko_init_xslt_stylesheet(void); +void noko_init_html_document(void); +void noko_init_html_element_description(void); +void noko_init_html_entity_lookup(void); +void noko_init_html_sax_parser_context(void); +void noko_init_html_sax_push_parser(void); +void noko_init_gumbo(void); +void noko_init_test_global_handlers(void); + +static ID id_read, id_write, id_external_encoding; + + +static VALUE +noko_io_read_check(VALUE val) { - /* Mingw32/64 have a broken vsnprintf implementation that fails when - * using a zero-byte limit in order to retrieve the required size for malloc. - * So we use a one byte buffer instead. 
- */ - char tmp[1]; - int len = vsnprintf (tmp, 1, fmt, ap) + 1; - char *res = (char *)malloc((unsigned int)len); - if (res == NULL) - return -1; - *strp = res; - return vsnprintf(res, (unsigned int)len, fmt, ap); + VALUE *args = (VALUE *)val; + return rb_funcall(args[0], id_read, 1, args[1]); } -#endif -void vasprintf_free (void *p) + +static VALUE +noko_io_read_failed(VALUE arg, VALUE exc) { - free(p); + return Qundef; } -#ifdef HAVE_RUBY_UTIL_H -#include "ruby/util.h" -#else -#include "util.h" -#endif -void nokogiri_root_node(xmlNodePtr node) +int +noko_io_read(void *io, char *c_buffer, int c_buffer_len) +{ + VALUE rb_io = (VALUE)io; + VALUE rb_read_string, rb_args[2]; + size_t n_bytes_read, safe_len; + + rb_args[0] = rb_io; + rb_args[1] = INT2NUM(c_buffer_len); + + rb_read_string = rb_rescue(noko_io_read_check, (VALUE)rb_args, noko_io_read_failed, 0); + + if (NIL_P(rb_read_string)) { return 0; } + if (rb_read_string == Qundef) { return -1; } + if (TYPE(rb_read_string) != T_STRING) { return -1; } + + n_bytes_read = (size_t)RSTRING_LEN(rb_read_string); + safe_len = (n_bytes_read > (size_t)c_buffer_len) ? 
(size_t)c_buffer_len : n_bytes_read; + memcpy(c_buffer, StringValuePtr(rb_read_string), safe_len); + + return (int)safe_len; +} + + +static VALUE +noko_io_write_check(VALUE rb_args) { - xmlDocPtr doc; - nokogiriTuplePtr tuple; + VALUE rb_io = ((VALUE *)rb_args)[0]; + VALUE rb_output = ((VALUE *)rb_args)[1]; + return rb_funcall(rb_io, id_write, 1, rb_output); +} + - doc = node->doc; - if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; - tuple = (nokogiriTuplePtr)doc->_private; - st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node); +static VALUE +noko_io_write_failed(VALUE arg, VALUE exc) +{ + return Qundef; } -void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc) + +int +noko_io_write(void *io, char *c_buffer, int c_buffer_len) { - nokogiriTuplePtr tuple; + VALUE rb_args[2], rb_n_bytes_written; + VALUE rb_io = (VALUE)io; + VALUE rb_enc = Qnil; + rb_encoding *io_encoding; + + if (rb_respond_to(rb_io, id_external_encoding)) { + rb_enc = rb_funcall(rb_io, id_external_encoding, 0); + } + io_encoding = RB_NIL_P(rb_enc) ? 
rb_ascii8bit_encoding() : rb_to_encoding(rb_enc); - if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; - tuple = (nokogiriTuplePtr)doc->_private; - st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns); + rb_args[0] = rb_io; + rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding); + + rb_n_bytes_written = rb_rescue(noko_io_write_check, (VALUE)rb_args, noko_io_write_failed, 0); + if (rb_n_bytes_written == Qundef) { return -1; } + + return NUM2INT(rb_n_bytes_written); } -void Init_nokogiri() + +int +noko_io_close(void *io) { - xmlMemSetup( - (xmlFreeFunc)ruby_xfree, - (xmlMallocFunc)ruby_xmalloc, - (xmlReallocFunc)ruby_xrealloc, - ruby_strdup - ); + return 0; +} + +void +Init_nokogiri(void) +{ mNokogiri = rb_define_module("Nokogiri"); + mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo"); + mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4"); + mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX"); + mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5"); mNokogiriXml = rb_define_module_under(mNokogiri, "XML"); - mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML"); - mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT"); mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX"); - mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX"); - - rb_const_set( mNokogiri, - rb_intern("LIBXML_VERSION"), - NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION) - ); - rb_const_set( mNokogiri, - rb_intern("LIBXML_PARSER_VERSION"), - NOKOGIRI_STR_NEW2(xmlParserVersion) - ); - -#ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH)); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH)); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), 
rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " ")); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " ")); + mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath"); + mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT"); + + rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)); + rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion)); + + rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION)); + rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion)); + +#ifdef NOKOGIRI_PACKAGED_LIBRARIES + rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue); +# ifdef NOKOGIRI_PRECOMPILED_LIBRARIES + rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue); +# else + rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse); +# endif + rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " ")); + rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " ")); #else - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil); - rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil); + rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse); + rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse); + rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil); + rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil); #endif #ifdef LIBXML_ICONV_ENABLED @@ -104,38 +178,88 @@ void 
Init_nokogiri() rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse); #endif +#ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS + rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS)); +#endif + +#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES) + /* + * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this + * issue in libxml 2.9.12: + * + * https://github.com/sparklemotion/nokogiri/issues/2241 + * + * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip + * this config only for the specific libxml2 versions 2.9.12. + * + * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its + * default memory management functions (recall that this config was introduced to reduce memory + * bloat and allow Ruby to GC more often); but we should *really* test with production workloads + * before making that kind of a potentially-invasive change. 
+ */ + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default")); +#else + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby")); + xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup); +#endif + xmlInitParser(); + exsltRegisterAll(); + + if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) { + rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue); + } else { + rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse); + } + + cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError); + noko_init_xml_syntax_error(); + assert(cNokogiriXmlSyntaxError); + cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError); + + noko_init_xml_element_content(); + noko_init_xml_encoding_handler(); + noko_init_xml_namespace(); + noko_init_xml_node_set(); + noko_init_xml_reader(); + noko_init_xml_sax_parser(); + noko_init_xml_xpath_context(); + noko_init_xslt_stylesheet(); + noko_init_html_element_description(); + noko_init_html_entity_lookup(); + + noko_init_xml_schema(); + noko_init_xml_relax_ng(); + + noko_init_xml_sax_parser_context(); + noko_init_html_sax_parser_context(); + + noko_init_xml_sax_push_parser(); + noko_init_html_sax_push_parser(); + + noko_init_xml_node(); + noko_init_xml_attr(); + noko_init_xml_attribute_decl(); + noko_init_xml_dtd(); + noko_init_xml_element_decl(); + noko_init_xml_entity_decl(); + noko_init_xml_entity_reference(); + noko_init_xml_processing_instruction(); + assert(cNokogiriXmlNode); + cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode); + cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode); + noko_init_xml_comment(); + noko_init_xml_text(); + noko_init_xml_cdata(); + + 
noko_init_xml_document_fragment(); + noko_init_xml_document(); + noko_init_html_document(); + noko_init_gumbo(); + + noko_init_test_global_handlers(); - init_xml_document(); - init_html_document(); - init_xml_node(); - init_xml_document_fragment(); - init_xml_text(); - init_xml_cdata(); - init_xml_processing_instruction(); - init_xml_attr(); - init_xml_entity_reference(); - init_xml_comment(); - init_xml_node_set(); - init_xml_xpath_context(); - init_xml_sax_parser_context(); - init_xml_sax_parser(); - init_xml_sax_push_parser(); - init_xml_reader(); - init_xml_dtd(); - init_xml_element_content(); - init_xml_attribute_decl(); - init_xml_element_decl(); - init_xml_entity_decl(); - init_xml_namespace(); - init_html_sax_parser_context(); - init_html_sax_push_parser(); - init_xslt_stylesheet(); - init_xml_syntax_error(); - init_html_entity_lookup(); - init_html_element_description(); - init_xml_schema(); - init_xml_relax_ng(); - init_nokogiri_io(); - init_xml_encoding_handler(); + id_read = rb_intern("read"); + id_write = rb_intern("write"); + id_external_encoding = rb_intern("external_encoding"); } diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index a018519ea2..4c6bea855e 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -1,26 +1,41 @@ #ifndef NOKOGIRI_NATIVE #define NOKOGIRI_NATIVE +#include // https://github.com/sparklemotion/nokogiri/issues/2696 + +#ifdef _MSC_VER +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif /* WIN32_LEAN_AND_MEAN */ + +# ifndef WIN32 +# define WIN32 +# endif /* WIN32 */ + +# include +# include +# include +#endif + +#ifdef _WIN32 +# define NOKOPUBFUN __declspec(dllexport) +# define NOKOPUBVAR __declspec(dllexport) extern +#else +# define NOKOPUBFUN +# define NOKOPUBVAR extern +#endif + #include #include #include #include +#include -#ifdef USE_INCLUDED_VASPRINTF -int vasprintf (char **strp, const char *fmt, va_list ap); -#else - -#define _GNU_SOURCE -# include -#undef _GNU_SOURCE - -#endif 
#include +#include #include -#include #include -#include #include #include #include @@ -28,94 +43,193 @@ int vasprintf (char **strp, const char *fmt, va_list ap); #include #include #include -#include #include -#include -#include -#include +#include +#include -#ifndef NORETURN -# if defined(__GNUC__) -# define NORETURN(name) __attribute__((noreturn)) name -# else -# define NORETURN(name) name -# endif +#include +#include +#include +#include +#include + +#include + +/* libxml2_backwards_compat.c */ +#ifndef HAVE_XMLFIRSTELEMENTCHILD +xmlNodePtr xmlFirstElementChild(xmlNodePtr parent); +xmlNodePtr xmlNextElementSibling(xmlNodePtr node); +xmlNodePtr xmlLastElementChild(xmlNodePtr parent); #endif -#define NOKOGIRI_STR_NEW2(str) \ - NOKOGIRI_STR_NEW(str, strlen((const char *)(str))) - -#define NOKOGIRI_STR_NEW(str, len) \ - rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding()) - -#define RBSTR_OR_QNIL(_str) \ - (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil) - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern VALUE mNokogiri ; -extern VALUE mNokogiriXml ; -extern VALUE mNokogiriXmlSax ; -extern VALUE mNokogiriHtml ; -extern VALUE mNokogiriHtmlSax ; -extern VALUE mNokogiriXslt ; - -void nokogiri_root_node(xmlNodePtr); -void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr); - -#ifdef DEBUG - -#define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p); -#define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p); - -#else +#define XMLNS_PREFIX "xmlns" +#define XMLNS_PREFIX_LEN 6 /* including either 
colon or \0 */ -#define NOKOGIRI_DEBUG_START(p) -#define NOKOGIRI_DEBUG_END(p) +#include +#include +#include +#include +#include + +#define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str))) +#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding()) +#define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil) + +#ifndef NORETURN_DECL +# if defined(__GNUC__) +# define NORETURN_DECL __attribute__ ((noreturn)) +# else +# define NORETURN_DECL +# endif #endif -#ifndef __builtin_expect -# if defined(__GNUC__) -# define __builtin_expect(expr, c) __builtin_expect((long)(expr), (long)(c)) -# endif +#ifndef PRINTFLIKE_DECL +# if defined(__GNUC__) +# define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx))) +# else +# define PRINTFLIKE_DECL(stringidx, argidx) +# endif #endif -#define XMLNS_PREFIX "xmlns" -#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */ -#define XMLNS_BUFFER_LEN 128 +#if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES) +# define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES +#endif +NOKOPUBVAR VALUE mNokogiri ; +NOKOPUBVAR VALUE mNokogiriGumbo ; +NOKOPUBVAR VALUE mNokogiriHtml4 ; +NOKOPUBVAR VALUE mNokogiriHtml4Sax ; +NOKOPUBVAR VALUE mNokogiriHtml5 ; +NOKOPUBVAR VALUE mNokogiriXml ; +NOKOPUBVAR VALUE mNokogiriXmlSax ; +NOKOPUBVAR VALUE mNokogiriXmlXpath ; +NOKOPUBVAR VALUE mNokogiriXslt ; + +NOKOPUBVAR VALUE cNokogiriEncodingHandler; +NOKOPUBVAR VALUE cNokogiriSyntaxError; +NOKOPUBVAR VALUE cNokogiriXmlAttr; +NOKOPUBVAR VALUE cNokogiriXmlAttributeDecl; +NOKOPUBVAR VALUE cNokogiriXmlCData; +NOKOPUBVAR VALUE cNokogiriXmlCharacterData; +NOKOPUBVAR VALUE cNokogiriXmlComment; +NOKOPUBVAR VALUE cNokogiriXmlDocument ; +NOKOPUBVAR VALUE cNokogiriXmlDocumentFragment; +NOKOPUBVAR VALUE cNokogiriXmlDtd; +NOKOPUBVAR VALUE cNokogiriXmlElement ; +NOKOPUBVAR VALUE cNokogiriXmlElementContent; +NOKOPUBVAR VALUE 
cNokogiriXmlElementDecl; +NOKOPUBVAR VALUE cNokogiriXmlEntityDecl; +NOKOPUBVAR VALUE cNokogiriXmlEntityReference; +NOKOPUBVAR VALUE cNokogiriXmlNamespace ; +NOKOPUBVAR VALUE cNokogiriXmlNode ; +NOKOPUBVAR VALUE cNokogiriXmlNodeSet ; +NOKOPUBVAR VALUE cNokogiriXmlProcessingInstruction; +NOKOPUBVAR VALUE cNokogiriXmlReader; +NOKOPUBVAR VALUE cNokogiriXmlRelaxNG; +NOKOPUBVAR VALUE cNokogiriXmlSaxParser ; +NOKOPUBVAR VALUE cNokogiriXmlSaxParserContext; +NOKOPUBVAR VALUE cNokogiriXmlSaxPushParser ; +NOKOPUBVAR VALUE cNokogiriXmlSchema; +NOKOPUBVAR VALUE cNokogiriXmlSyntaxError; +NOKOPUBVAR VALUE cNokogiriXmlText ; +NOKOPUBVAR VALUE cNokogiriXmlXpathContext; +NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError; +NOKOPUBVAR VALUE cNokogiriXsltStylesheet ; + +NOKOPUBVAR VALUE cNokogiriHtml4Document ; +NOKOPUBVAR VALUE cNokogiriHtml4SaxPushParser ; +NOKOPUBVAR VALUE cNokogiriHtml4ElementDescription ; +NOKOPUBVAR VALUE cNokogiriHtml4SaxParserContext; +NOKOPUBVAR VALUE cNokogiriHtml5Document ; + +typedef struct _nokogiriTuple { + VALUE doc; + st_table *unlinkedNodes; + VALUE node_cache; +} nokogiriTuple; +typedef nokogiriTuple *nokogiriTuplePtr; + +typedef struct _nokogiriSAXTuple { + xmlParserCtxtPtr ctxt; + VALUE self; +} nokogiriSAXTuple; +typedef nokogiriSAXTuple *nokogiriSAXTuplePtr; + +typedef struct _libxmlStructuredErrorHandlerState { + void *user_data; + xmlStructuredErrorFunc handler; +} libxmlStructuredErrorHandlerState ; + +typedef struct _nokogiriXsltStylesheetTuple { + xsltStylesheetPtr ss; + VALUE func_instances; +} nokogiriXsltStylesheetTuple; + +void noko_xml_document_pin_node(xmlNodePtr); +void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr); + +int noko_io_read(void *ctx, char *buffer, int len); +int noko_io_write(void *ctx, char *buffer, int len); +int noko_io_close(void *ctx); + +#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) +#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) + +VALUE 
noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ; +VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ; +VALUE noko_xml_node_attrs(xmlNodePtr node) ; + +VALUE noko_xml_namespace_wrap(xmlNsPtr node, xmlDocPtr doc); +VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node); + +VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element); + +VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ; + +VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv); +VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc); +NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, + xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */ + +#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private)) +#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc) +#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes) +#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache) +#define NOKOGIRI_NAMESPACE_EH(node) ((node)->type == XML_NAMESPACE_DECL) + +#define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self +#define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt +#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self) +#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple) + +#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) +#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) + +#if HAVE_RB_CATEGORY_WARNING +# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define NOKO_WARN_DEPRECATION(message) rb_warning(message) #endif + +void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); +void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, + xmlStructuredErrorFunc handler); +void 
Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state); +VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); +void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error); +NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error); +void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, + const char *function_name) ; + +static inline +nokogiriSAXTuplePtr +nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self) +{ + nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple)); + tuple->self = self; + tuple->ctxt = ctxt; + return tuple; +} + +#endif /* NOKOGIRI_NATIVE */ diff --git a/ext/nokogiri/test_global_handlers.c b/ext/nokogiri/test_global_handlers.c new file mode 100644 index 0000000000..79bb644661 --- /dev/null +++ b/ext/nokogiri/test_global_handlers.c @@ -0,0 +1,40 @@ +#include + +static VALUE foreign_error_handler_block = Qnil; + +static void +foreign_error_handler(void *user_data, xmlErrorPtr c_error) +{ + rb_funcall(foreign_error_handler_block, rb_intern("call"), 0); +} + +/* + * call-seq: + * __foreign_error_handler { ... } -> nil + * + * Override libxml2's global error handlers to call the block. This method thus has very little + * value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See + * test/helper.rb for how this is being used. + */ +static VALUE +rb_foreign_error_handler(VALUE klass) +{ + rb_need_block(); + foreign_error_handler_block = rb_block_proc(); + xmlSetStructuredErrorFunc(NULL, foreign_error_handler); + return Qnil; +} + +/* + * Document-module: Nokogiri::Test + * + * The Nokogiri::Test module should only be used for testing Nokogiri. + * Do NOT use this outside of the Nokogiri test suite. 
+ */ +void +noko_init_test_global_handlers(void) +{ + VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test"); + + rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0); +} diff --git a/ext/nokogiri/xml_attr.c b/ext/nokogiri/xml_attr.c index 9858cd3976..ec91b79294 100644 --- a/ext/nokogiri/xml_attr.c +++ b/ext/nokogiri/xml_attr.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlAttr; /* * call-seq: @@ -7,13 +9,14 @@ * Set the value for this Attr to +content+. Use `nil` to remove the value * (e.g., a HTML boolean attribute). */ -static VALUE set_value(VALUE self, VALUE content) +static VALUE +set_value(VALUE self, VALUE content) { xmlAttrPtr attr; xmlChar *value; xmlNode *cur; - Data_Get_Struct(self, xmlAttr, attr); + Noko_Node_Get_Struct(self, xmlAttr, attr); if (attr->children) { xmlFreeNodeList(attr->children); @@ -49,7 +52,8 @@ static VALUE set_value(VALUE self, VALUE content) * * Create a new Attr element on the +document+ with +name+ */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; VALUE document; @@ -64,7 +68,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document"); } - Data_Get_Struct(document, xmlDoc, xml_doc); + Noko_Node_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewDocProp( xml_doc, @@ -72,9 +76,9 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) NULL ); - nokogiri_root_node((xmlNodePtr)node); + noko_xml_document_pin_node((xmlNodePtr)node); - rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node); + rb_node = noko_xml_node_wrap(klass, (xmlNodePtr)node); rb_obj_call_init(rb_node, argc, argv); if (rb_block_given_p()) { @@ -84,20 +88,16 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) return rb_node; } -VALUE cNokogiriXmlAttr; -void init_xml_attr() +void +noko_init_xml_attr(void) { - VALUE nokogiri = 
rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - + assert(cNokogiriXmlNode); /* * Attr represents a Attr node in an xml document. */ - VALUE klass = rb_define_class_under(xml, "Attr", node); + cNokogiriXmlAttr = rb_define_class_under(mNokogiriXml, "Attr", cNokogiriXmlNode); - cNokogiriXmlAttr = klass; + rb_define_singleton_method(cNokogiriXmlAttr, "new", new, -1); - rb_define_singleton_method(klass, "new", new, -1); - rb_define_method(klass, "value=", set_value, 1); + rb_define_method(cNokogiriXmlAttr, "value=", set_value, 1); } diff --git a/ext/nokogiri/xml_attr.h b/ext/nokogiri/xml_attr.h deleted file mode 100644 index ca25496512..0000000000 --- a/ext/nokogiri/xml_attr.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_ATTR -#define NOKOGIRI_XML_ATTR - -#include - -void init_xml_attr(); - -extern VALUE cNokogiriXmlAttr; -#endif diff --git a/ext/nokogiri/xml_attribute_decl.c b/ext/nokogiri/xml_attribute_decl.c index 6e03aad19a..3f9bebc88c 100644 --- a/ext/nokogiri/xml_attribute_decl.c +++ b/ext/nokogiri/xml_attribute_decl.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlAttributeDecl; /* * call-seq: @@ -6,11 +8,12 @@ * * The attribute_type for this AttributeDecl */ -static VALUE attribute_type(VALUE self) +static VALUE +attribute_type(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); - return INT2NUM((long)node->atype); + Noko_Node_Get_Struct(self, xmlAttribute, node); + return INT2NUM(node->atype); } /* @@ -19,12 +22,13 @@ static VALUE attribute_type(VALUE self) * * The default value */ -static VALUE default_value(VALUE self) +static VALUE +default_value(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); - if(node->defaultValue) return NOKOGIRI_STR_NEW2(node->defaultValue); + if (node->defaultValue) { return 
NOKOGIRI_STR_NEW2(node->defaultValue); } return Qnil; } @@ -34,18 +38,19 @@ static VALUE default_value(VALUE self) * * An enumeration of possible values */ -static VALUE enumeration(VALUE self) +static VALUE +enumeration(VALUE self) { xmlAttributePtr node; xmlEnumerationPtr enm; VALUE list; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); list = rb_ary_new(); enm = node->tree; - while(enm) { + while (enm) { rb_ary_push(list, NOKOGIRI_STR_NEW2(enm->name)); enm = enm->next; } @@ -53,18 +58,13 @@ static VALUE enumeration(VALUE self) return list; } -VALUE cNokogiriXmlAttributeDecl; - -void init_xml_attribute_decl() +void +noko_init_xml_attribute_decl(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE klass = rb_define_class_under(xml, "AttributeDecl", node); - - cNokogiriXmlAttributeDecl = klass; + assert(cNokogiriXmlNode); + cNokogiriXmlAttributeDecl = rb_define_class_under(mNokogiriXml, "AttributeDecl", cNokogiriXmlNode); - rb_define_method(klass, "attribute_type", attribute_type, 0); - rb_define_method(klass, "default", default_value, 0); - rb_define_method(klass, "enumeration", enumeration, 0); + rb_define_method(cNokogiriXmlAttributeDecl, "attribute_type", attribute_type, 0); + rb_define_method(cNokogiriXmlAttributeDecl, "default", default_value, 0); + rb_define_method(cNokogiriXmlAttributeDecl, "enumeration", enumeration, 0); } diff --git a/ext/nokogiri/xml_attribute_decl.h b/ext/nokogiri/xml_attribute_decl.h deleted file mode 100644 index 859f1b5f78..0000000000 --- a/ext/nokogiri/xml_attribute_decl.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_ATTRIBUTE_DECL -#define NOKOGIRI_XML_ATTRIBUTE_DECL - -#include - -void init_xml_attribute_decl(); - -extern VALUE cNokogiriXmlAttributeDecl; -#endif diff --git a/ext/nokogiri/xml_cdata.c b/ext/nokogiri/xml_cdata.c index 
dd1b196507..6ea98293b7 100644 --- a/ext/nokogiri/xml_cdata.c +++ b/ext/nokogiri/xml_cdata.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlCData; /* * call-seq: @@ -9,7 +11,8 @@ * If +content+ cannot be implicitly converted to a string, this method will * raise a TypeError exception. */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; xmlNodePtr node; @@ -22,41 +25,33 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &doc, &content, &rest); - Data_Get_Struct(doc, xmlDoc, xml_doc); + Noko_Node_Get_Struct(doc, xmlDoc, xml_doc); if (!NIL_P(content)) { content_str = (xmlChar *)StringValuePtr(content); - content_str_len = RSTRING_LEN(content); + content_str_len = RSTRING_LENINT(content); } node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - rb_node = Nokogiri_wrap_xml_node(klass, node); + rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) { rb_yield(rb_node); } + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } -VALUE cNokogiriXmlCData; -void init_xml_cdata() +void +noko_init_xml_cdata(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE char_data = rb_define_class_under(xml, "CharacterData", node); - VALUE text = rb_define_class_under(xml, "Text", char_data); - + assert(cNokogiriXmlText); /* * CData represents a CData node in an xml document. 
*/ - VALUE klass = rb_define_class_under(xml, "CDATA", text); - - - cNokogiriXmlCData = klass; + cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlCData, "new", new, -1); } diff --git a/ext/nokogiri/xml_cdata.h b/ext/nokogiri/xml_cdata.h deleted file mode 100644 index 16748b872f..0000000000 --- a/ext/nokogiri/xml_cdata.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_CDATA -#define NOKOGIRI_XML_CDATA - -#include - -void init_xml_cdata(); - -extern VALUE cNokogiriXmlCData; -#endif diff --git a/ext/nokogiri/xml_comment.c b/ext/nokogiri/xml_comment.c index a3eb056b06..81b57ca472 100644 --- a/ext/nokogiri/xml_comment.c +++ b/ext/nokogiri/xml_comment.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlComment; static ID document_id ; @@ -9,7 +11,8 @@ static ID document_id ; * Create a new Comment element on the +document+ with +content+. * Alternatively, if a +node+ is passed, the +node+'s document is used. 
*/ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; xmlNodePtr node; @@ -20,50 +23,40 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &document, &content, &rest); - if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) - { + if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) { document = rb_funcall(document, document_id, 0); - } - else if ( !rb_obj_is_kind_of(document, cNokogiriXmlDocument) - && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) - { + } else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument) + && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) { rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node"); } Data_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewDocComment( - xml_doc, - (const xmlChar *)StringValueCStr(content) - ); + xml_doc, + (const xmlChar *)StringValueCStr(content) + ); - rb_node = Nokogiri_wrap_xml_node(klass, node); + rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - if(rb_block_given_p()) rb_yield(rb_node); + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } -VALUE cNokogiriXmlComment; -void init_xml_comment() +void +noko_init_xml_comment(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE char_data = rb_define_class_under(xml, "CharacterData", node); - + assert(cNokogiriXmlCharacterData); /* * Comment represents a comment node in an xml document. 
*/ - VALUE klass = rb_define_class_under(xml, "Comment", char_data); - - - cNokogiriXmlComment = klass; + cNokogiriXmlComment = rb_define_class_under(mNokogiriXml, "Comment", cNokogiriXmlCharacterData); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlComment, "new", new, -1); document_id = rb_intern("document"); } diff --git a/ext/nokogiri/xml_comment.h b/ext/nokogiri/xml_comment.h deleted file mode 100644 index 4886405fd2..0000000000 --- a/ext/nokogiri/xml_comment.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_COMMENT -#define NOKOGIRI_XML_COMMENT - -#include - -void init_xml_comment(); - -extern VALUE cNokogiriXmlComment; -#endif diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 1d2119c80f..a55daa1b55 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -1,91 +1,117 @@ -#include +#include -static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc) +VALUE cNokogiriXmlDocument ; + +static int +dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc) { - switch(node->type) { - case XML_ATTRIBUTE_NODE: - xmlFreePropList((xmlAttrPtr)node); - break; - case XML_NAMESPACE_DECL: - xmlFreeNs((xmlNsPtr)node); - break; - case XML_DTD_NODE: - xmlFreeDtd((xmlDtdPtr)node); - break; - default: - if(node->parent == NULL) { - xmlAddChild((xmlNodePtr)doc, node); - } + switch (node->type) { + case XML_ATTRIBUTE_NODE: + xmlFreePropList((xmlAttrPtr)node); + break; + case XML_NAMESPACE_DECL: + xmlFreeNs((xmlNsPtr)node); + break; + case XML_DTD_NODE: + xmlFreeDtd((xmlDtdPtr)node); + break; + default: + if (node->parent == NULL) { + xmlAddChild((xmlNodePtr)doc, node); + } } return ST_CONTINUE; } -static void remove_private(xmlNodePtr node) +static int +dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc) +{ + return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc); +} + +static void +remove_private(xmlNodePtr node) { xmlNodePtr child; - 
for (child = node->children; child; child = child->next) + for (child = node->children; child; child = child->next) { remove_private(child); + } if ((node->type == XML_ELEMENT_NODE || node->type == XML_XINCLUDE_START || node->type == XML_XINCLUDE_END) && node->properties) { - for (child = (xmlNodePtr)node->properties; child; child = child->next) + for (child = (xmlNodePtr)node->properties; child; child = child->next) { remove_private(child); + } } node->_private = NULL; } -static void dealloc(xmlDocPtr doc) +static void +mark(xmlDocPtr doc) { - st_table *node_hash; + nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private; + if (tuple) { + rb_gc_mark(tuple->doc); + rb_gc_mark(tuple->node_cache); + } +} - NOKOGIRI_DEBUG_START(doc); +static void +dealloc(xmlDocPtr doc) +{ + st_table *node_hash; node_hash = DOC_UNLINKED_NODE_HASH(doc); st_foreach(node_hash, dealloc_node_i, (st_data_t)doc); st_free_table(node_hash); - free(doc->_private); + ruby_xfree(doc->_private); /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes * have their _private pointers cleared. This is to avoid libxml-ruby's * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC * free context, which can result in segfaults. 
*/ - if (xmlDeregisterNodeDefaultValue) + if (xmlDeregisterNodeDefaultValue) { remove_private((xmlNodePtr)doc); + } xmlFreeDoc(doc); - - NOKOGIRI_DEBUG_END(doc); } -static void recursively_remove_namespaces_from_node(xmlNodePtr node) +static void +recursively_remove_namespaces_from_node(xmlNodePtr node) { xmlNodePtr child ; xmlAttrPtr property ; xmlSetNs(node, NULL); - for (child = node->children ; child ; child = child->next) + for (child = node->children ; child ; child = child->next) { recursively_remove_namespaces_from_node(child); + } if (((node->type == XML_ELEMENT_NODE) || (node->type == XML_XINCLUDE_START) || (node->type == XML_XINCLUDE_END)) && node->nsDef) { - xmlFreeNsList(node->nsDef); + xmlNsPtr curr = node->nsDef; + while (curr) { + noko_xml_document_pin_namespace(curr, node->doc); + curr = curr->next; + } node->nsDef = NULL; } if (node->type == XML_ELEMENT_NODE && node->properties != NULL) { property = node->properties ; while (property != NULL) { - if (property->ns) property->ns = NULL ; + if (property->ns) { property->ns = NULL ; } property = property->next ; } } @@ -97,12 +123,13 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node) * * Get the url name for this document. 
*/ -static VALUE url(VALUE self) +static VALUE +url(VALUE self) { xmlDocPtr doc; Data_Get_Struct(self, xmlDoc, doc); - if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL); + if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); } return Qnil; } @@ -113,42 +140,42 @@ static VALUE url(VALUE self) * * Set the root element on this document */ -static VALUE set_root(VALUE self, VALUE root) +static VALUE +rb_xml_document_root_set(VALUE self, VALUE rb_new_root) { - xmlDocPtr doc; - xmlNodePtr new_root; - xmlNodePtr old_root; + xmlDocPtr c_document; + xmlNodePtr c_new_root = NULL, c_current_root; - Data_Get_Struct(self, xmlDoc, doc); - - old_root = NULL; - - if(NIL_P(root)) { - old_root = xmlDocGetRootElement(doc); - - if(old_root) { - xmlUnlinkNode(old_root); - nokogiri_root_node(old_root); - } + Data_Get_Struct(self, xmlDoc, c_document); - return root; + c_current_root = xmlDocGetRootElement(c_document); + if (c_current_root) { + xmlUnlinkNode(c_current_root); + noko_xml_document_pin_node(c_current_root); } - Data_Get_Struct(root, xmlNode, new_root); + if (!NIL_P(rb_new_root)) { + if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) { + rb_raise(rb_eArgError, + "expected Nokogiri::XML::Node but received %"PRIsVALUE, + rb_obj_class(rb_new_root)); + } + Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root); - /* If the new root's document is not the same as the current document, - * then we need to dup the node in to this document. */ - if(new_root->doc != doc) { - old_root = xmlDocGetRootElement(doc); - if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) { - rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)"); + /* If the new root's document is not the same as the current document, + * then we need to dup the node in to this document. 
*/ + if (c_new_root->doc != c_document) { + c_new_root = xmlDocCopyNode(c_new_root, c_document, 1); + if (!c_new_root) { + rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)"); + } } } - xmlDocSetRootElement(doc, new_root); - if(old_root) nokogiri_root_node(old_root); - return root; + xmlDocSetRootElement(c_document, c_new_root); + + return rb_new_root; } /* @@ -157,17 +184,20 @@ static VALUE set_root(VALUE self, VALUE root) * * Get the root node for this document. */ -static VALUE root(VALUE self) +static VALUE +rb_xml_document_root(VALUE self) { - xmlDocPtr doc; - xmlNodePtr root; + xmlDocPtr c_document; + xmlNodePtr c_root; - Data_Get_Struct(self, xmlDoc, doc); + Data_Get_Struct(self, xmlDoc, c_document); - root = xmlDocGetRootElement(doc); + c_root = xmlDocGetRootElement(c_document); + if (!c_root) { + return Qnil; + } - if(!root) return Qnil; - return Nokogiri_wrap_xml_node(Qnil, root) ; + return noko_xml_node_wrap(Qnil, c_root) ; } /* @@ -176,13 +206,15 @@ static VALUE root(VALUE self) * * Set the encoding string for this Document */ -static VALUE set_encoding(VALUE self, VALUE encoding) +static VALUE +set_encoding(VALUE self, VALUE encoding) { xmlDocPtr doc; Data_Get_Struct(self, xmlDoc, doc); - if (doc->encoding) - free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */ + if (doc->encoding) { + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding)); + } doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding)); @@ -195,12 +227,13 @@ static VALUE set_encoding(VALUE self, VALUE encoding) * * Get the encoding for this Document */ -static VALUE encoding(VALUE self) +static VALUE +encoding(VALUE self) { xmlDocPtr doc; Data_Get_Struct(self, xmlDoc, doc); - if(!doc->encoding) return Qnil; + if (!doc->encoding) { return Qnil; } return NOKOGIRI_STR_NEW2(doc->encoding); } @@ -210,12 +243,13 @@ static VALUE encoding(VALUE self) * * Get the XML version for this Document */ -static VALUE version(VALUE self) +static VALUE +version(VALUE 
self) { xmlDocPtr doc; Data_Get_Struct(self, xmlDoc, doc); - if(!doc->version) return Qnil; + if (!doc->version) { return Qnil; } return NOKOGIRI_STR_NEW2(doc->version); } @@ -225,14 +259,15 @@ static VALUE version(VALUE self) * * Create a new document from an IO object */ -static VALUE read_io( VALUE klass, - VALUE io, - VALUE url, - VALUE encoding, - VALUE options ) +static VALUE +read_io(VALUE klass, + VALUE io, + VALUE url, + VALUE encoding, + VALUE options) { - const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url); - const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding); + const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url); + const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding); VALUE error_list = rb_ary_new(); VALUE document; xmlDocPtr doc; @@ -241,30 +276,31 @@ static VALUE read_io( VALUE klass, xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); doc = xmlReadIO( - (xmlInputReadCallback)io_read_callback, - (xmlInputCloseCallback)io_close_callback, - (void *)io, - c_url, - c_enc, - (int)NUM2INT(options) - ); + (xmlInputReadCallback)noko_io_read, + (xmlInputCloseCallback)noko_io_close, + (void *)io, + c_url, + c_enc, + (int)NUM2INT(options) + ); xmlSetStructuredErrorFunc(NULL, NULL); - if(doc == NULL) { + if (doc == NULL) { xmlErrorPtr error; xmlFreeDoc(doc); error = xmlGetLastError(); - if(error) + if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } - document = Nokogiri_wrap_xml_document(klass, doc); + document = noko_xml_document_wrap(klass, doc); rb_iv_set(document, "@errors", error_list); return document; } @@ -275,15 +311,16 @@ static VALUE read_io( VALUE klass, * * Create a new document from a String */ -static VALUE read_memory( VALUE klass, - VALUE string, - VALUE url, - VALUE encoding, - VALUE options ) +static VALUE +read_memory(VALUE klass, + VALUE string, + VALUE 
url, + VALUE encoding, + VALUE options) { - const char * c_buffer = StringValuePtr(string); - const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url); - const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding); + const char *c_buffer = StringValuePtr(string); + const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url); + const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding); int len = (int)RSTRING_LEN(string); VALUE error_list = rb_ary_new(); VALUE document; @@ -294,21 +331,22 @@ static VALUE read_memory( VALUE klass, doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options)); xmlSetStructuredErrorFunc(NULL, NULL); - if(doc == NULL) { + if (doc == NULL) { xmlErrorPtr error; xmlFreeDoc(doc); error = xmlGetLastError(); - if(error) + if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } - document = Nokogiri_wrap_xml_document(klass, doc); + document = noko_xml_document_wrap(klass, doc); rb_iv_set(document, "@errors", error_list); return document; } @@ -320,26 +358,26 @@ static VALUE read_memory( VALUE klass, * Copy this Document. An optional depth may be passed in, but it defaults * to a deep copy. 0 is a shallow copy, 1 is a deep copy. 
*/ -static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) +static VALUE +duplicate_document(int argc, VALUE *argv, VALUE self) { xmlDocPtr doc, dup; VALUE copy; VALUE level; - VALUE error_list; - if(rb_scan_args(argc, argv, "01", &level) == 0) + if (rb_scan_args(argc, argv, "01", &level) == 0) { level = INT2NUM((long)1); + } Data_Get_Struct(self, xmlDoc, doc); dup = xmlCopyDoc(doc, (int)NUM2INT(level)); - if(dup == NULL) return Qnil; + if (dup == NULL) { return Qnil; } dup->type = doc->type; - copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup); - error_list = rb_iv_get(self, "@errors"); - rb_iv_set(copy, "@errors", error_list); + copy = noko_xml_document_wrap(rb_obj_class(self), dup); + rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors")); return copy ; } @@ -349,18 +387,18 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) * * Create a new document with +version+ (defaults to "1.0") */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr doc; VALUE version, rest, rb_doc ; rb_scan_args(argc, argv, "0*", &rest); version = rb_ary_entry(rest, (long)0); - if (NIL_P(version)) version = rb_str_new2("1.0"); + if (NIL_P(version)) { version = rb_str_new2("1.0"); } doc = xmlNewDoc((xmlChar *)StringValueCStr(version)); - rb_doc = Nokogiri_wrap_xml_document(klass, doc); - rb_obj_call_init(rb_doc, argc, argv); + rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv); return rb_doc ; } @@ -401,7 +439,8 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) * please direct your browser to * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html */ -VALUE remove_namespaces_bang(VALUE self) +static VALUE +remove_namespaces_bang(VALUE self) { xmlDocPtr doc ; Data_Get_Struct(self, xmlDoc, doc); @@ -421,7 +460,8 @@ VALUE remove_namespaces_bang(VALUE self) * +external_id+, +system_id+, and +content+ set the External ID, System ID, * and content 
respectively. All of these parameters are optional. */ -static VALUE create_entity(int argc, VALUE *argv, VALUE self) +static VALUE +create_entity(int argc, VALUE *argv, VALUE self) { VALUE name; VALUE type; @@ -434,52 +474,50 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self) Data_Get_Struct(self, xmlDoc, doc); rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id, - &content); + &content); xmlResetLastError(); ptr = xmlAddDocEntity( - doc, - (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)), - (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)), - (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)), - (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)), - (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content)) - ); - - if(NULL == ptr) { + doc, + (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)), + (int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)), + (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)), + (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)), + (xmlChar *)(NIL_P(content) ? 
NULL : StringValueCStr(content)) + ); + + if (NULL == ptr) { xmlErrorPtr error = xmlGetLastError(); - if(error) + if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else + } else { rb_raise(rb_eRuntimeError, "Could not create entity"); + } return Qnil; } - return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr); + return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr); } -static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent) +static int +block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node) { - VALUE block; - VALUE node; - VALUE parent; + VALUE block = (VALUE)ctx; + VALUE rb_node; + VALUE rb_parent_node; VALUE ret; - if(_node->type == XML_NAMESPACE_DECL){ - node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node); + if (c_node->type == XML_NAMESPACE_DECL) { + rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc); + } else { + rb_node = noko_xml_node_wrap(Qnil, c_node); } - else{ - node = Nokogiri_wrap_xml_node(Qnil, _node); - } - parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil; - block = (VALUE)ctx; - - ret = rb_funcall(block, rb_intern("call"), 2, node, parent); + rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil; - if(Qfalse == ret || Qnil == ret) return 0; + ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node); - return 1; + return (Qfalse == ret || Qnil == ret) ? 0 : 1; } /* call-seq: @@ -492,117 +530,160 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent) * The block must return a non-nil, non-false value if the +obj+ passed in * should be included in the canonicalized document. 
*/ -static VALUE canonicalize(int argc, VALUE* argv, VALUE self) +static VALUE +rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self) { - VALUE mode; - VALUE incl_ns; - VALUE with_comments; - xmlChar **ns; - long ns_len, i; + VALUE rb_mode; + VALUE rb_namespaces; + VALUE rb_comments_p; + int c_mode = 0; + xmlChar **c_namespaces; - xmlDocPtr doc; - xmlOutputBufferPtr buf; - xmlC14NIsVisibleCallback cb = NULL; - void * ctx = NULL; + xmlDocPtr c_doc; + xmlOutputBufferPtr c_obuf; + xmlC14NIsVisibleCallback c_callback_wrapper = NULL; + void *rb_callback = NULL; VALUE rb_cStringIO; - VALUE io; + VALUE rb_io; - rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments); + rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p); + if (!NIL_P(rb_mode)) { + Check_Type(rb_mode, T_FIXNUM); + c_mode = NUM2INT(rb_mode); + } + if (!NIL_P(rb_namespaces)) { + Check_Type(rb_namespaces, T_ARRAY); + if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) { + rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation"); + } + } - Data_Get_Struct(self, xmlDoc, doc); + Data_Get_Struct(self, xmlDoc, c_doc); rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO")); - io = rb_class_new_instance(0, 0, rb_cStringIO); - buf = xmlAllocOutputBuffer(NULL); + rb_io = rb_class_new_instance(0, 0, rb_cStringIO); + c_obuf = xmlAllocOutputBuffer(NULL); - buf->writecallback = (xmlOutputWriteCallback)io_write_callback; - buf->closecallback = (xmlOutputCloseCallback)io_close_callback; - buf->context = (void *)io; + c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write; + c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close; + c_obuf->context = (void *)rb_io; - if(rb_block_given_p()) { - cb = block_caller; - ctx = (void *)rb_block_proc(); + if (rb_block_given_p()) { + c_callback_wrapper = block_caller; + rb_callback = (void *)rb_block_proc(); } - if(NIL_P(incl_ns)){ - ns = NULL; - } - else{ - Check_Type(incl_ns, T_ARRAY); - ns_len 
= RARRAY_LEN(incl_ns); - ns = calloc((size_t)ns_len+1, sizeof(xmlChar *)); - for (i = 0 ; i < ns_len ; i++) { - VALUE entry = rb_ary_entry(incl_ns, i); - ns[i] = (xmlChar*)StringValueCStr(entry); + if (NIL_P(rb_namespaces)) { + c_namespaces = NULL; + } else { + long ns_len = RARRAY_LEN(rb_namespaces); + c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *)); + for (int j = 0 ; j < ns_len ; j++) { + VALUE entry = rb_ary_entry(rb_namespaces, j); + c_namespaces[j] = (xmlChar *)StringValueCStr(entry); } } + xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback, + c_mode, + c_namespaces, + (int)RTEST(rb_comments_p), + c_obuf); + + ruby_xfree(c_namespaces); + xmlOutputBufferClose(c_obuf); + + return rb_funcall(rb_io, rb_intern("string"), 0); +} + +VALUE +noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv) +{ + VALUE rb_document; + nokogiriTuplePtr tuple; + + if (!klass) { + klass = cNokogiriXmlDocument; + } + + rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document); - xmlC14NExecute(doc, cb, ctx, - (int) (NIL_P(mode) ? 0 : NUM2INT(mode)), - ns, - (int) RTEST(with_comments), - buf); + tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple)); + tuple->doc = rb_document; + tuple->unlinkedNodes = st_init_numtable_with_size(128); + tuple->node_cache = rb_ary_new(); + + c_document->_private = tuple ; - xmlOutputBufferClose(buf); + rb_iv_set(rb_document, "@decorators", Qnil); + rb_iv_set(rb_document, "@errors", Qnil); + rb_iv_set(rb_document, "@node_cache", tuple->node_cache); - return rb_funcall(io, rb_intern("string"), 0); + rb_obj_call_init(rb_document, argc, argv); + + return rb_document ; } -VALUE cNokogiriXmlDocument ; -void init_xml_document() + +/* deprecated. use noko_xml_document_wrap() instead. 
*/ +VALUE +Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); + /* TODO: deprecate this method in v2.0 */ + return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL); +} - /* - * Nokogiri::XML::Document wraps an xml document. - */ - VALUE klass = rb_define_class_under(xml, "Document", node); - - cNokogiriXmlDocument = klass; - - rb_define_singleton_method(klass, "read_memory", read_memory, 4); - rb_define_singleton_method(klass, "read_io", read_io, 4); - rb_define_singleton_method(klass, "new", new, -1); - - rb_define_method(klass, "root", root, 0); - rb_define_method(klass, "root=", set_root, 1); - rb_define_method(klass, "encoding", encoding, 0); - rb_define_method(klass, "encoding=", set_encoding, 1); - rb_define_method(klass, "version", version, 0); - rb_define_method(klass, "canonicalize", canonicalize, -1); - rb_define_method(klass, "dup", duplicate_document, -1); - rb_define_method(klass, "url", url, 0); - rb_define_method(klass, "create_entity", create_entity, -1); - rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0); +VALUE +noko_xml_document_wrap(VALUE klass, xmlDocPtr doc) +{ + return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL); } -/* this takes klass as a param because it's used for HtmlDocument, too. */ -VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc) +void +noko_xml_document_pin_node(xmlNodePtr node) { - nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple)); + xmlDocPtr doc; + nokogiriTuplePtr tuple; - VALUE rb_doc = Data_Wrap_Struct( - klass ? 
klass : cNokogiriXmlDocument, - 0, - dealloc, - doc - ); + doc = node->doc; + tuple = (nokogiriTuplePtr)doc->_private; + st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node); +} - VALUE cache = rb_ary_new(); - rb_iv_set(rb_doc, "@decorators", Qnil); - rb_iv_set(rb_doc, "@node_cache", cache); - tuple->doc = rb_doc; - tuple->unlinkedNodes = st_init_numtable_with_size(128); - tuple->node_cache = cache; - doc->_private = tuple ; +void +noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc) +{ + nokogiriTuplePtr tuple; + + tuple = (nokogiriTuplePtr)doc->_private; + st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns); +} - rb_obj_call_init(rb_doc, 0, NULL); - return rb_doc ; +void +noko_init_xml_document(void) +{ + assert(cNokogiriXmlNode); + /* + * Nokogiri::XML::Document wraps an xml document. + */ + cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode); + + rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4); + rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4); + rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1); + + rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0); + rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1); + rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0); + rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1); + rb_define_method(cNokogiriXmlDocument, "version", version, 0); + rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1); + rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1); + rb_define_method(cNokogiriXmlDocument, "url", url, 0); + rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1); + rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0); } diff --git a/ext/nokogiri/xml_document.h b/ext/nokogiri/xml_document.h deleted file mode 
100644 index 48353a3e97..0000000000 --- a/ext/nokogiri/xml_document.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef NOKOGIRI_XML_DOCUMENT -#define NOKOGIRI_XML_DOCUMENT - -#include - -struct _nokogiriTuple { - VALUE doc; - st_table *unlinkedNodes; - VALUE node_cache; -}; -typedef struct _nokogiriTuple nokogiriTuple; -typedef nokogiriTuple * nokogiriTuplePtr; - -void init_xml_document(); -VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); - -#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private)) -#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc) -#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes) -#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache) - -extern VALUE cNokogiriXmlDocument ; -#endif diff --git a/ext/nokogiri/xml_document_fragment.c b/ext/nokogiri/xml_document_fragment.c index 2d7fb17150..1a82eed1ec 100644 --- a/ext/nokogiri/xml_document_fragment.c +++ b/ext/nokogiri/xml_document_fragment.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlDocumentFragment; /* * call-seq: @@ -6,7 +8,8 @@ * * Create a new DocumentFragment element on the +document+ */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; xmlNodePtr node; @@ -20,29 +23,22 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) node = xmlNewDocFragment(xml_doc->doc); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - rb_node = Nokogiri_wrap_xml_node(klass, node); + rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) rb_yield(rb_node); - return rb_node; } -VALUE cNokogiriXmlDocumentFragment; -void init_xml_document_fragment() +void +noko_init_xml_document_fragment(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - + 
assert(cNokogiriXmlNode); /* * DocumentFragment represents a DocumentFragment node in an xml document. */ - VALUE klass = rb_define_class_under(xml, "DocumentFragment", node); - - cNokogiriXmlDocumentFragment = klass; + cNokogiriXmlDocumentFragment = rb_define_class_under(mNokogiriXml, "DocumentFragment", cNokogiriXmlNode); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlDocumentFragment, "new", new, -1); } diff --git a/ext/nokogiri/xml_document_fragment.h b/ext/nokogiri/xml_document_fragment.h deleted file mode 100644 index 90c6d0437d..0000000000 --- a/ext/nokogiri/xml_document_fragment.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_DOCUMENT_FRAGMENT -#define NOKOGIRI_XML_DOCUMENT_FRAGMENT - -#include - -void init_xml_document_fragment(); - -extern VALUE cNokogiriXmlDocumentFragment; -#endif - diff --git a/ext/nokogiri/xml_dtd.c b/ext/nokogiri/xml_dtd.c index e9ca71aafc..98e687fff1 100644 --- a/ext/nokogiri/xml_dtd.c +++ b/ext/nokogiri/xml_dtd.c @@ -1,30 +1,35 @@ -#include +#include -static void notation_copier(void *payload, void *data, const xmlChar *name) +VALUE cNokogiriXmlDtd; + +static void +notation_copier(void *c_notation_ptr, void *rb_hash_ptr, const xmlChar *name) { - VALUE hash = (VALUE)data; - VALUE klass = rb_const_get(mNokogiriXml, rb_intern("Notation")); + VALUE rb_hash = (VALUE)rb_hash_ptr; + xmlNotationPtr c_notation = (xmlNotationPtr)c_notation_ptr; + VALUE rb_notation; + VALUE cNokogiriXmlNotation; + VALUE rb_constructor_args[3]; - xmlNotationPtr c_notation = (xmlNotationPtr)payload; - VALUE notation; - VALUE argv[3]; - argv[0] = (c_notation->name ? NOKOGIRI_STR_NEW2(c_notation->name) : Qnil); - argv[1] = (c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID) : Qnil); - argv[2] = (c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID) : Qnil); + rb_constructor_args[0] = (c_notation->name ? 
NOKOGIRI_STR_NEW2(c_notation->name) : Qnil); + rb_constructor_args[1] = (c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID) : Qnil); + rb_constructor_args[2] = (c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID) : Qnil); - notation = rb_class_new_instance(3, argv, klass); + cNokogiriXmlNotation = rb_const_get_at(mNokogiriXml, rb_intern("Notation")); + rb_notation = rb_class_new_instance(3, rb_constructor_args, cNokogiriXmlNotation); - rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name),notation); + rb_hash_aset(rb_hash, NOKOGIRI_STR_NEW2(name), rb_notation); } -static void element_copier(void *_payload, void *data, const xmlChar *name) +static void +element_copier(void *c_node_ptr, void *rb_hash_ptr, const xmlChar *c_name) { - VALUE hash = (VALUE)data; - xmlNodePtr payload = (xmlNodePtr)_payload; + VALUE rb_hash = (VALUE)rb_hash_ptr; + xmlNodePtr c_node = (xmlNodePtr)c_node_ptr; - VALUE element = Nokogiri_wrap_xml_node(Qnil, payload); + VALUE rb_node = noko_xml_node_wrap(Qnil, c_node); - rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name), element); + rb_hash_aset(rb_hash, NOKOGIRI_STR_NEW2(c_name), rb_node); } /* @@ -33,14 +38,15 @@ static void element_copier(void *_payload, void *data, const xmlChar *name) * * Get a hash of the elements for this DTD. */ -static VALUE entities(VALUE self) +static VALUE +entities(VALUE self) { xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); - if(!dtd->entities) return Qnil; + if (!dtd->entities) { return Qnil; } hash = rb_hash_new(); @@ -51,18 +57,19 @@ static VALUE entities(VALUE self) /* * call-seq: - * notations + * notations() → Hash * - * Get a hash of the notations for this DTD. + * [Returns] All the notations for this DTD in a Hash of Notation +name+ to Notation. 
*/ -static VALUE notations(VALUE self) +static VALUE +notations(VALUE self) { xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); - if(!dtd->notations) return Qnil; + if (!dtd->notations) { return Qnil; } hash = rb_hash_new(); @@ -77,16 +84,17 @@ static VALUE notations(VALUE self) * * Get a hash of the attributes for this DTD. */ -static VALUE attributes(VALUE self) +static VALUE +attributes(VALUE self) { xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); hash = rb_hash_new(); - if(!dtd->attributes) return hash; + if (!dtd->attributes) { return hash; } xmlHashScan((xmlHashTablePtr)dtd->attributes, element_copier, (void *)hash); @@ -99,14 +107,15 @@ static VALUE attributes(VALUE self) * * Get a hash of the elements for this DTD. */ -static VALUE elements(VALUE self) +static VALUE +elements(VALUE self) { xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); - if(!dtd->elements) return Qnil; + if (!dtd->elements) { return Qnil; } hash = rb_hash_new(); @@ -121,15 +130,16 @@ static VALUE elements(VALUE self) * * Validate +document+ returning a list of errors */ -static VALUE validate(VALUE self, VALUE document) +static VALUE +validate(VALUE self, VALUE document) { xmlDocPtr doc; xmlDtdPtr dtd; xmlValidCtxtPtr ctxt; VALUE error_list; - Data_Get_Struct(self, xmlDtd, dtd); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(document, xmlDoc, doc); error_list = rb_ary_new(); ctxt = xmlNewValidCtxt(); @@ -151,12 +161,13 @@ static VALUE validate(VALUE self, VALUE document) * * Get the System ID for this DTD */ -static VALUE system_id(VALUE self) +static VALUE +system_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); - if(!dtd->SystemID) return Qnil; + if (!dtd->SystemID) { return Qnil; } return 
NOKOGIRI_STR_NEW2(dtd->SystemID); } @@ -167,36 +178,31 @@ static VALUE system_id(VALUE self) * * Get the External ID for this DTD */ -static VALUE external_id(VALUE self) +static VALUE +external_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); - if(!dtd->ExternalID) return Qnil; + if (!dtd->ExternalID) { return Qnil; } return NOKOGIRI_STR_NEW2(dtd->ExternalID); } -VALUE cNokogiriXmlDtd; - -void init_xml_dtd() +void +noko_init_xml_dtd(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - + assert(cNokogiriXmlNode); /* * Nokogiri::XML::DTD wraps DTD nodes in an XML document */ - VALUE klass = rb_define_class_under(xml, "DTD", node); - - cNokogiriXmlDtd = klass; - - rb_define_method(klass, "notations", notations, 0); - rb_define_method(klass, "elements", elements, 0); - rb_define_method(klass, "entities", entities, 0); - rb_define_method(klass, "validate", validate, 1); - rb_define_method(klass, "attributes", attributes, 0); - rb_define_method(klass, "system_id", system_id, 0); - rb_define_method(klass, "external_id", external_id, 0); + cNokogiriXmlDtd = rb_define_class_under(mNokogiriXml, "DTD", cNokogiriXmlNode); + + rb_define_method(cNokogiriXmlDtd, "notations", notations, 0); + rb_define_method(cNokogiriXmlDtd, "elements", elements, 0); + rb_define_method(cNokogiriXmlDtd, "entities", entities, 0); + rb_define_method(cNokogiriXmlDtd, "validate", validate, 1); + rb_define_method(cNokogiriXmlDtd, "attributes", attributes, 0); + rb_define_method(cNokogiriXmlDtd, "system_id", system_id, 0); + rb_define_method(cNokogiriXmlDtd, "external_id", external_id, 0); } diff --git a/ext/nokogiri/xml_dtd.h b/ext/nokogiri/xml_dtd.h deleted file mode 100644 index fc432b5261..0000000000 --- a/ext/nokogiri/xml_dtd.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_DTD -#define NOKOGIRI_XML_DTD - 
-#include - -extern VALUE cNokogiriXmlDtd; - -void init_xml_dtd(); - -#endif diff --git a/ext/nokogiri/xml_element_content.c b/ext/nokogiri/xml_element_content.c index 6b7bc18920..90c4b3dc2e 100644 --- a/ext/nokogiri/xml_element_content.c +++ b/ext/nokogiri/xml_element_content.c @@ -1,4 +1,4 @@ -#include +#include VALUE cNokogiriXmlElementContent; @@ -8,12 +8,13 @@ VALUE cNokogiriXmlElementContent; * * Get the require element +name+ */ -static VALUE get_name(VALUE self) +static VALUE +get_name(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - if(!elem->name) return Qnil; + if (!elem->name) { return Qnil; } return NOKOGIRI_STR_NEW2(elem->name); } @@ -24,12 +25,13 @@ static VALUE get_name(VALUE self) * Get the element content +type+. Possible values are PCDATA, ELEMENT, SEQ, * or OR. */ -static VALUE get_type(VALUE self) +static VALUE +get_type(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - return INT2NUM((long)elem->type); + return INT2NUM(elem->type); } /* @@ -38,13 +40,14 @@ static VALUE get_type(VALUE self) * * Get the first child. */ -static VALUE get_c1(VALUE self) +static VALUE +get_c1(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - if(!elem->c1) return Qnil; - return Nokogiri_wrap_element_content(rb_iv_get(self, "@document"), elem->c1); + if (!elem->c1) { return Qnil; } + return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1); } /* @@ -53,13 +56,14 @@ static VALUE get_c1(VALUE self) * * Get the first child. 
*/ -static VALUE get_c2(VALUE self) +static VALUE +get_c2(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - if(!elem->c2) return Qnil; - return Nokogiri_wrap_element_content(rb_iv_get(self, "@document"), elem->c2); + if (!elem->c2) { return Qnil; } + return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2); } /* @@ -69,12 +73,13 @@ static VALUE get_c2(VALUE self) * Get the element content +occur+ flag. Possible values are ONCE, OPT, MULT * or PLUS. */ -static VALUE get_occur(VALUE self) +static VALUE +get_occur(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - return INT2NUM((long)elem->ocur); + return INT2NUM(elem->ocur); } /* @@ -83,17 +88,19 @@ static VALUE get_occur(VALUE self) * * Get the element content namespace +prefix+. */ -static VALUE get_prefix(VALUE self) +static VALUE +get_prefix(VALUE self) { xmlElementContentPtr elem; Data_Get_Struct(self, xmlElementContent, elem); - if(!elem->prefix) return Qnil; + if (!elem->prefix) { return Qnil; } return NOKOGIRI_STR_NEW2(elem->prefix); } -VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element) +VALUE +noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element) { VALUE elem = Data_Wrap_Struct(cNokogiriXmlElementContent, 0, 0, element); @@ -104,20 +111,18 @@ VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element) return elem; } -void init_xml_element_content() +void +noko_init_xml_element_content(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); + cNokogiriXmlElementContent = rb_define_class_under(mNokogiriXml, "ElementContent", rb_cObject); - VALUE klass = rb_define_class_under(xml, "ElementContent", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlElementContent); - cNokogiriXmlElementContent = klass; + rb_define_method(cNokogiriXmlElementContent, "name", get_name, 0); + 
rb_define_method(cNokogiriXmlElementContent, "type", get_type, 0); + rb_define_method(cNokogiriXmlElementContent, "occur", get_occur, 0); + rb_define_method(cNokogiriXmlElementContent, "prefix", get_prefix, 0); - rb_define_method(klass, "name", get_name, 0); - rb_define_method(klass, "type", get_type, 0); - rb_define_method(klass, "occur", get_occur, 0); - rb_define_method(klass, "prefix", get_prefix, 0); - - rb_define_private_method(klass, "c1", get_c1, 0); - rb_define_private_method(klass, "c2", get_c2, 0); + rb_define_private_method(cNokogiriXmlElementContent, "c1", get_c1, 0); + rb_define_private_method(cNokogiriXmlElementContent, "c2", get_c2, 0); } diff --git a/ext/nokogiri/xml_element_content.h b/ext/nokogiri/xml_element_content.h deleted file mode 100644 index 9ae79604b4..0000000000 --- a/ext/nokogiri/xml_element_content.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_ELEMENT_CONTENT -#define NOKOGIRI_XML_ELEMENT_CONTENT - -#include - - -VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element); -void init_xml_element_content(); - -#endif diff --git a/ext/nokogiri/xml_element_decl.c b/ext/nokogiri/xml_element_decl.c index ce3555bd19..5b8d5572ae 100644 --- a/ext/nokogiri/xml_element_decl.c +++ b/ext/nokogiri/xml_element_decl.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlElementDecl; static ID id_document; @@ -8,11 +10,12 @@ static ID id_document; * * The element_type */ -static VALUE element_type(VALUE self) +static VALUE +element_type(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); - return INT2NUM((long)node->etype); + Noko_Node_Get_Struct(self, xmlElement, node); + return INT2NUM(node->etype); } /* @@ -21,17 +24,18 @@ static VALUE element_type(VALUE self) * * The allowed content for this ElementDecl */ -static VALUE content(VALUE self) +static VALUE +content(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); - 
if(!node->content) return Qnil; + if (!node->content) { return Qnil; } - return Nokogiri_wrap_element_content( - rb_funcall(self, id_document, 0), - node->content - ); + return noko_xml_element_content_wrap( + rb_funcall(self, id_document, 0), + node->content + ); } /* @@ -40,30 +44,26 @@ static VALUE content(VALUE self) * * The namespace prefix for this ElementDecl */ -static VALUE prefix(VALUE self) +static VALUE +prefix(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); - if(!node->prefix) return Qnil; + if (!node->prefix) { return Qnil; } return NOKOGIRI_STR_NEW2(node->prefix); } -VALUE cNokogiriXmlElementDecl; - -void init_xml_element_decl() +void +noko_init_xml_element_decl(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE klass = rb_define_class_under(xml, "ElementDecl", node); - - cNokogiriXmlElementDecl = klass; + assert(cNokogiriXmlNode); + cNokogiriXmlElementDecl = rb_define_class_under(mNokogiriXml, "ElementDecl", cNokogiriXmlNode); - rb_define_method(klass, "element_type", element_type, 0); - rb_define_method(klass, "content", content, 0); - rb_define_method(klass, "prefix", prefix, 0); + rb_define_method(cNokogiriXmlElementDecl, "element_type", element_type, 0); + rb_define_method(cNokogiriXmlElementDecl, "content", content, 0); + rb_define_method(cNokogiriXmlElementDecl, "prefix", prefix, 0); id_document = rb_intern("document"); } diff --git a/ext/nokogiri/xml_element_decl.h b/ext/nokogiri/xml_element_decl.h deleted file mode 100644 index 013381cc23..0000000000 --- a/ext/nokogiri/xml_element_decl.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_ELEMENT_DECL -#define NOKOGIRI_XML_ELEMENT_DECL - -#include - -void init_xml_element_decl(); - -extern VALUE cNokogiriXmlElementDecl; -#endif diff --git a/ext/nokogiri/xml_encoding_handler.c 
b/ext/nokogiri/xml_encoding_handler.c index 837c28da25..305c9434ef 100644 --- a/ext/nokogiri/xml_encoding_handler.c +++ b/ext/nokogiri/xml_encoding_handler.c @@ -1,63 +1,84 @@ -#include +#include + +VALUE cNokogiriEncodingHandler; + + +static void +_xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler) +{ + /* make sure iconv handlers are cleaned up and freed */ + xmlCharEncCloseFunc(c_handler); +} + /* * call-seq: Nokogiri::EncodingHandler.[](name) * * Get the encoding handler for +name+ */ -static VALUE get(VALUE klass, VALUE key) +static VALUE +rb_xml_encoding_handler_s_get(VALUE klass, VALUE key) { xmlCharEncodingHandlerPtr handler; handler = xmlFindCharEncodingHandler(StringValueCStr(key)); - if(handler) - return Data_Wrap_Struct(klass, NULL, NULL, handler); + if (handler) { + return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler); + } return Qnil; } + /* * call-seq: Nokogiri::EncodingHandler.delete(name) * * Delete the encoding alias named +name+ */ -static VALUE delete(VALUE klass, VALUE name) +static VALUE +rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name) { - if(xmlDelEncodingAlias(StringValueCStr(name))) return Qnil; + if (xmlDelEncodingAlias(StringValueCStr(name))) { return Qnil; } return Qtrue; } + /* - * call-seq: Nokogiri::EncodingHandler.alias(from, to) + * call-seq: Nokogiri::EncodingHandler.alias(real_name, alias_name) * - * Alias encoding handler with name +from+ to name +to+ + * Alias encoding handler with name +real_name+ to name +alias_name+ */ -static VALUE alias(VALUE klass, VALUE from, VALUE to) +static VALUE +rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to) { xmlAddEncodingAlias(StringValueCStr(from), StringValueCStr(to)); return to; } + /* * call-seq: Nokogiri::EncodingHandler.clear_aliases! * * Remove all encoding aliases. 
*/ -static VALUE clear_aliases(VALUE klass) +static VALUE +rb_xml_encoding_handler_s_clear_aliases(VALUE klass) { xmlCleanupEncodingAliases(); return klass; } + /* * call-seq: name * * Get the name of this EncodingHandler */ -static VALUE name(VALUE self) +static VALUE +rb_xml_encoding_handler_name(VALUE self) { xmlCharEncodingHandlerPtr handler; @@ -66,14 +87,18 @@ static VALUE name(VALUE self) return NOKOGIRI_STR_NEW2(handler->name); } -void init_xml_encoding_handler() + +void +noko_init_xml_encoding_handler(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE klass = rb_define_class_under(nokogiri, "EncodingHandler", rb_cObject); - - rb_define_singleton_method(klass, "[]", get, 1); - rb_define_singleton_method(klass, "delete", delete, 1); - rb_define_singleton_method(klass, "alias", alias, 2); - rb_define_singleton_method(klass, "clear_aliases!", clear_aliases, 0); - rb_define_method(klass, "name", name, 0); + cNokogiriEncodingHandler = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject); + + rb_undef_alloc_func(cNokogiriEncodingHandler); + + rb_define_singleton_method(cNokogiriEncodingHandler, "[]", rb_xml_encoding_handler_s_get, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "delete", rb_xml_encoding_handler_s_delete, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "alias", rb_xml_encoding_handler_s_alias, 2); + rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", rb_xml_encoding_handler_s_clear_aliases, 0); + + rb_define_method(cNokogiriEncodingHandler, "name", rb_xml_encoding_handler_name, 0); } diff --git a/ext/nokogiri/xml_encoding_handler.h b/ext/nokogiri/xml_encoding_handler.h deleted file mode 100644 index f2e5bf4a04..0000000000 --- a/ext/nokogiri/xml_encoding_handler.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef NOKOGIRI_XML_ENCODING_HANDLER -#define NOKOGIRI_XML_ENCODING_HANDLER - -#include - -void init_xml_encoding_handler(); - -#endif diff --git a/ext/nokogiri/xml_entity_decl.c 
b/ext/nokogiri/xml_entity_decl.c index 31997cb0da..4b7f40783b 100644 --- a/ext/nokogiri/xml_entity_decl.c +++ b/ext/nokogiri/xml_entity_decl.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlEntityDecl; /* * call-seq: @@ -6,12 +8,13 @@ * * Get the original_content before ref substitution */ -static VALUE original_content(VALUE self) +static VALUE +original_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); - if(!node->orig) return Qnil; + if (!node->orig) { return Qnil; } return NOKOGIRI_STR_NEW2(node->orig); } @@ -22,12 +25,13 @@ static VALUE original_content(VALUE self) * * Get the content */ -static VALUE get_content(VALUE self) +static VALUE +get_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); - if(!node->content) return Qnil; + if (!node->content) { return Qnil; } return NOKOGIRI_STR_NEW(node->content, node->length); } @@ -38,10 +42,11 @@ static VALUE get_content(VALUE self) * * Get the entity type */ -static VALUE entity_type(VALUE self) +static VALUE +entity_type(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); return INT2NUM((int)node->etype); } @@ -52,12 +57,13 @@ static VALUE entity_type(VALUE self) * * Get the external identifier for PUBLIC */ -static VALUE external_id(VALUE self) +static VALUE +external_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); - if(!node->ExternalID) return Qnil; + if (!node->ExternalID) { return Qnil; } return NOKOGIRI_STR_NEW2(node->ExternalID); } @@ -68,43 +74,39 @@ static VALUE external_id(VALUE self) * * Get the URI for a SYSTEM or PUBLIC Entity */ -static VALUE system_id(VALUE self) +static VALUE +system_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, 
xmlEntity, node); - if(!node->SystemID) return Qnil; + if (!node->SystemID) { return Qnil; } return NOKOGIRI_STR_NEW2(node->SystemID); } -VALUE cNokogiriXmlEntityDecl; - -void init_xml_entity_decl() +void +noko_init_xml_entity_decl(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE klass = rb_define_class_under(xml, "EntityDecl", node); - - cNokogiriXmlEntityDecl = klass; + assert(cNokogiriXmlNode); + cNokogiriXmlEntityDecl = rb_define_class_under(mNokogiriXml, "EntityDecl", cNokogiriXmlNode); - rb_define_method(klass, "original_content", original_content, 0); - rb_define_method(klass, "content", get_content, 0); - rb_define_method(klass, "entity_type", entity_type, 0); - rb_define_method(klass, "external_id", external_id, 0); - rb_define_method(klass, "system_id", system_id, 0); + rb_define_method(cNokogiriXmlEntityDecl, "original_content", original_content, 0); + rb_define_method(cNokogiriXmlEntityDecl, "content", get_content, 0); + rb_define_method(cNokogiriXmlEntityDecl, "entity_type", entity_type, 0); + rb_define_method(cNokogiriXmlEntityDecl, "external_id", external_id, 0); + rb_define_method(cNokogiriXmlEntityDecl, "system_id", system_id, 0); rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_GENERAL"), - INT2NUM(XML_INTERNAL_GENERAL_ENTITY)); + INT2NUM(XML_INTERNAL_GENERAL_ENTITY)); rb_const_set(cNokogiriXmlEntityDecl, rb_intern("EXTERNAL_GENERAL_PARSED"), - INT2NUM(XML_EXTERNAL_GENERAL_PARSED_ENTITY)); + INT2NUM(XML_EXTERNAL_GENERAL_PARSED_ENTITY)); rb_const_set(cNokogiriXmlEntityDecl, rb_intern("EXTERNAL_GENERAL_UNPARSED"), - INT2NUM(XML_EXTERNAL_GENERAL_UNPARSED_ENTITY)); + INT2NUM(XML_EXTERNAL_GENERAL_UNPARSED_ENTITY)); rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_PARAMETER"), - INT2NUM(XML_INTERNAL_PARAMETER_ENTITY)); + INT2NUM(XML_INTERNAL_PARAMETER_ENTITY)); rb_const_set(cNokogiriXmlEntityDecl, 
rb_intern("EXTERNAL_PARAMETER"), - INT2NUM(XML_EXTERNAL_PARAMETER_ENTITY)); + INT2NUM(XML_EXTERNAL_PARAMETER_ENTITY)); rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_PREDEFINED"), - INT2NUM(XML_INTERNAL_PREDEFINED_ENTITY)); + INT2NUM(XML_INTERNAL_PREDEFINED_ENTITY)); } diff --git a/ext/nokogiri/xml_entity_decl.h b/ext/nokogiri/xml_entity_decl.h deleted file mode 100644 index bbca8f8ec9..0000000000 --- a/ext/nokogiri/xml_entity_decl.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_ENTITY_DECL -#define NOKOGIRI_XML_ENTITY_DECL - -#include - -void init_xml_entity_decl(); - -extern VALUE cNokogiriXmlEntityDecl; -#endif - diff --git a/ext/nokogiri/xml_entity_reference.c b/ext/nokogiri/xml_entity_reference.c index 17fe6fa72b..d8f12438c5 100644 --- a/ext/nokogiri/xml_entity_reference.c +++ b/ext/nokogiri/xml_entity_reference.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlEntityReference; /* * call-seq: @@ -6,7 +8,8 @@ * * Create a new EntityReference element on the +document+ with +name+ */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; xmlNodePtr node; @@ -20,33 +23,28 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) Data_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewReference( - xml_doc, - (const xmlChar *)StringValueCStr(name) - ); + xml_doc, + (const xmlChar *)StringValueCStr(name) + ); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - rb_node = Nokogiri_wrap_xml_node(klass, node); + rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) rb_yield(rb_node); + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } -VALUE cNokogiriXmlEntityReference; -void init_xml_entity_reference() +void +noko_init_xml_entity_reference(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, 
"Node", rb_cObject); - + assert(cNokogiriXmlNode); /* * EntityReference represents an EntityReference node in an xml document. */ - VALUE klass = rb_define_class_under(xml, "EntityReference", node); - - cNokogiriXmlEntityReference = klass; + cNokogiriXmlEntityReference = rb_define_class_under(mNokogiriXml, "EntityReference", cNokogiriXmlNode); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlEntityReference, "new", new, -1); } diff --git a/ext/nokogiri/xml_entity_reference.h b/ext/nokogiri/xml_entity_reference.h deleted file mode 100644 index 7635a73a92..0000000000 --- a/ext/nokogiri/xml_entity_reference.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_ENTITY_REFERENCE -#define NOKOGIRI_XML_ENTITY_REFERENCE - -#include - -void init_xml_entity_reference(); - -extern VALUE cNokogiriXmlEntityReference; -#endif diff --git a/ext/nokogiri/xml_io.c b/ext/nokogiri/xml_io.c deleted file mode 100644 index 46655e4731..0000000000 --- a/ext/nokogiri/xml_io.c +++ /dev/null @@ -1,61 +0,0 @@ -#include - -static ID id_read, id_write; - -VALUE read_check(VALUE *args) { - return rb_funcall(args[0], id_read, 1, args[1]); -} - -VALUE read_failed(void) { - return Qundef; -} - -int io_read_callback(void * ctx, char * buffer, int len) { - VALUE string, args[2]; - size_t str_len, safe_len; - - args[0] = (VALUE)ctx; - args[1] = INT2NUM(len); - - string = rb_rescue(read_check, (VALUE)args, read_failed, 0); - - if (NIL_P(string)) return 0; - if (string == Qundef) return -1; - if (TYPE(string) != T_STRING) return -1; - - str_len = (size_t)RSTRING_LEN(string); - safe_len = str_len > (size_t)len ? 
(size_t)len : str_len; - memcpy(buffer, StringValuePtr(string), safe_len); - - return (int)safe_len; -} - -VALUE write_check(VALUE *args) { - return rb_funcall(args[0], id_write, 1, args[1]); -} - -VALUE write_failed(void) { - return Qundef; -} - -int io_write_callback(void * ctx, char * buffer, int len) { - VALUE args[2], size; - - args[0] = (VALUE)ctx; - args[1] = rb_str_new(buffer, (long)len); - - size = rb_rescue(write_check, (VALUE)args, write_failed, 0); - - if (size == Qundef) return -1; - - return NUM2INT(size); -} - -int io_close_callback(void * ctx) { - return 0; -} - -void init_nokogiri_io() { - id_read = rb_intern("read"); - id_write = rb_intern("write"); -} diff --git a/ext/nokogiri/xml_io.h b/ext/nokogiri/xml_io.h deleted file mode 100644 index 6e71b099a8..0000000000 --- a/ext/nokogiri/xml_io.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef NOKOGIRI_XML_IO -#define NOKOGIRI_XML_IO - -#include - -int io_read_callback(void * ctx, char * buffer, int len); -int io_write_callback(void * ctx, char * buffer, int len); -int io_close_callback(void * ctx); -void init_nokogiri_io(); - -#endif diff --git a/ext/nokogiri/xml_libxml2_hacks.c b/ext/nokogiri/xml_libxml2_hacks.c deleted file mode 100644 index 7a927b65b5..0000000000 --- a/ext/nokogiri/xml_libxml2_hacks.c +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef HAVE_XMLFIRSTELEMENTCHILD -#include -/** - * xmlFirstElementChild: - * @parent: the parent node - * - * Finds the first child node of that element which is a Element node - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. 
- * - * Returns the first element child or NULL if not available - */ -xmlNodePtr -xmlFirstElementChild(xmlNodePtr parent) { - xmlNodePtr cur = NULL; - - if (parent == NULL) - return(NULL); - switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->children; - break; - default: - return(NULL); - } - while (cur != NULL) { - if (cur->type == XML_ELEMENT_NODE) - return(cur); - cur = cur->next; - } - return(NULL); -} - -/** - * xmlNextElementSibling: - * @node: the current node - * - * Finds the first closest next sibling of the node which is an - * element node. - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. - * - * Returns the next element sibling or NULL if not available - */ -xmlNodePtr -xmlNextElementSibling(xmlNodePtr node) { - if (node == NULL) - return(NULL); - switch (node->type) { - case XML_ELEMENT_NODE: - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - case XML_ENTITY_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DTD_NODE: - case XML_XINCLUDE_START: - case XML_XINCLUDE_END: - node = node->next; - break; - default: - return(NULL); - } - while (node != NULL) { - if (node->type == XML_ELEMENT_NODE) - return(node); - node = node->next; - } - return(NULL); -} - -/** - * xmlLastElementChild: - * @parent: the parent node - * - * Finds the last child node of that element which is a Element node - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. 
- * - * Returns the last element child or NULL if not available - */ -xmlNodePtr -xmlLastElementChild(xmlNodePtr parent) { - xmlNodePtr cur = NULL; - - if (parent == NULL) - return(NULL); - switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->last; - break; - default: - return(NULL); - } - while (cur != NULL) { - if (cur->type == XML_ELEMENT_NODE) - return(cur); - cur = cur->prev; - } - return(NULL); -} -#endif diff --git a/ext/nokogiri/xml_libxml2_hacks.h b/ext/nokogiri/xml_libxml2_hacks.h deleted file mode 100644 index a78aff0ed4..0000000000 --- a/ext/nokogiri/xml_libxml2_hacks.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef HAVE_XMLFIRSTELEMENTCHILD - -#ifndef XML_LIBXML2_HACKS -#define XML_LIBXML2_HACKS - -xmlNodePtr xmlFirstElementChild(xmlNodePtr parent); -xmlNodePtr xmlNextElementSibling(xmlNodePtr node); -xmlNodePtr xmlLastElementChild(xmlNodePtr parent); - -#endif - -#endif diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 7aea6ce1ac..d4d85790d0 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -1,111 +1,186 @@ -#include +#include + +/* + * The lifecycle of a Namespace node is more complicated than other Nodes, for two reasons: + * + * 1. the underlying C structure has a different layout than all the other node structs, with the + * `_private` member where we store a pointer to Ruby object data not being in first position. + * 2. xmlNs structures returned in an xmlNodeset from an XPath query are copies of the document's + * namespaces, and so do not share the same memory lifecycle as everything else in a document. + * + * As a result of 1, you may see special handling of XML_NAMESPACE_DECL node types throughout the + * Nokogiri C code, though I intend to wrap up that logic in ruby_object_{get,set} functions + * shortly. 
+ * + * As a result of 2, you will see we have special handling in this file and in xml_node_set.c to + * carefully manage the memory lifecycle of xmlNs structs to match the Ruby object's GC + * lifecycle. In xml_node_set.c we have local versions of xmlXPathNodeSetDel() and + * xmlXPathFreeNodeSet() that avoid freeing xmlNs structs in the node set. In this file, we decide + * whether or not to call dealloc_namespace() depending on whether the xmlNs struct appears to be + * in an xmlNodeSet (and thus the result of an XPath query) or not. + * + * Yes, this is madness. + */ VALUE cNokogiriXmlNamespace ; -static void dealloc_namespace(xmlNsPtr ns) +static void +_xml_namespace_dealloc(void *ptr) { /* - * * this deallocator is only used for namespace nodes that are part of an xpath - * node set. - * - * see Nokogiri_wrap_xml_namespace() for more details. - * + * node set. see noko_xml_namespace_wrap(). */ - NOKOGIRI_DEBUG_START(ns) ; + xmlNsPtr ns = ptr; + if (ns->href) { - xmlFree((xmlChar *)(uintptr_t)ns->href); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href)); } if (ns->prefix) { - xmlFree((xmlChar *)(uintptr_t)ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix)); } xmlFree(ns); - NOKOGIRI_DEBUG_END(ns) ; } +#ifdef HAVE_RB_GC_LOCATION +static void +_xml_namespace_update_references(void *ptr) +{ + xmlNsPtr ns = ptr; + if (ns->_private) { + ns->_private = (void *)rb_gc_location((VALUE)ns->_private); + } +} +#else +# define _xml_namespace_update_references 0 +#endif + +static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = { + "Nokogiri/XMLNamespace/WithDealloc", + {0, _xml_namespace_dealloc, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = { + "Nokogiri/XMLNamespace/WithoutDealloc", + {0, 0, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + 
RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; /* - * call-seq: - * prefix + * :call-seq: + * prefix() → String or nil + * + * Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace). + * + * *Example* + * + * doc = Nokogiri::XML.parse(<<~XML) + * + * + * + * + * + * XML + * + * doc.root.elements.first.namespace.prefix + * # => nil * - * Get the prefix for this namespace. Returns +nil+ if there is no prefix. + * doc.root.elements.last.namespace.prefix + * # => "noko" */ -static VALUE prefix(VALUE self) +static VALUE +prefix(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); - if(!ns->prefix) return Qnil; + Noko_Namespace_Get_Struct(self, xmlNs, ns); + if (!ns->prefix) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->prefix); } /* - * call-seq: - * href + * :call-seq: + * href() → String * - * Get the href for this namespace + * Returns the URI reference for this Namespace. + * + * *Example* + * + * doc = Nokogiri::XML.parse(<<~XML) + * + * + * + * + * + * XML + * + * doc.root.elements.first.namespace.href + * # => "http://nokogiri.org/ns/default" + * + * doc.root.elements.last.namespace.href + * # => "http://nokogiri.org/ns/noko" */ -static VALUE href(VALUE self) +static VALUE +href(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); - if(!ns->href) return Qnil; + Noko_Namespace_Get_Struct(self, xmlNs, ns); + if (!ns->href) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->href); } -static int part_of_an_xpath_node_set_eh(xmlNsPtr node) -{ - return (node->next && ! 
NOKOGIRI_NAMESPACE_EH(node->next)); -} - -VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node) +VALUE +noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document) { - VALUE ns = 0, document, node_cache; + VALUE rb_namespace; - assert(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE); - - if (node->_private) return (VALUE)node->_private; - - if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; + if (c_namespace->_private) { + return (VALUE)c_namespace->_private; + } - if (DOC_RUBY_OBJECT_TEST(doc)) { - document = DOC_RUBY_OBJECT(doc); + if (c_document) { + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_without_dealloc, + c_namespace); - if (part_of_an_xpath_node_set_eh(node)) { - /* - * this is a duplicate returned as part of an xpath query node set, and so - * we need to make sure we manage this memory. - * - * see comments in xml_node_set.c for more details. - */ - ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, node); - } else { - ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node); - node_cache = rb_iv_get(document, "@node_cache"); - rb_ary_push(node_cache, ns); + if (DOC_RUBY_OBJECT_TEST(c_document)) { + rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document)); + rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace); } - - rb_iv_set(ns, "@document", document); } else { - ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node); + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_with_dealloc, + c_namespace); } - node->_private = (void *)ns; + c_namespace->_private = (void *)rb_namespace; - return ns; + return rb_namespace; +} + +VALUE +noko_xml_namespace_wrap_xpath_copy(xmlNsPtr c_namespace) +{ + return noko_xml_namespace_wrap(c_namespace, NULL); } -void init_xml_namespace() +void +noko_init_xml_namespace(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = 
rb_define_module_under(nokogiri, "XML"); - VALUE klass = rb_define_class_under(xml, "Namespace", rb_cObject); + cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject); - cNokogiriXmlNamespace = klass; + rb_undef_alloc_func(cNokogiriXmlNamespace); - rb_define_method(klass, "prefix", prefix, 0); - rb_define_method(klass, "href", href, 0); + rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0); + rb_define_method(cNokogiriXmlNamespace, "href", href, 0); } diff --git a/ext/nokogiri/xml_namespace.h b/ext/nokogiri/xml_namespace.h deleted file mode 100644 index 4b09a03f6b..0000000000 --- a/ext/nokogiri/xml_namespace.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef NOKOGIRI_XML_NAMESPACE -#define NOKOGIRI_XML_NAMESPACE - -#include - -void init_xml_namespace(); - -extern VALUE cNokogiriXmlNamespace ; - -VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node); - -#define NOKOGIRI_NAMESPACE_EH(node) ((node)->type == XML_NAMESPACE_DECL) - -#endif diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 6190ee5ed7..7ddc639cbe 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1,36 +1,61 @@ -#include +#include -static ID decorate, decorate_bang; +#include -#ifdef DEBUG -static void debug_node_dealloc(xmlNodePtr x) -{ - NOKOGIRI_DEBUG_START(x) - NOKOGIRI_DEBUG_END(x) -} -#else -# define debug_node_dealloc 0 -#endif +// :stopdoc: -static void mark(xmlNodePtr node) +VALUE cNokogiriXmlNode ; +static ID id_decorate, id_decorate_bang; + +typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); + +static void +_xml_node_mark(void *ptr) { + xmlNodePtr node = ptr; + + if (!DOC_RUBY_OBJECT_TEST(node->doc)) { + return; + } + xmlDocPtr doc = node->doc; - if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) { - if(DOC_RUBY_OBJECT_TEST(doc)) { + if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) { + if (DOC_RUBY_OBJECT_TEST(doc)) { rb_gc_mark(DOC_RUBY_OBJECT(doc)); } 
- } else if(node->doc->_private) { + } else if (node->doc->_private) { rb_gc_mark((VALUE)doc->_private); } } -/* :nodoc: */ -typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); +#ifdef HAVE_RB_GC_LOCATION +static void +_xml_node_update_references(void *ptr) +{ + xmlNodePtr node = ptr; -/* :nodoc: */ -static void relink_namespace(xmlNodePtr reparented) + if (node->_private) { + node->_private = (void *)rb_gc_location((VALUE)node->_private); + } +} +#else +# define _xml_node_update_references 0 +#endif + +static const rb_data_type_t nokogiri_node_type = { + "Nokogiri/XMLNode", + {_xml_node_mark, 0, 0, _xml_node_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static void +relink_namespace(xmlNodePtr reparented) { xmlNodePtr child; + xmlAttrPtr attr; if (reparented->type != XML_ATTRIBUTE_NODE && reparented->type != XML_ELEMENT_NODE) { return; } @@ -42,7 +67,7 @@ static void relink_namespace(xmlNodePtr reparented) name = xmlSplitQName2(reparented->name, &prefix); if (reparented->type == XML_ATTRIBUTE_NODE) { - if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) { + if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) { xmlFree(name); xmlFree(prefix); return; @@ -64,7 +89,9 @@ static void relink_namespace(xmlNodePtr reparented) if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; } /* Make sure that our reparented node has the correct namespaces */ - if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) { + if (!reparented->ns && + (reparented->doc != (xmlDocPtr)reparented->parent) && + (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) { xmlSetNs(reparented, reparented->parent->ns); } @@ -87,7 +114,7 @@ static void relink_namespace(xmlNodePtr reparented) } else { reparented->nsDef = curr->next; } - nokogiri_root_nsdef(curr, reparented->doc); + noko_xml_document_pin_namespace(curr, reparented->doc); 
} else { prev = curr; } @@ -127,16 +154,19 @@ static void relink_namespace(xmlNodePtr reparented) } if (reparented->type == XML_ELEMENT_NODE) { - child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes; - while(NULL != child) { - relink_namespace(child); - child = child->next; + attr = reparented->properties; + while (NULL != attr) { + relink_namespace((xmlNodePtr)attr); + attr = attr->next; } } } -/* :nodoc: */ -static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node) + +/* internal function meant to wrap xmlReplaceNode + and fix some issues we have with libxml2 merging nodes */ +static xmlNodePtr +xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node) { xmlNodePtr retval ; @@ -159,22 +189,34 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node) return retval ; } -/* :nodoc: */ -static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf) + +static void +raise_if_ancestor_of_self(xmlNodePtr self) +{ + for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) { + if (self == ancestor) { + rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name); + } + } +} + + +static VALUE +reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf) { VALUE reparented_obj ; - xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ; + xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ; int original_ns_prefix_is_default = 0 ; - if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) { + if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) { rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node"); } - if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) { + if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) { rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node"); } - Data_Get_Struct(reparentee_obj, 
xmlNode, reparentee); - Data_Get_Struct(pivot_obj, xmlNode, pivot); + Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee); + Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot); /* * Check if nodes given are appropriate to have a parent-child @@ -190,66 +232,66 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep if (parent) { switch (parent->type) { - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - switch (reparentee->type) { - case XML_ELEMENT_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DOCUMENT_TYPE_NODE: - /* - * The DOM specification says no to adding text-like nodes - * directly to a document, but we allow it for compatibility. - */ - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - goto ok; - default: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + switch (reparentee->type) { + case XML_ELEMENT_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + /* + * The DOM specification says no to adding text-like nodes + * directly to a document, but we allow it for compatibility. 
+ */ + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } break; - } - break; - case XML_DOCUMENT_FRAG_NODE: - case XML_ENTITY_REF_NODE: - case XML_ELEMENT_NODE: - switch (reparentee->type) { - case XML_ELEMENT_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: + case XML_DOCUMENT_FRAG_NODE: case XML_ENTITY_REF_NODE: - goto ok; - default: + case XML_ELEMENT_NODE: + switch (reparentee->type) { + case XML_ELEMENT_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } + break; + case XML_ATTRIBUTE_NODE: + switch (reparentee->type) { + case XML_TEXT_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } break; - } - break; - case XML_ATTRIBUTE_NODE: - switch (reparentee->type) { case XML_TEXT_NODE: - case XML_ENTITY_REF_NODE: - goto ok; + /* + * xmlAddChild() breaks the DOM specification in that it allows + * adding a text node to another, in which case text nodes are + * coalesced, but since our JRuby version does not support such + * operation, we should inhibit it. + */ + break; default: break; - } - break; - case XML_TEXT_NODE: - /* - * xmlAddChild() breaks the DOM specification in that it allows - * adding a text node to another, in which case text nodes are - * coalesced, but since our JRuby version does not support such - * operation, we should inhibit it. 
- */ - break; - default: - break; } rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj)); } ok: - xmlUnlinkNode(reparentee); + original_reparentee = reparentee; if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) { /* @@ -290,7 +332,7 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep original_ns_prefix_is_default = 1; } - nokogiri_root_node(reparentee); + noko_xml_document_pin_node(reparentee); if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) { rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)"); @@ -301,11 +343,13 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep * issue #391, where new node's prefix may become the string "default" * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior. */ - xmlFree(reparentee->ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix)); reparentee->ns->prefix = NULL; } } + xmlUnlinkNode(original_reparentee); + if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) { /* @@ -330,12 +374,12 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ; xmlUnlinkNode(next_text); - nokogiri_root_node(next_text); + noko_xml_document_pin_node(next_text); xmlAddNextSibling(pivot, new_next_text); } - if(!(reparented = (*prf)(pivot, reparentee))) { + if (!(reparented = (*prf)(pivot, reparentee))) { rb_raise(rb_eRuntimeError, "Could not reparent node"); } @@ -345,57 +389,432 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep * adjacent text nodes. 
*/ DATA_PTR(reparentee_obj) = reparented ; + reparented_obj = noko_xml_node_wrap(Qnil, reparented); - relink_namespace(reparented); + rb_funcall(reparented_obj, id_decorate_bang, 0); - reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented); + /* if we've created a cycle, raise an exception */ + raise_if_ancestor_of_self(reparented); - rb_funcall(reparented_obj, decorate_bang, 0); + relink_namespace(reparented); return reparented_obj ; } +// :startdoc: /* - * call-seq: - * document + * :call-seq: + * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace + * add_namespace(prefix, href) → Nokogiri::XML::Namespace + * + * :category: Manipulating Document Structure + * + * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had + * included an attribute "xmlns:prefix=href". + * + * A default namespace definition for this node can be added by passing +nil+ for +prefix+. + * + * [Parameters] + * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl] + * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces] + * + * [Returns] The new Nokogiri::XML::Namespace + * + * *Example:* adding a non-default namespace definition + * + * doc = Nokogiri::XML("") + * inventory = doc.at_css("inventory") + * inventory.add_namespace_definition("automobile", "http://alices-autos.com/") + * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/") + * inventory.add_child("Michelin model XGV, size 75R") + * doc.to_xml + * # => "\n" + + * # "\n" + + * # " \n" + + * # " Michelin model XGV, size 75R\n" + + * # " \n" + + * # "\n" + * + * *Example:* adding a default namespace definition + * + * doc = Nokogiri::XML("Michelin model XGV, size 75R") + * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/") + * doc.to_xml + * # => "\n" + + * # "\n" + + * # " \n" + + * # " Michelin model XGV, size 75R\n" + + * # " \n" + + * # "\n" + * + */ +static VALUE 
+rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href) +{ + xmlNodePtr c_node, element; + xmlNsPtr c_namespace; + const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix)); + + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + element = c_node ; + + c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix); + + if (!c_namespace) { + if (c_node->type != XML_ELEMENT_NODE) { + element = c_node->parent; + } + c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix); + } + + if (!c_namespace) { + return Qnil ; + } + + if (NIL_P(rb_prefix) || c_node != element) { + xmlSetNs(c_node, c_namespace); + } + + return noko_xml_namespace_wrap(c_namespace, c_node->doc); +} + + +/* + * :call-seq: attribute(name) → Nokogiri::XML::Attr + * + * :category: Working With Node Attributes + * + * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+. + * + * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is + * used to find a matching attribute. In case of a simple name collision, only one of the matching + * attributes will be returned. In this case, you will need to use #attribute_with_ns. + * + * *Example:* + * + * doc = Nokogiri::XML("") + * child = doc.at_css("child") + * child.attribute("size") # => # + * child.attribute("class") # => # + * + * *Example* showing that namespaced attributes will not be returned: + * + * ⚠ Note that only one of the two matching attributes is returned. 
+ * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute("size") + * # => #(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }) + */ +static VALUE +rb_xml_node_attribute(VALUE self, VALUE name) +{ + xmlNodePtr node; + xmlAttrPtr prop; + Noko_Node_Get_Struct(self, xmlNode, node); + prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name)); + + if (! prop) { return Qnil; } + return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); +} + + +/* + * :call-seq: attribute_nodes() → Array + * + * :category: Working With Node Attributes + * + * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node. + * + * Note that this is the preferred alternative to #attributes when the simple + * (non-namespace-prefixed) attribute names may collide. + * + * *Example:* + * + * Contrast this with the colliding-name example from #attributes. + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute_nodes + * # => [#(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }), + * # #(Attr:0x578 { + * # name = "size", + * # namespace = #(Namespace:0x58c { + * # prefix = "height", + * # href = "http://example.com/heights" + * # }), + * # value = "tall" + * # })] + */ +static VALUE +rb_xml_node_attribute_nodes(VALUE rb_node) +{ + xmlNodePtr c_node; + + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + return noko_xml_node_attrs(c_node); +} + + +/* + * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr + * + * :category: Working With Node Attributes + * + * [Returns] + * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+. 
+ * + * [Parameters] + * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute + * - +namespace+ (String): the URI of the attribute's namespace + * + * See related: #attribute + * + * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths") + * # => #(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }) + * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights") + * # => #(Attr:0x578 { + * # name = "size", + * # namespace = #(Namespace:0x58c { + * # prefix = "height", + * # href = "http://example.com/heights" + * # }), + * # value = "tall" + * # }) + */ +static VALUE +rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace) +{ + xmlNodePtr node; + xmlAttrPtr prop; + Noko_Node_Get_Struct(self, xmlNode, node); + prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name), + NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)); + + if (! prop) { return Qnil; } + return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); +} + + + +/* + * call-seq: blank? → Boolean + * + * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+. + * + * *Example:* + * + * Nokogiri("").root.child.blank? # => false + * Nokogiri("\t \n").root.child.blank? # => true + * Nokogiri("").root.child.blank? # => true + * Nokogiri("not-blank").root.child + * .tap { |n| n.content = "" }.blank? # => true + */ +static VALUE +rb_xml_node_blank_eh(VALUE self) +{ + xmlNodePtr node; + Noko_Node_Get_Struct(self, xmlNode, node); + return (1 == xmlIsBlankNode(node)) ?
Qtrue : Qfalse ; +} + + +/* + * :call-seq: child() → Nokogiri::XML::Node + * + * :category: Traversing Document Structure + * + * [Returns] First of this node's children, or +nil+ if there are no children + * + * This is a convenience method and is equivalent to: + * + * node.children.first + * + * See related: #children + */ +static VALUE +rb_xml_node_child(VALUE self) +{ + xmlNodePtr node, child; + Noko_Node_Get_Struct(self, xmlNode, node); + + child = node->children; + if (!child) { return Qnil; } + + return noko_xml_node_wrap(Qnil, child); +} + + +/* + * :call-seq: children() → Nokogiri::XML::NodeSet + * + * :category: Traversing Document Structure + * + * [Returns] Nokogiri::XML::NodeSet containing this node's children. + */ +static VALUE +rb_xml_node_children(VALUE self) +{ + xmlNodePtr node; + xmlNodePtr child; + xmlNodeSetPtr set; + VALUE document; + VALUE node_set; + + Noko_Node_Get_Struct(self, xmlNode, node); + + child = node->children; + set = xmlXPathNodeSetCreate(child); + + document = DOC_RUBY_OBJECT(node->doc); + + if (!child) { return noko_xml_node_set_wrap(set, document); } + + child = child->next; + while (NULL != child) { + xmlXPathNodeSetAddUnique(set, child); + child = child->next; + } + + node_set = noko_xml_node_set_wrap(set, document); + + return node_set; +} + + +/* + * :call-seq: + * content() → String + * inner_text() → String + * text() → String + * to_str() → String + * + * [Returns] + * Contents of all the text nodes in this node's subtree, concatenated together into a single + * String. + * + * ⚠ Note that entities will _always_ be expanded in the returned String. + * + * See related: #inner_html + * + * *Example* of how entities are handled: + * + * Note that < becomes < in the returned String. 
+ * + * doc = Nokogiri::XML.fragment("a < b") + * doc.at_css("child").content + * # => "a < b" + * + * *Example* of how a subtree is handled: + * + * Note that the tags are omitted and only the text node contents are returned, + * concatenated into a single string. + * + * doc = Nokogiri::XML.fragment("first second") + * doc.at_css("child").content + * # => "first second" + */ +static VALUE +rb_xml_node_content(VALUE self) +{ + xmlNodePtr node; + xmlChar *content; + + Noko_Node_Get_Struct(self, xmlNode, node); + + content = xmlNodeGetContent(node); + if (content) { + VALUE rval = NOKOGIRI_STR_NEW2(content); + xmlFree(content); + return rval; + } + return Qnil; +} + + +/* + * :call-seq: document() → Nokogiri::XML::Document + * + * :category: Traversing Document Structure * - * Get the document for this Node + * [Returns] Parent Nokogiri::XML::Document for this node */ -static VALUE document(VALUE self) +static VALUE +rb_xml_node_document(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return DOC_RUBY_OBJECT(node->doc); } /* - * call-seq: - * pointer_id + * :call-seq: pointer_id() → Integer * - * Get the internal pointer number + * [Returns] + * A unique id for this node based on the internal memory structures. This method is used by #== + * to determine node identity. 
*/ -static VALUE pointer_id(VALUE self) +static VALUE +rb_xml_node_pointer_id(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); - return INT2NUM((long)(node)); + return rb_uint2inum((uintptr_t)(node)); } /* - * call-seq: - * encode_special_chars(string) + * :call-seq: encode_special_chars(string) → String * * Encode any special characters in +string+ */ -static VALUE encode_special_chars(VALUE self, VALUE string) +static VALUE +encode_special_chars(VALUE self, VALUE string) { xmlNodePtr node; xmlChar *encoded; VALUE encoded_str; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); encoded = xmlEncodeSpecialChars( node->doc, (const xmlChar *)StringValueCStr(string) @@ -408,8 +827,8 @@ static VALUE encode_special_chars(VALUE self, VALUE string) } /* - * call-seq: - * create_internal_subset(name, external_id, system_id) + * :call-seq: + * create_internal_subset(name, external_id, system_id) * * Create the internal subset of a document. * @@ -419,17 +838,18 @@ static VALUE encode_special_chars(VALUE self, VALUE string) * doc.create_internal_subset("chapter", nil, "chapter.dtd") * # => */ -static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id) +static VALUE +create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id) { xmlNodePtr node; xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; - if(xmlGetIntSubset(doc)) { + if (xmlGetIntSubset(doc)) { rb_raise(rb_eRuntimeError, "Document already has an internal subset"); } @@ -440,28 +860,29 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V NIL_P(system_id) ? 
NULL : (const xmlChar *)StringValueCStr(system_id) ); - if(!dtd) { return Qnil; } + if (!dtd) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd); + return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd); } /* - * call-seq: - * create_external_subset(name, external_id, system_id) + * :call-seq: + * create_external_subset(name, external_id, system_id) * * Create an external subset */ -static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id) +static VALUE +create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id) { xmlNodePtr node; xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; - if(doc->extSubset) { + if (doc->extSubset) { rb_raise(rb_eRuntimeError, "Document already has an external subset"); } @@ -472,72 +893,78 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id) ); - if(!dtd) { return Qnil; } + if (!dtd) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd); + return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd); } /* - * call-seq: - * external_subset + * :call-seq: + * external_subset() * * Get the external subset */ -static VALUE external_subset(VALUE self) +static VALUE +external_subset(VALUE self) { xmlNodePtr node; xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); - if(!node->doc) { return Qnil; } + if (!node->doc) { return Qnil; } doc = node->doc; dtd = doc->extSubset; - if(!dtd) { return Qnil; } + if (!dtd) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd); + return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd); } /* - * call-seq: - * internal_subset + * :call-seq: + * internal_subset() * * Get the internal subset */ -static VALUE internal_subset(VALUE self) +static VALUE +internal_subset(VALUE self) { 
xmlNodePtr node; xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); - if(!node->doc) { return Qnil; } + if (!node->doc) { return Qnil; } doc = node->doc; dtd = xmlGetIntSubset(doc); - if(!dtd) { return Qnil; } + if (!dtd) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd); + return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd); } /* - * call-seq: - * dup - * dup(depth) - * dup(depth, new_parent_doc) + * :call-seq: + * dup → Nokogiri::XML::Node + * dup(depth) → Nokogiri::XML::Node + * dup(depth, new_parent_doc) → Nokogiri::XML::Node * * Copy this node. - * An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy. - * An optional new_parent_doc may also be passed in, which will be the new - * node's parent document. Defaults to the current node's document. - * current document. + * + * [Parameters] + * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy. + * - +new_parent_doc+ + * The new node's parent Document. Defaults to this node's document.
+ * + * [Returns] The new Nokgiri::XML::Node */ -static VALUE duplicate_node(int argc, VALUE *argv, VALUE self) +static VALUE +duplicate_node(int argc, VALUE *argv, VALUE self) { VALUE r_level, r_new_parent_doc; int level; @@ -545,7 +972,7 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self) xmlDocPtr new_parent_doc; xmlNodePtr node, dup; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc); @@ -561,40 +988,29 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self) } dup = xmlDocCopyNode(node, new_parent_doc, level); - if(dup == NULL) { return Qnil; } + if (dup == NULL) { return Qnil; } - nokogiri_root_node(dup); + noko_xml_document_pin_node(dup); - return Nokogiri_wrap_xml_node(rb_obj_class(self), dup); + return noko_xml_node_wrap(rb_obj_class(self), dup); } /* - * call-seq: - * unlink + * :call-seq: + * unlink() → self * * Unlink this node from its current context. */ -static VALUE unlink_node(VALUE self) +static VALUE +unlink_node(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlUnlinkNode(node); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); return self; } -/* - * call-seq: - * blank? - * - * Is this node blank? - */ -static VALUE blank_eh(VALUE self) -{ - xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - return (1 == xmlIsBlankNode(node)) ? 
Qtrue : Qfalse ; -} /* * call-seq: @@ -602,15 +1018,16 @@ static VALUE blank_eh(VALUE self) * * Returns the next sibling node */ -static VALUE next_sibling(VALUE self) +static VALUE +next_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->next; - if(!sibling) { return Qnil; } + if (!sibling) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, sibling) ; + return noko_xml_node_wrap(Qnil, sibling) ; } /* @@ -619,15 +1036,16 @@ static VALUE next_sibling(VALUE self) * * Returns the previous sibling node */ -static VALUE previous_sibling(VALUE self) +static VALUE +previous_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->prev; - if(!sibling) { return Qnil; } + if (!sibling) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, sibling); + return noko_xml_node_wrap(Qnil, sibling); } /* @@ -636,15 +1054,16 @@ static VALUE previous_sibling(VALUE self) * * Returns the next Nokogiri::XML::Element type sibling node. */ -static VALUE next_element(VALUE self) +static VALUE +next_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = xmlNextElementSibling(node); - if(!sibling) { return Qnil; } + if (!sibling) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, sibling); + return noko_xml_node_wrap(Qnil, sibling); } /* @@ -653,82 +1072,60 @@ static VALUE next_element(VALUE self) * * Returns the previous Nokogiri::XML::Element type sibling node. */ -static VALUE previous_element(VALUE self) +static VALUE +previous_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7. 
*/ sibling = node->prev; - if(!sibling) { return Qnil; } + if (!sibling) { return Qnil; } - while(sibling && sibling->type != XML_ELEMENT_NODE) { + while (sibling && sibling->type != XML_ELEMENT_NODE) { sibling = sibling->prev; } - return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ; + return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ; } /* :nodoc: */ -static VALUE replace(VALUE self, VALUE new_node) +static VALUE +replace(VALUE self, VALUE new_node) { VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper); xmlNodePtr pivot; - Data_Get_Struct(self, xmlNode, pivot); - nokogiri_root_node(pivot); + Noko_Node_Get_Struct(self, xmlNode, pivot); + noko_xml_document_pin_node(pivot); return reparent; } /* - * call-seq: - * children + * :call-seq: + * element_children() → NodeSet + * elements() → NodeSet * - * Get the list of children for this node as a NodeSet - */ -static VALUE children(VALUE self) -{ - xmlNodePtr node; - xmlNodePtr child; - xmlNodeSetPtr set; - VALUE document; - VALUE node_set; - - Data_Get_Struct(self, xmlNode, node); - - child = node->children; - set = xmlXPathNodeSetCreate(child); - - document = DOC_RUBY_OBJECT(node->doc); - - if(!child) { return Nokogiri_wrap_xml_node_set(set, document); } - - child = child->next; - while(NULL != child) { - xmlXPathNodeSetAddUnique(set, child); - child = child->next; - } - - node_set = Nokogiri_wrap_xml_node_set(set, document); - - return node_set; -} - -/* - * call-seq: - * element_children + * [Returns] + * The node's child elements as a NodeSet. Only children that are elements will be returned, which + * notably excludes Text nodes. * - * Get the list of children for this node as a NodeSet. All nodes will be - * element nodes. + * *Example:* * - * Example: + * Note that #children returns the Text node "hello" while #element_children does not. * - * @doc.root.element_children.all? { |x| x.element? } # => true + * div = Nokogiri::HTML5("

    helloworld").at_css("div") + * div.element_children + * # => [#]>] + * div.children + * # => [#, + * # #]>] */ -static VALUE element_children(VALUE self) +static VALUE +rb_xml_node_element_children(VALUE self) { xmlNodePtr node; xmlNodePtr child; @@ -736,83 +1133,78 @@ static VALUE element_children(VALUE self) VALUE document; VALUE node_set; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); set = xmlXPathNodeSetCreate(child); document = DOC_RUBY_OBJECT(node->doc); - if(!child) { return Nokogiri_wrap_xml_node_set(set, document); } - - child = xmlNextElementSibling(child); - while(NULL != child) { - xmlXPathNodeSetAddUnique(set, child); - child = xmlNextElementSibling(child); - } - - node_set = Nokogiri_wrap_xml_node_set(set, document); + if (!child) { return noko_xml_node_set_wrap(set, document); } - return node_set; -} - -/* - * call-seq: - * child - * - * Returns the child node - */ -static VALUE child(VALUE self) -{ - xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + child = xmlNextElementSibling(child); + while (NULL != child) { + xmlXPathNodeSetAddUnique(set, child); + child = xmlNextElementSibling(child); + } - child = node->children; - if(!child) { return Qnil; } + node_set = noko_xml_node_set_wrap(set, document); - return Nokogiri_wrap_xml_node(Qnil, child); + return node_set; } /* - * call-seq: - * first_element_child + * :call-seq: + * first_element_child() → Node * - * Returns the first child node of this node that is an element. + * [Returns] The first child Node that is an element. * - * Example: + * *Example:* * - * @doc.root.first_element_child.element? # => true + * Note that the "hello" child, which is a Text node, is skipped and the element is + * returned. + * + * div = Nokogiri::HTML5("
    helloworld").at_css("div") + * div.first_element_child + * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] }) */ -static VALUE first_element_child(VALUE self) +static VALUE +rb_xml_node_first_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); - if(!child) { return Qnil; } + if (!child) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, child); + return noko_xml_node_wrap(Qnil, child); } /* - * call-seq: - * last_element_child + * :call-seq: + * last_element_child() → Node * - * Returns the last child node of this node that is an element. + * [Returns] The last child Node that is an element. * - * Example: + * *Example:* * - * @doc.root.last_element_child.element? # => true + * Note that the "hello" child, which is a Text node, is skipped and the yes + * element is returned. + * + * div = Nokogiri::HTML5("
    noyesskip
    ").at_css("div") + * div.last_element_child + * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] }) */ -static VALUE last_element_child(VALUE self) +static VALUE +rb_xml_node_last_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlLastElementChild(node); - if(!child) { return Qnil; } + if (!child) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, child); + return noko_xml_node_wrap(Qnil, child); } /* @@ -821,11 +1213,12 @@ static VALUE last_element_child(VALUE self) * * Returns true if +attribute+ is set */ -static VALUE key_eh(VALUE self, VALUE attribute) +static VALUE +key_eh(VALUE self, VALUE attribute) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - if(xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) { + Noko_Node_Get_Struct(self, xmlNode, node); + if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) { return Qtrue; } return Qfalse; @@ -837,12 +1230,13 @@ static VALUE key_eh(VALUE self, VALUE attribute) * * Returns true if +attribute+ is set with +namespace+ */ -static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace) +static VALUE +namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - if(xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute), - NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) { + Noko_Node_Get_Struct(self, xmlNode, node); + if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute), + NIL_P(namespace) ? 
NULL : (xmlChar *)StringValueCStr(namespace))) { return Qtrue; } return Qfalse; @@ -854,11 +1248,12 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace) * * Set the +property+ to +value+ */ -static VALUE set(VALUE self, VALUE property, VALUE value) +static VALUE +set(VALUE self, VALUE property, VALUE value) { xmlNodePtr node, cur; xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* If a matching attribute node already exists, then xmlSetProp will destroy * the existing node's children. However, if Nokogiri has a node object @@ -867,13 +1262,13 @@ static VALUE set(VALUE self, VALUE property, VALUE value) * We can avoid this by unlinking these nodes first. */ if (node->type != XML_ELEMENT_NODE) { - return(Qnil); + return (Qnil); } prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property)); if (prop && prop->children) { for (cur = prop->children; cur; cur = cur->next) { if (cur->_private) { - nokogiri_root_node(cur); + noko_xml_document_pin_node(cur); xmlUnlinkNode(cur); } } @@ -891,7 +1286,8 @@ static VALUE set(VALUE self, VALUE property, VALUE value) * * Get the value for +attribute+ */ -static VALUE get(VALUE self, VALUE rattribute) +static VALUE +get(VALUE self, VALUE rattribute) { xmlNodePtr node; xmlChar *value = 0; @@ -902,10 +1298,10 @@ static VALUE get(VALUE self, VALUE rattribute) if (NIL_P(rattribute)) { return Qnil; } - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); attribute = xmlCharStrdup(StringValueCStr(rattribute)); - colon = (xmlChar *)(uintptr_t)xmlStrchr(attribute, (const xmlChar)':'); + colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':')); if (colon) { /* split the attribute string into separate prefix and name by * null-terminating the prefix at the colon */ @@ -917,7 +1313,7 @@ static VALUE get(VALUE self, VALUE rattribute) if (ns) { value = xmlGetNsProp(node, attr_name, ns->href); } else { - value = 
xmlGetProp(node, (xmlChar*)StringValueCStr(rattribute)); + value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute)); } } else { value = xmlGetNoNsProp(node, attribute); @@ -938,15 +1334,16 @@ static VALUE get(VALUE self, VALUE rattribute) * * Set the namespace to +namespace+ */ -static VALUE set_namespace(VALUE self, VALUE namespace) +static VALUE +set_namespace(VALUE self, VALUE namespace) { xmlNodePtr node; xmlNsPtr ns = NULL; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); - if(!NIL_P(namespace)) { - Data_Get_Struct(namespace, xmlNs, ns); + if (!NIL_P(namespace)) { + Noko_Namespace_Get_Struct(namespace, xmlNs, ns); } xmlSetNs(node, ns); @@ -955,138 +1352,140 @@ static VALUE set_namespace(VALUE self, VALUE namespace) } /* - * call-seq: - * attribute(name) + * :call-seq: + * namespace() → Namespace * - * Get the attribute node with +name+ - */ -static VALUE attr(VALUE self, VALUE name) -{ - xmlNodePtr node; - xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); - prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name)); - - if(! prop) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop); -} - -/* - * call-seq: - * attribute_with_ns(name, namespace) + * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace. * - * Get the attribute node with +name+ and +namespace+ - */ -static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace) -{ - xmlNodePtr node; - xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); - prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name), - NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)); - - if(! prop) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop); -} - -/* - * call-seq: - * attribute_nodes() + * *Example:* * - * returns a list containing the Node attributes. 
+ * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//first").namespace + * # => nil + * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace + * # => #(Namespace:0x3c { href = "http://example.com/child" }) + * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace + * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" }) */ -static VALUE attribute_nodes(VALUE self) +static VALUE +rb_xml_node_namespace(VALUE rb_node) { - /* this code in the mode of xmlHasProp() */ - xmlNodePtr node; - VALUE attr; + xmlNodePtr c_node ; + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - Data_Get_Struct(self, xmlNode, node); - - attr = rb_ary_new(); - Nokogiri_xml_node_properties(node, attr); + if (c_node->ns) { + return noko_xml_namespace_wrap(c_node->ns, c_node->doc); + } - return attr ; + return Qnil ; } - /* - * call-seq: - * namespace() + * :call-seq: + * namespace_definitions() → Array * - * returns the namespace of the element or attribute node as a Namespace - * object, or nil if there is no namespace for the element or attribute. - */ -static VALUE namespace(VALUE self) -{ -xmlNodePtr node ; -Data_Get_Struct(self, xmlNode, node); - -if (node->ns) { - return Nokogiri_wrap_xml_namespace(node->doc, node->ns); -} - -return Qnil ; -} - -/* - * call-seq: - * namespace_definitions() + * [Returns] + * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array + * will be empty if no namespaces are defined on this node. * - * returns namespaces defined on self element directly, as an array of Namespace objects. Includes both a default namespace (as in"xmlns="), and prefixed namespaces (as in "xmlns:prefix="). 
+ * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions + * # => [] + * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions + * # => [#(Namespace:0x3c { href = "http://example.com/child" }), + * # #(Namespace:0x50 { + * # prefix = "unused", + * # href = "http://example.com/unused" + * # })] + * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions + * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })] */ -static VALUE namespace_definitions(VALUE self) +static VALUE +namespace_definitions(VALUE rb_node) { /* this code in the mode of xmlHasProp() */ - xmlNodePtr node ; - VALUE list; - xmlNsPtr ns; - - Data_Get_Struct(self, xmlNode, node); + xmlNodePtr c_node ; + xmlNsPtr c_namespace; + VALUE definitions = rb_ary_new(); - list = rb_ary_new(); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - ns = node->nsDef; - - if(!ns) { return list; } + c_namespace = c_node->nsDef; + if (!c_namespace) { + return definitions; + } - while(NULL != ns) { - rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns)); - ns = ns->next; + while (c_namespace != NULL) { + rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc)); + c_namespace = c_namespace->next; } - return list; + return definitions; } /* - * call-seq: - * namespace_scopes() + * :call-seq: + * namespace_scopes() → Array * - * returns namespaces in scope for self -- those defined on self element - * directly or any ancestor node -- as an array of Namespace objects. Default - * namespaces ("xmlns=" style) for self are included in this array; Default - * namespaces for ancestors, however, are not. See also #namespaces + * [Returns] Array of all the Namespaces on this node and its ancestors. 
+ * + * See also #namespaces + * + * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes + * # => [#(Namespace:0x3c { href = "http://example.com/root" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] + * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes + * # => [#(Namespace:0x64 { href = "http://example.com/child" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] + * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes + * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }), + * # #(Namespace:0x3c { href = "http://example.com/root" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] */ -static VALUE namespace_scopes(VALUE self) +static VALUE +rb_xml_node_namespace_scopes(VALUE rb_node) { - xmlNodePtr node ; - VALUE list; - xmlNsPtr *ns_list; + xmlNodePtr c_node ; + xmlNsPtr *namespaces; + VALUE scopes = rb_ary_new(); int j; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - list = rb_ary_new(); - ns_list = xmlGetNsList(node->doc, node); - - if(!ns_list) { return list; } + namespaces = xmlGetNsList(c_node->doc, c_node); + if (!namespaces) { + return scopes; + } - for (j = 0 ; ns_list[j] != NULL ; ++j) { - rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j])); + for (j = 0 ; namespaces[j] != NULL ; ++j) { + rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc)); } - xmlFree(ns_list); - return list; + xmlFree(namespaces); + return scopes; } /* @@ -1095,11 +1494,12 @@ static VALUE namespace_scopes(VALUE self) * * Get the type for this Node */ -static VALUE node_type(VALUE self) +static VALUE +node_type(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - return INT2NUM((long)node->type); 
+ Noko_Node_Get_Struct(self, xmlNode, node); + return INT2NUM(node->type); } /* @@ -1108,16 +1508,17 @@ static VALUE node_type(VALUE self) * * Set the content for this Node */ -static VALUE set_native_content(VALUE self, VALUE content) +static VALUE +set_native_content(VALUE self, VALUE content) { xmlNodePtr node, child, next ; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = node->children; while (NULL != child) { next = child->next ; xmlUnlinkNode(child) ; - nokogiri_root_node(child); + noko_xml_document_pin_node(child); child = next ; } @@ -1125,42 +1526,20 @@ static VALUE set_native_content(VALUE self, VALUE content) return content; } -/* - * call-seq: - * content - * - * Returns the plaintext content for this Node. Note that entities will always - * be expanded in the returned string. - */ -static VALUE get_native_content(VALUE self) -{ - xmlNodePtr node; - xmlChar * content; - - Data_Get_Struct(self, xmlNode, node); - - content = xmlNodeGetContent(node); - if(content) { - VALUE rval = NOKOGIRI_STR_NEW2(content); - xmlFree(content); - return rval; - } - return Qnil; -} - /* * call-seq: * lang= * * Set the language of a node, i.e. the values of the xml:lang attribute. */ -static VALUE set_lang(VALUE self_rb, VALUE lang_rb) +static VALUE +set_lang(VALUE self_rb, VALUE lang_rb) { xmlNodePtr self ; - xmlChar* lang ; + xmlChar *lang ; - Data_Get_Struct(self_rb, xmlNode, self); - lang = (xmlChar*)StringValueCStr(lang_rb); + Noko_Node_Get_Struct(self_rb, xmlNode, self); + lang = (xmlChar *)StringValueCStr(lang_rb); xmlNodeSetLang(self, lang); @@ -1174,13 +1553,14 @@ static VALUE set_lang(VALUE self_rb, VALUE lang_rb) * Searches the language of a node, i.e. the values of the xml:lang attribute or * the one carried by the nearest ancestor. 
*/ -static VALUE get_lang(VALUE self_rb) +static VALUE +get_lang(VALUE self_rb) { xmlNodePtr self ; - xmlChar* lang ; + xmlChar *lang ; VALUE lang_rb ; - Data_Get_Struct(self_rb, xmlNode, self); + Noko_Node_Get_Struct(self_rb, xmlNode, self); lang = xmlNodeGetLang(self); if (lang) { @@ -1193,7 +1573,8 @@ static VALUE get_lang(VALUE self_rb) } /* :nodoc: */ -static VALUE add_child(VALUE self, VALUE new_child) +static VALUE +add_child(VALUE self, VALUE new_child) { return reparent_node_with(self, new_child, xmlAddChild); } @@ -1204,15 +1585,16 @@ static VALUE add_child(VALUE self, VALUE new_child) * * Get the parent Node for this Node */ -static VALUE get_parent(VALUE self) +static VALUE +get_parent(VALUE self) { xmlNodePtr node, parent; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); parent = node->parent; - if(!parent) { return Qnil; } + if (!parent) { return Qnil; } - return Nokogiri_wrap_xml_node(Qnil, parent) ; + return noko_xml_node_wrap(Qnil, parent) ; } /* @@ -1221,11 +1603,12 @@ static VALUE get_parent(VALUE self) * * Set the name for this Node */ -static VALUE set_name(VALUE self, VALUE new_name) +static VALUE +set_name(VALUE self, VALUE new_name) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - xmlNodeSetName(node, (xmlChar*)StringValueCStr(new_name)); + Noko_Node_Get_Struct(self, xmlNode, node); + xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name)); return new_name; } @@ -1235,11 +1618,12 @@ static VALUE set_name(VALUE self, VALUE new_name) * * Returns the name for this Node */ -static VALUE get_name(VALUE self) +static VALUE +get_name(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - if(node->name) { + Noko_Node_Get_Struct(self, xmlNode, node); + if (node->name) { return NOKOGIRI_STR_NEW2(node->name); } return Qnil; @@ -1251,28 +1635,39 @@ static VALUE get_name(VALUE self) * * Returns the path associated with this Node */ -static VALUE path(VALUE self) +static VALUE 
+rb_xml_node_path(VALUE rb_node) { - xmlNodePtr node; - xmlChar *path ; + xmlNodePtr c_node; + xmlChar *c_path ; VALUE rval; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + c_path = xmlGetNodePath(c_node); + if (c_path == NULL) { + // see https://github.com/sparklemotion/nokogiri/issues/2250 + // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we + // do this for now to preserve the behavior across libxml2 versions. + rval = NOKOGIRI_STR_NEW2("?"); + } else { + rval = NOKOGIRI_STR_NEW2(c_path); + xmlFree(c_path); + } - path = xmlGetNodePath(node); - rval = NOKOGIRI_STR_NEW2(path); - xmlFree(path); return rval ; } /* :nodoc: */ -static VALUE add_next_sibling(VALUE self, VALUE new_sibling) +static VALUE +add_next_sibling(VALUE self, VALUE new_sibling) { return reparent_node_with(self, new_sibling, xmlAddNextSibling) ; } /* :nodoc: */ -static VALUE add_previous_sibling(VALUE self, VALUE new_sibling) +static VALUE +add_previous_sibling(VALUE self, VALUE new_sibling) { return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ; } @@ -1283,7 +1678,8 @@ static VALUE add_previous_sibling(VALUE self, VALUE new_sibling) * * Write this Node to +io+ with +encoding+ and +options+ */ -static VALUE native_write_to( +static VALUE +native_write_to( VALUE self, VALUE io, VALUE encoding, @@ -1292,10 +1688,10 @@ static VALUE native_write_to( ) { xmlNodePtr node; - const char * before_indent; + const char *before_indent; xmlSaveCtxtPtr savectx; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlIndentTreeOutput = 1; @@ -1304,8 +1700,8 @@ static VALUE native_write_to( xmlTreeIndentString = StringValueCStr(indent_string); savectx = xmlSaveToIO( - (xmlOutputWriteCallback)io_write_callback, - (xmlOutputCloseCallback)io_close_callback, + (xmlOutputWriteCallback)noko_io_write, + (xmlOutputCloseCallback)noko_io_close, (void *)io, RTEST(encoding) ? 
StringValueCStr(encoding) : NULL, (int)NUM2INT(options) @@ -1318,93 +1714,366 @@ static VALUE native_write_to( return io; } -/* - * call-seq: - * line - * - * Returns the line for this Node - */ -static VALUE line(VALUE self) + +static inline void +output_partial_string(VALUE out, char const *str, size_t length) { - xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + if (length) { + rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding()); + } +} - return INT2NUM(xmlGetLineNo(node)); +static inline void +output_char(VALUE out, char ch) +{ + output_partial_string(out, &ch, 1); } -/* - * call-seq: - * add_namespace_definition(prefix, href) - * - * Adds a namespace definition with +prefix+ using +href+ value. The result is - * as if parsed XML for this node had included an attribute - * 'xmlns:prefix=value'. A default namespace for this node ("xmlns=") can be - * added by passing 'nil' for prefix. Namespaces added this way will not - * show up in #attributes, but they will be included as an xmlns attribute - * when the node is serialized to XML. - */ -static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href) +static inline void +output_string(VALUE out, char const *str) { - xmlNodePtr node, namespace; - xmlNsPtr ns; + output_partial_string(out, str, strlen(str)); +} + +static inline void +output_tagname(VALUE out, xmlNodePtr elem) +{ + // Elements in the HTML, MathML, and SVG namespaces do not use a namespace + // prefix in the HTML syntax. 
+ char const *name = (char const *)elem->name; + xmlNsPtr ns = elem->ns; + if (ns && ns->href && ns->prefix + && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml") + && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML") + && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) { + output_string(out, (char const *)elem->ns->prefix); + output_char(out, ':'); + char const *colon = strchr(name, ':'); + if (colon) { + name = colon + 1; + } + } + output_string(out, name); +} + +static inline void +output_attr_name(VALUE out, xmlAttrPtr attr) +{ + xmlNsPtr ns = attr->ns; + char const *name = (char const *)attr->name; + if (ns && ns->href) { + char const *uri = (char const *)ns->href; + char const *localname = strchr(name, ':'); + if (localname) { + ++localname; + } else { + localname = name; + } + + if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) { + output_string(out, "xml:"); + name = localname; + } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) { + // xmlns:xmlns -> xmlns + // xmlns:foo -> xmlns:foo + if (strcmp(localname, "xmlns")) { + output_string(out, "xmlns:"); + } + name = localname; + } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) { + output_string(out, "xlink:"); + name = localname; + } else if (ns->prefix) { + output_string(out, (char const *)ns->prefix); + output_char(out, ':'); + name = localname; + } + } + output_string(out, name); +} + +static void +output_escaped_string(VALUE out, xmlChar const *start, bool attr) +{ + xmlChar const *next = start; + int ch; + + while ((ch = *next) != 0) { + char const *replacement = NULL; + size_t replaced_bytes = 1; + if (ch == '&') { + replacement = "&"; + } else if (ch == 0xC2 && next[1] == 0xA0) { + // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0. 
+ replacement = " "; + replaced_bytes = 2; + } else if (attr && ch == '"') { + replacement = """; + } else if (!attr && ch == '<') { + replacement = "<"; + } else if (!attr && ch == '>') { + replacement = ">"; + } else { + ++next; + continue; + } + output_partial_string(out, (char const *)start, next - start); + output_string(out, replacement); + next += replaced_bytes; + start = next; + } + output_partial_string(out, (char const *)start, next - start); +} - Data_Get_Struct(self, xmlNode, node); - namespace = node ; +static bool +should_prepend_newline(xmlNodePtr node) +{ + char const *name = (char const *)node->name; + xmlNodePtr child = node->children; + + if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) { + return false; + } + + return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n'; +} - ns = xmlSearchNs( - node->doc, - node, - (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix)) - ); +static VALUE +rb_prepend_newline(VALUE self) +{ + xmlNodePtr node; + Noko_Node_Get_Struct(self, xmlNode, node); + return should_prepend_newline(node) ? Qtrue : Qfalse; +} - if(!ns) { - if (node->type != XML_ELEMENT_NODE) { - namespace = node->parent; +static bool +is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames) +{ + char const *name = (char const *)node->name; + if (name == NULL) { // fragments don't have a name + return false; + } + for (size_t idx = 0; idx < num_tagnames; ++idx) { + if (!strcmp(name, tagnames[idx])) { + return true; } - ns = xmlNewNs( - namespace, - (const xmlChar *)StringValueCStr(href), - (const xmlChar *)(NIL_P(prefix) ? 
NULL : StringValueCStr(prefix)) - ); } + return false; + +} + +static void +output_node( + VALUE out, + xmlNodePtr node, + bool preserve_newline +) +{ + static char const *const VOID_ELEMENTS[] = { + "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr", + "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr", + }; + + static char const *const UNESCAPED_TEXT_ELEMENTS[] = { + "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript", + }; + + switch (node->type) { + case XML_ELEMENT_NODE: + // Serialize the start tag. + output_char(out, '<'); + output_tagname(out, node); + + // Add attributes. + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + output_char(out, ' '); + output_attr_name(out, attr); + if (attr->children) { + output_string(out, "=\""); + xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1); + output_escaped_string(out, value, true); + xmlFree(value); + output_char(out, '"'); + } else { + // Output name="" + output_string(out, "=\"\""); + } + } + output_char(out, '>'); + + // Add children and end tag if element is not void. 
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) { + if (preserve_newline && should_prepend_newline(node)) { + output_char(out, '\n'); + } + for (xmlNodePtr child = node->children; child; child = child->next) { + output_node(out, child, preserve_newline); + } + output_string(out, "'); + } + break; + + case XML_TEXT_NODE: + if (node->parent + && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS, + sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) { + output_string(out, (char const *)node->content); + } else { + output_escaped_string(out, node->content, false); + } + break; + + case XML_CDATA_SECTION_NODE: + output_string(out, "content); + output_string(out, "]]>"); + break; + + case XML_COMMENT_NODE: + output_string(out, ""); + break; + + case XML_PI_NODE: + output_string(out, "content); + output_char(out, '>'); + break; + + case XML_DOCUMENT_TYPE_NODE: + case XML_DTD_NODE: + output_string(out, "name); + output_string(out, ">"); + break; + + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: + for (xmlNodePtr child = node->children; child; child = child->next) { + output_node(out, child, preserve_newline); + } + break; + + default: + rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type); + break; + } +} - if (!ns) { return Qnil ; } +static VALUE +html_standard_serialize( + VALUE self, + VALUE preserve_newline +) +{ + xmlNodePtr node; + Noko_Node_Get_Struct(self, xmlNode, node); + VALUE output = rb_str_buf_new(4096); + output_node(output, node, RTEST(preserve_newline)); + return output; +} - if(NIL_P(prefix) || node != namespace) { xmlSetNs(node, ns); } +/* + * :call-seq: + * line() → Integer + * + * [Returns] The line number of this Node. + * + * --- + * + * ⚠ The CRuby and JRuby implementations differ in important ways! 
+ * + * Semantic differences: + * - The CRuby method reflects the node's line number in the parsed string + * - The JRuby method reflects the node's line number in the final DOM structure after + * corrections have been applied + * + * Performance differences: + * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time] + * (constant time) + * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear + * time, where n is the number of nodes before/above the element in the DOM) + * + * If you'd like to help improve the JRuby implementation, please review these issues and reach out + * to the maintainers: + * - https://github.com/sparklemotion/nokogiri/issues/1223 + * - https://github.com/sparklemotion/nokogiri/pull/2177 + * - https://github.com/sparklemotion/nokogiri/issues/2380 + */ +static VALUE +rb_xml_node_line(VALUE rb_node) +{ + xmlNodePtr c_node; + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - return Nokogiri_wrap_xml_namespace(node->doc, ns); + return LONG2NUM(xmlGetLineNo(c_node)); } /* * call-seq: - * new(name, document) + * line=(num) * - * Create a new node with +name+ sharing GC lifecycle with +document+ + * Sets the line for this Node. num must be less than 65535. */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number) { - xmlDocPtr doc; - xmlNodePtr node; - VALUE name; - VALUE document; + xmlNodePtr c_node; + int line_number = NUM2INT(rb_line_number); + + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes. + // search for "psvi" in SAX2.c and tree.c to learn more. 
+ if (line_number < 65535) { + c_node->line = (short) line_number; + } else { + c_node->line = 65535; + if (c_node->type == XML_TEXT_NODE) { + c_node->psvi = (void *)(ptrdiff_t) line_number; + } + } + + return rb_line_number; +} + +/* :nodoc: documented in lib/nokogiri/xml/node.rb */ +static VALUE +rb_xml_node_new(int argc, VALUE *argv, VALUE klass) +{ + xmlNodePtr c_document_node; + xmlNodePtr c_node; + VALUE rb_name; + VALUE rb_document_node; VALUE rest; VALUE rb_node; - rb_scan_args(argc, argv, "2*", &name, &document, &rest); + rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest); - Data_Get_Struct(document, xmlDoc, doc); + if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) { + rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node"); + } + if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) { + // TODO: deprecate allowing Node + NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); + } + Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); - node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(name)); - node->doc = doc->doc; - nokogiri_root_node(node); + c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name)); + c_node->doc = c_document_node->doc; + noko_xml_document_pin_node(c_node); - rb_node = Nokogiri_wrap_xml_node( + rb_node = noko_xml_node_wrap( klass == cNokogiriXmlNode ? (VALUE)NULL : klass, - node + c_node ); rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) { rb_yield(rb_node); } + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } @@ -1415,13 +2084,14 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) * * Returns the Node as html. 
*/ -static VALUE dump_html(VALUE self) +static VALUE +dump_html(VALUE self) { xmlBufferPtr buf ; xmlNodePtr node ; VALUE html; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); buf = xmlBufferCreate() ; htmlNodeDump(buf, node->doc, node); @@ -1436,13 +2106,14 @@ static VALUE dump_html(VALUE self) * * Compare this Node to +other+ with respect to their Document */ -static VALUE compare(VALUE self, VALUE _other) +static VALUE +compare(VALUE self, VALUE _other) { xmlNodePtr node, other; - Data_Get_Struct(self, xmlNode, node); - Data_Get_Struct(_other, xmlNode, other); + Noko_Node_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(_other, xmlNode, other); - return INT2NUM((long)xmlXPathCmpNodes(other, node)); + return INT2NUM(xmlXPathCmpNodes(other, node)); } @@ -1453,13 +2124,14 @@ static VALUE compare(VALUE self, VALUE _other) * Loads and substitutes all xinclude elements below the node. The * parser context will be initialized with +options+. */ -static VALUE process_xincludes(VALUE self, VALUE options) +static VALUE +process_xincludes(VALUE self, VALUE options) { int rcode ; xmlNodePtr node; VALUE error_list = rb_ary_new(); - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options)); @@ -1469,7 +2141,7 @@ static VALUE process_xincludes(VALUE self, VALUE options) xmlErrorPtr error; error = xmlGetLastError(); - if(error) { + if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } else { rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution"); @@ -1481,7 +2153,8 @@ static VALUE process_xincludes(VALUE self, VALUE options) /* TODO: DOCUMENT ME */ -static VALUE in_context(VALUE self, VALUE _str, VALUE _options) +static VALUE +in_context(VALUE self, VALUE _str, VALUE _options) { xmlNodePtr node, list = 0, tmp, child_iter, node_children, 
doc_children; xmlNodeSetPtr set; @@ -1489,7 +2162,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) VALUE doc, err; int doc_is_empty; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = DOC_RUBY_OBJECT(node->doc); err = rb_iv_get(doc, "@errors"); @@ -1530,9 +2203,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) */ child_iter = node->doc->children ; while (child_iter) { - if (child_iter->parent != (xmlNodePtr)node->doc) { - child_iter->parent = (xmlNodePtr)node->doc; - } + child_iter->parent = (xmlNodePtr)node->doc; child_iter = child_iter->next; } @@ -1542,12 +2213,17 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) xmlSetStructuredErrorFunc(NULL, NULL); - /* Workaround for a libxml2 bug where a parsing error may leave a broken + /* + * Workaround for a libxml2 bug where a parsing error may leave a broken * node reference in node->doc->children. + * + * https://bugzilla.gnome.org/show_bug.cgi?id=668155 + * * This workaround is limited to when a parse error occurs, the document * went from having no children to having children, and the context node is * part of a document fragment. - * https://bugzilla.gnome.org/show_bug.cgi?id=668155 + * + * TODO: This was fixed in libxml 2.8.0 by 71a243d */ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) { child_iter = node; @@ -1562,12 +2238,12 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) /* FIXME: This probably needs to handle more constants... 
*/ switch (error) { - case XML_ERR_INTERNAL_ERROR: - case XML_ERR_NO_MEMORY: - rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error); - break; - default: - break; + case XML_ERR_INTERNAL_ERROR: + case XML_ERR_NO_MEMORY: + rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error); + break; + default: + break; } set = xmlXPathNodeSetCreate(NULL); @@ -1576,178 +2252,174 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) tmp = list->next; list->next = NULL; xmlXPathNodeSetAddUnique(set, list); - nokogiri_root_node(list); + noko_xml_document_pin_node(list); list = tmp; } - return Nokogiri_wrap_xml_node_set(set, doc); + return noko_xml_node_set_wrap(set, doc); } - -VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) +VALUE +noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) { - VALUE document = Qnil ; - VALUE node_cache = Qnil ; - VALUE rb_node = Qnil ; + VALUE rb_document, rb_node_cache, rb_node; nokogiriTuplePtr node_has_a_document; - xmlDocPtr doc; - void (*mark_method)(xmlNodePtr) = NULL ; + xmlDocPtr c_doc; - assert(node); + assert(c_node); - if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) { - return DOC_RUBY_OBJECT(node->doc); + if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) { + return DOC_RUBY_OBJECT(c_node->doc); } - /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). 
*/ - /* see https://github.com/sparklemotion/nokogiri/issues/95 */ - /* and https://github.com/sparklemotion/nokogiri/issues/439 */ - doc = node->doc; - if (doc->type == XML_DOCUMENT_FRAG_NODE) { doc = doc->doc; } - node_has_a_document = DOC_RUBY_OBJECT_TEST(doc); + c_doc = c_node->doc; + + // Nodes yielded from XML::Reader don't have a fully-realized Document + node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc); - if(node->_private && node_has_a_document) { - return (VALUE)node->_private; + if (c_node->_private && node_has_a_document) { + return (VALUE)c_node->_private; } - if(!RTEST(klass)) { - switch(node->type) { - case XML_ELEMENT_NODE: - klass = cNokogiriXmlElement; - break; - case XML_TEXT_NODE: - klass = cNokogiriXmlText; - break; - case XML_ATTRIBUTE_NODE: - klass = cNokogiriXmlAttr; - break; - case XML_ENTITY_REF_NODE: - klass = cNokogiriXmlEntityReference; - break; - case XML_COMMENT_NODE: - klass = cNokogiriXmlComment; - break; - case XML_DOCUMENT_FRAG_NODE: - klass = cNokogiriXmlDocumentFragment; - break; - case XML_PI_NODE: - klass = cNokogiriXmlProcessingInstruction; - break; - case XML_ENTITY_DECL: - klass = cNokogiriXmlEntityDecl; - break; - case XML_CDATA_SECTION_NODE: - klass = cNokogiriXmlCData; - break; - case XML_DTD_NODE: - klass = cNokogiriXmlDtd; - break; - case XML_ATTRIBUTE_DECL: - klass = cNokogiriXmlAttributeDecl; - break; - case XML_ELEMENT_DECL: - klass = cNokogiriXmlElementDecl; - break; - default: - klass = cNokogiriXmlNode; + if (!RTEST(rb_class)) { + switch (c_node->type) { + case XML_ELEMENT_NODE: + rb_class = cNokogiriXmlElement; + break; + case XML_TEXT_NODE: + rb_class = cNokogiriXmlText; + break; + case XML_ATTRIBUTE_NODE: + rb_class = cNokogiriXmlAttr; + break; + case XML_ENTITY_REF_NODE: + rb_class = cNokogiriXmlEntityReference; + break; + case XML_COMMENT_NODE: + rb_class = cNokogiriXmlComment; + break; + case XML_DOCUMENT_FRAG_NODE: + rb_class = cNokogiriXmlDocumentFragment; + break; + case XML_PI_NODE: + rb_class = 
cNokogiriXmlProcessingInstruction; + break; + case XML_ENTITY_DECL: + rb_class = cNokogiriXmlEntityDecl; + break; + case XML_CDATA_SECTION_NODE: + rb_class = cNokogiriXmlCData; + break; + case XML_DTD_NODE: + rb_class = cNokogiriXmlDtd; + break; + case XML_ATTRIBUTE_DECL: + rb_class = cNokogiriXmlAttributeDecl; + break; + case XML_ELEMENT_DECL: + rb_class = cNokogiriXmlElementDecl; + break; + default: + rb_class = cNokogiriXmlNode; } } - mark_method = node_has_a_document ? mark : NULL ; - - rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ; - node->_private = (void *)rb_node; + rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ; + c_node->_private = (void *)rb_node; if (node_has_a_document) { - document = DOC_RUBY_OBJECT(doc); - node_cache = DOC_NODE_CACHE(doc); - rb_ary_push(node_cache, rb_node); - rb_funcall(document, decorate, 1, rb_node); + rb_document = DOC_RUBY_OBJECT(c_doc); + rb_node_cache = DOC_NODE_CACHE(c_doc); + rb_ary_push(rb_node_cache, rb_node); + rb_funcall(rb_document, id_decorate, 1, rb_node); } return rb_node ; } -void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list) +/* + * return Array containing the node's attributes + */ +VALUE +noko_xml_node_attrs(xmlNodePtr c_node) { - xmlAttrPtr prop; - prop = node->properties ; - while (prop != NULL) { - rb_ary_push(attr_list, Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop)); - prop = prop->next ; + VALUE rb_properties = rb_ary_new(); + xmlAttrPtr c_property; + + c_property = c_node->properties ; + while (c_property != NULL) { + rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property)); + c_property = c_property->next ; } + + return rb_properties; } -VALUE cNokogiriXmlNode ; -VALUE cNokogiriXmlElement ; - -void init_xml_node() -{ - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE klass = rb_define_class_under(xml, "Node", rb_cObject); - - cNokogiriXmlNode = klass; - - 
cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass); - - rb_define_singleton_method(klass, "new", new, -1); - - rb_define_method(klass, "add_namespace_definition", add_namespace_definition, 2); - rb_define_method(klass, "node_name", get_name, 0); - rb_define_method(klass, "document", document, 0); - rb_define_method(klass, "node_name=", set_name, 1); - rb_define_method(klass, "parent", get_parent, 0); - rb_define_method(klass, "child", child, 0); - rb_define_method(klass, "first_element_child", first_element_child, 0); - rb_define_method(klass, "last_element_child", last_element_child, 0); - rb_define_method(klass, "children", children, 0); - rb_define_method(klass, "element_children", element_children, 0); - rb_define_method(klass, "next_sibling", next_sibling, 0); - rb_define_method(klass, "previous_sibling", previous_sibling, 0); - rb_define_method(klass, "next_element", next_element, 0); - rb_define_method(klass, "previous_element", previous_element, 0); - rb_define_method(klass, "node_type", node_type, 0); - rb_define_method(klass, "path", path, 0); - rb_define_method(klass, "key?", key_eh, 1); - rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2); - rb_define_method(klass, "blank?", blank_eh, 0); - rb_define_method(klass, "attribute_nodes", attribute_nodes, 0); - rb_define_method(klass, "attribute", attr, 1); - rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2); - rb_define_method(klass, "namespace", namespace, 0); - rb_define_method(klass, "namespace_definitions", namespace_definitions, 0); - rb_define_method(klass, "namespace_scopes", namespace_scopes, 0); - rb_define_method(klass, "encode_special_chars", encode_special_chars, 1); - rb_define_method(klass, "dup", duplicate_node, -1); - rb_define_method(klass, "unlink", unlink_node, 0); - rb_define_method(klass, "internal_subset", internal_subset, 0); - rb_define_method(klass, "external_subset", external_subset, 0); - rb_define_method(klass, 
"create_internal_subset", create_internal_subset, 3); - rb_define_method(klass, "create_external_subset", create_external_subset, 3); - rb_define_method(klass, "pointer_id", pointer_id, 0); - rb_define_method(klass, "line", line, 0); - rb_define_method(klass, "content", get_native_content, 0); - rb_define_method(klass, "native_content=", set_native_content, 1); - rb_define_method(klass, "lang", get_lang, 0); - rb_define_method(klass, "lang=", set_lang, 1); - - rb_define_private_method(klass, "process_xincludes", process_xincludes, 1); - rb_define_private_method(klass, "in_context", in_context, 2); - rb_define_private_method(klass, "add_child_node", add_child, 1); - rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1); - rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1); - rb_define_private_method(klass, "replace_node", replace, 1); - rb_define_private_method(klass, "dump_html", dump_html, 0); - rb_define_private_method(klass, "native_write_to", native_write_to, 4); - rb_define_private_method(klass, "get", get, 1); - rb_define_private_method(klass, "set", set, 2); - rb_define_private_method(klass, "set_namespace", set_namespace, 1); - rb_define_private_method(klass, "compare", compare, 1); - - decorate = rb_intern("decorate"); - decorate_bang = rb_intern("decorate!"); -} - -/* vim: set noet sw=4 sws=4 */ +void +noko_init_xml_node(void) +{ + cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject); + + rb_undef_alloc_func(cNokogiriXmlNode); + + rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1); + + rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2); + rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1); + rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0); + rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2); + 
rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0); + rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0); + rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0); + rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0); + rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3); + rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3); + rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0); + rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1); + rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0); + rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1); + rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0); + rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0); + rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0); + rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1); + rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0); + rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1); + rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0); + rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0); + rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1); + rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0); + rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0); + rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0); + rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2); + rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1); + rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0); + rb_define_method(cNokogiriXmlNode, "next_sibling", 
next_sibling, 0); + rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0); + rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1); + rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0); + rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0); + rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0); + rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0); + rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0); + rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0); + rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0); + + rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1); + rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1); + rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1); + rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1); + rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0); + rb_define_private_method(cNokogiriXmlNode, "get", get, 1); + rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2); + rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4); + rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0); + rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1); + rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1); + rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1); + rb_define_private_method(cNokogiriXmlNode, "set", set, 2); + rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1); + + id_decorate = rb_intern("decorate"); + id_decorate_bang = rb_intern("decorate!"); +} diff --git a/ext/nokogiri/xml_node.h b/ext/nokogiri/xml_node.h deleted file mode 100644 index 3733233b92..0000000000 --- 
a/ext/nokogiri/xml_node.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef NOKOGIRI_XML_NODE -#define NOKOGIRI_XML_NODE - -#include - -void init_xml_node(); - -extern VALUE cNokogiriXmlNode ; -extern VALUE cNokogiriXmlElement ; - -VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ; -void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ; -#endif diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index feb8ccebe6..25a2ced09b 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -1,12 +1,11 @@ -#include -#include -#include +#include -static ID decorate ; -static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val); +VALUE cNokogiriXmlNodeSet ; +static ID decorate ; -static void Check_Node_Set_Node_Type(VALUE node) +static void +Check_Node_Set_Node_Type(VALUE node) { if (!(rb_obj_is_kind_of(node, cNokogiriXmlNode) || rb_obj_is_kind_of(node, cNokogiriXmlNamespace))) { @@ -15,34 +14,92 @@ static void Check_Node_Set_Node_Type(VALUE node) } -static void deallocate(xmlNodeSetPtr node_set) +static +VALUE +ruby_object_get(xmlNodePtr c_node) +{ + /* see xmlElementType in libxml2 tree.h */ + switch (c_node->type) { + case XML_NAMESPACE_DECL: + /* _private is later in the namespace struct */ + return (VALUE)(((xmlNsPtr)c_node)->_private); + + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + /* in documents we use _private to store a tuple */ + if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) { + return DOC_RUBY_OBJECT((xmlDocPtr)c_node); + } + return (VALUE)NULL; + + default: + return (VALUE)(c_node->_private); + } +} + + +static void +mark(xmlNodeSetPtr node_set) +{ + VALUE rb_node; + int jnode; + + for (jnode = 0; jnode < node_set->nodeNr; jnode++) { + rb_node = ruby_object_get(node_set->nodeTab[jnode]); + if (rb_node) { + rb_gc_mark(rb_node); + } + } +} + +static void +xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val) +{ + /* + * For reasons outlined in xml_namespace.c, here we reproduce 
xmlXPathNodeSetDel() except for the + * offending call to xmlXPathNodeSetFreeNs(). + */ + int i; + + if (cur == NULL) { return; } + if (val == NULL) { return; } + + /* + * find node in nodeTab + */ + for (i = 0; i < cur->nodeNr; i++) + if (cur->nodeTab[i] == val) { break; } + + if (i >= cur->nodeNr) { /* not found */ + return; + } + cur->nodeNr--; + for (; i < cur->nodeNr; i++) { + cur->nodeTab[i] = cur->nodeTab[i + 1]; + } + cur->nodeTab[cur->nodeNr] = NULL; +} + + +static void +deallocate(xmlNodeSetPtr node_set) { /* - * - * since xpath queries return copies of the xmlNs structs, - * xmlXPathFreeNodeSet() frees those xmlNs structs that are in the - * NodeSet. - * - * this is bad if someone is still trying to use the Namespace object wrapped - * around the xmlNs, so we need to avoid that. - * - * here we reproduce xmlXPathFreeNodeSet() without the xmlNs logic. - * - * this doesn't cause a leak because Namespace objects that are in an XPath - * query NodeSet are given their own lifecycle in - * Nokogiri_wrap_xml_namespace(). + * For reasons outlined in xml_namespace.c, here we reproduce xmlXPathFreeNodeSet() except for the + * offending call to xmlXPathNodeSetFreeNs(). */ - NOKOGIRI_DEBUG_START(node_set) ; - if (node_set->nodeTab != NULL) + if (node_set->nodeTab != NULL) { xmlFree(node_set->nodeTab); + } xmlFree(node_set); - NOKOGIRI_DEBUG_END(node_set) ; } -static VALUE allocate(VALUE klass) + +static VALUE +allocate(VALUE klass) { - return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL), Qnil); + return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil); } @@ -53,7 +110,8 @@ static VALUE allocate(VALUE klass) * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not * duplicated (similar to how Array and other Enumerable classes work). 
*/ -static VALUE duplicate(VALUE self) +static VALUE +duplicate(VALUE self) { xmlNodeSetPtr node_set; xmlNodeSetPtr dupl; @@ -62,7 +120,7 @@ static VALUE duplicate(VALUE self) dupl = xmlXPathNodeSetMerge(NULL, node_set); - return Nokogiri_wrap_xml_node_set(dupl, rb_iv_get(self, "@document")); + return noko_xml_node_set_wrap(dupl, rb_iv_get(self, "@document")); } /* @@ -71,7 +129,8 @@ static VALUE duplicate(VALUE self) * * Get the length of the node set */ -static VALUE length(VALUE self) +static VALUE +length(VALUE self) { xmlNodeSetPtr node_set; @@ -86,7 +145,8 @@ static VALUE length(VALUE self) * * Append +node+ to the NodeSet. */ -static VALUE push(VALUE self, VALUE rb_node) +static VALUE +push(VALUE self, VALUE rb_node) { xmlNodeSetPtr node_set; xmlNodePtr node; @@ -94,7 +154,7 @@ static VALUE push(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); xmlXPathNodeSetAdd(node_set, node); @@ -109,7 +169,7 @@ static VALUE push(VALUE self, VALUE rb_node) * if found, otherwise returns nil. */ static VALUE -delete(VALUE self, VALUE rb_node) +delete (VALUE self, VALUE rb_node) { xmlNodeSetPtr node_set; xmlNodePtr node; @@ -117,7 +177,7 @@ delete(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); if (xmlXPathNodeSetContains(node_set, node)) { xpath_node_set_del(node_set, node); @@ -133,19 +193,21 @@ delete(VALUE self, VALUE rb_node) * * Set Intersection — Returns a new NodeSet containing nodes common to the two NodeSets. 
*/ -static VALUE intersection(VALUE self, VALUE rb_other) +static VALUE +intersection(VALUE self, VALUE rb_other) { xmlNodeSetPtr node_set, other ; xmlNodeSetPtr intersection; - if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) + if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) { rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet"); + } Data_Get_Struct(self, xmlNodeSet, node_set); Data_Get_Struct(rb_other, xmlNodeSet, other); intersection = xmlXPathIntersection(node_set, other); - return Nokogiri_wrap_xml_node_set(intersection, rb_iv_get(self, "@document")); + return noko_xml_node_set_wrap(intersection, rb_iv_get(self, "@document")); } @@ -155,7 +217,8 @@ static VALUE intersection(VALUE self, VALUE rb_other) * * Returns true if any member of node set equals +node+. */ -static VALUE include_eh(VALUE self, VALUE rb_node) +static VALUE +include_eh(VALUE self, VALUE rb_node) { xmlNodeSetPtr node_set; xmlNodePtr node; @@ -163,7 +226,7 @@ static VALUE include_eh(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse); } @@ -176,21 +239,23 @@ static VALUE include_eh(VALUE self, VALUE rb_node) * Returns a new set built by merging the set and the elements of the given * set. 
*/ -static VALUE set_union(VALUE self, VALUE rb_other) +static VALUE +rb_xml_node_set_union(VALUE rb_node_set, VALUE rb_other) { - xmlNodeSetPtr node_set, other; - xmlNodeSetPtr new; + xmlNodeSetPtr c_node_set, c_other; + xmlNodeSetPtr c_new_node_set; - if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) + if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) { rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet"); + } - Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_other, xmlNodeSet, other); + Data_Get_Struct(rb_node_set, xmlNodeSet, c_node_set); + Data_Get_Struct(rb_other, xmlNodeSet, c_other); - new = xmlXPathNodeSetMerge(NULL, node_set); - new = xmlXPathNodeSetMerge(new, other); + c_new_node_set = xmlXPathNodeSetMerge(NULL, c_node_set); + c_new_node_set = xmlXPathNodeSetMerge(c_new_node_set, c_other); - return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document")); + return noko_xml_node_set_wrap(c_new_node_set, rb_iv_get(rb_node_set, "@document")); } /* @@ -200,14 +265,16 @@ static VALUE set_union(VALUE self, VALUE rb_other) * Difference - returns a new NodeSet that is a copy of this NodeSet, removing * each item that also appears in +node_set+ */ -static VALUE minus(VALUE self, VALUE rb_other) +static VALUE +minus(VALUE self, VALUE rb_other) { xmlNodeSetPtr node_set, other; xmlNodeSetPtr new; int j ; - if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) + if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) { rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet"); + } Data_Get_Struct(self, xmlNodeSet, node_set); Data_Get_Struct(rb_other, xmlNodeSet, other); @@ -217,11 +284,12 @@ static VALUE minus(VALUE self, VALUE rb_other) xpath_node_set_del(new, other->nodeTab[j]); } - return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document")); + return noko_xml_node_set_wrap(new, rb_iv_get(self, "@document")); } -static VALUE index_at(VALUE self, long offset) +static VALUE +index_at(VALUE self, long 
offset) { xmlNodeSetPtr node_set; @@ -233,10 +301,11 @@ static VALUE index_at(VALUE self, long offset) if (offset < 0) { offset += node_set->nodeNr ; } - return Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[offset], self); + return noko_xml_node_wrap_node_set_result(node_set->nodeTab[offset], self); } -static VALUE subseq(VALUE self, long beg, long len) +static VALUE +subseq(VALUE self, long beg, long len) { long j; xmlNodeSetPtr node_set; @@ -244,18 +313,18 @@ static VALUE subseq(VALUE self, long beg, long len) Data_Get_Struct(self, xmlNodeSet, node_set); - if (beg > node_set->nodeNr) return Qnil ; - if (beg < 0 || len < 0) return Qnil ; + if (beg > node_set->nodeNr) { return Qnil ; } + if (beg < 0 || len < 0) { return Qnil ; } if ((beg + len) > node_set->nodeNr) { len = node_set->nodeNr - beg ; } new_set = xmlXPathNodeSetCreate(NULL); - for (j = beg ; j < beg+len ; ++j) { + for (j = beg ; j < beg + len ; ++j) { xmlXPathNodeSetAddUnique(new_set, node_set->nodeTab[j]); } - return Nokogiri_wrap_xml_node_set(new_set, rb_iv_get(self, "@document")); + return noko_xml_node_set_wrap(new_set, rb_iv_get(self, "@document")); } /* @@ -273,7 +342,8 @@ static VALUE subseq(VALUE self, long beg, long len) * count backward from the end of the +node_set+ (-1 is the last node). Returns * nil if the +index+ (or +start+) are out of range. 
*/ -static VALUE slice(int argc, VALUE *argv, VALUE self) +static VALUE +slice(int argc, VALUE *argv, VALUE self) { VALUE arg ; long beg, len ; @@ -301,12 +371,12 @@ static VALUE slice(int argc, VALUE *argv, VALUE self) /* if arg is Range */ switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) { - case Qfalse: - break; - case Qnil: - return Qnil; - default: - return subseq(self, beg, len); + case Qfalse: + break; + case Qnil: + return Qnil; + default: + return subseq(self, beg, len); } return index_at(self, NUM2LONG(arg)); @@ -319,7 +389,8 @@ static VALUE slice(int argc, VALUE *argv, VALUE self) * * Return this list as an Array */ -static VALUE to_array(VALUE self, VALUE rb_node) +static VALUE +to_array(VALUE self) { xmlNodeSetPtr node_set ; VALUE list; @@ -328,9 +399,9 @@ static VALUE to_array(VALUE self, VALUE rb_node) Data_Get_Struct(self, xmlNodeSet, node_set); list = rb_ary_new2(node_set->nodeNr); - for(i = 0; i < node_set->nodeNr; i++) { - VALUE elt = Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[i], self); - rb_ary_push( list, elt ); + for (i = 0; i < node_set->nodeNr; i++) { + VALUE elt = noko_xml_node_wrap_node_set_result(node_set->nodeTab[i], self); + rb_ary_push(list, elt); } return list; @@ -342,7 +413,8 @@ static VALUE to_array(VALUE self, VALUE rb_node) * * Unlink this NodeSet and all Node objects it contains from their current context. */ -static VALUE unlink_nodeset(VALUE self) +static VALUE +unlink_nodeset(VALUE self) { xmlNodeSetPtr node_set; int j, nodeNr ; @@ -354,9 +426,9 @@ static VALUE unlink_nodeset(VALUE self) if (! 
NOKOGIRI_NAMESPACE_EH(node_set->nodeTab[j])) { VALUE node ; xmlNodePtr node_ptr; - node = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[j]); + node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]); rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */ - Data_Get_Struct(node, xmlNode, node_ptr); + Noko_Node_Get_Struct(node, xmlNode, node_ptr); node_set->nodeTab[j] = node_ptr ; } } @@ -364,123 +436,61 @@ static VALUE unlink_nodeset(VALUE self) } -static void reify_node_set_namespaces(VALUE self) +VALUE +noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document) { - /* - * as mentioned in deallocate() above, xmlNs structs returned in an XPath - * NodeSet are duplicates, and we don't clean them up at deallocate() time. - * - * as a result, we need to make sure the Ruby manages this memory. we do this - * by forcing the creation of a Ruby object wrapped around the xmlNs. - * - * we also have to make sure that the NodeSet has a reference to the - * Namespace object, otherwise GC will kick in and the Namespace won't be - * marked. - * - * we *could* do this safely with *all* the nodes in the NodeSet, but we only - * *need* to do it for xmlNs structs, and so you get the code we have here. 
- */ - int j ; - xmlNodeSetPtr node_set ; - VALUE namespace_cache ; - - Data_Get_Struct(self, xmlNodeSet, node_set); - - namespace_cache = rb_iv_get(self, "@namespace_cache"); + int j; + VALUE rb_node_set ; - for (j = 0 ; j < node_set->nodeNr ; j++) { - if (NOKOGIRI_NAMESPACE_EH(node_set->nodeTab[j])) { - rb_ary_push(namespace_cache, Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[j], self)); - } + if (c_node_set == NULL) { + c_node_set = xmlXPathNodeSetCreate(NULL); } -} - -VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) -{ - VALUE new_set ; - - if (node_set == NULL) { - node_set = xmlXPathNodeSetCreate(NULL); - } - - new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set); + rb_node_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, c_node_set); if (!NIL_P(document)) { - rb_iv_set(new_set, "@document", document); - rb_funcall(document, decorate, 1, new_set); + rb_iv_set(rb_node_set, "@document", document); + rb_funcall(document, decorate, 1, rb_node_set); } - rb_iv_set(new_set, "@namespace_cache", rb_ary_new()); - reify_node_set_namespaces(new_set); + /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */ + for (j = 0 ; j < c_node_set->nodeNr ; j++) { + noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set); + } - return new_set ; + return rb_node_set ; } -VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) +VALUE +noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) { - xmlDocPtr document ; - if (NOKOGIRI_NAMESPACE_EH(node)) { - Data_Get_Struct(rb_iv_get(node_set, "@document"), xmlDoc, document); - return Nokogiri_wrap_xml_namespace(document, (xmlNsPtr)node); + return noko_xml_namespace_wrap_xpath_copy((xmlNsPtr)node); } else { - return Nokogiri_wrap_xml_node(Qnil, node); + return noko_xml_node_wrap(Qnil, node); } } -static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val) -{ - /* - * as mentioned a 
few times above, we do not want to free xmlNs structs - * outside of the Namespace lifecycle. - * - * xmlXPathNodeSetDel() frees xmlNs structs, and so here we reproduce that - * function with the xmlNs logic. - */ - int i; - - if (cur == NULL) return; - if (val == NULL) return; - - /* - * find node in nodeTab - */ - for (i = 0;i < cur->nodeNr;i++) - if (cur->nodeTab[i] == val) break; - - if (i >= cur->nodeNr) { /* not found */ - return; - } - cur->nodeNr--; - for (;i < cur->nodeNr;i++) - cur->nodeTab[i] = cur->nodeTab[i + 1]; - cur->nodeTab[cur->nodeNr] = NULL; -} - - -VALUE cNokogiriXmlNodeSet ; -void init_xml_node_set(void) +void +noko_init_xml_node_set(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE klass = rb_define_class_under(xml, "NodeSet", rb_cObject); - cNokogiriXmlNodeSet = klass; - - rb_define_alloc_func(klass, allocate); - rb_define_method(klass, "length", length, 0); - rb_define_method(klass, "[]", slice, -1); - rb_define_method(klass, "slice", slice, -1); - rb_define_method(klass, "push", push, 1); - rb_define_method(klass, "|", set_union, 1); - rb_define_method(klass, "-", minus, 1); - rb_define_method(klass, "unlink", unlink_nodeset, 0); - rb_define_method(klass, "to_a", to_array, 0); - rb_define_method(klass, "dup", duplicate, 0); - rb_define_method(klass, "delete", delete, 1); - rb_define_method(klass, "&", intersection, 1); - rb_define_method(klass, "include?", include_eh, 1); + cNokogiriXmlNodeSet = rb_define_class_under(mNokogiriXml, "NodeSet", rb_cObject); + + rb_define_alloc_func(cNokogiriXmlNodeSet, allocate); + + rb_define_method(cNokogiriXmlNodeSet, "length", length, 0); + rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1); + rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1); + rb_define_method(cNokogiriXmlNodeSet, "push", push, 1); + rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1); + rb_define_method(cNokogiriXmlNodeSet, "-", minus, 
1); + rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0); + rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0); + rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0); + rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1); + rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1); + rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1); decorate = rb_intern("decorate"); } diff --git a/ext/nokogiri/xml_node_set.h b/ext/nokogiri/xml_node_set.h deleted file mode 100644 index 210eee3057..0000000000 --- a/ext/nokogiri/xml_node_set.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef NOKOGIRI_XML_NODE_SET -#define NOKOGIRI_XML_NODE_SET - -#include -void init_xml_node_set(); - -extern VALUE cNokogiriXmlNodeSet ; -VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ; -VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) ; -VALUE Nokogiri_wrap_xml_node_set_namespace(xmlNsPtr node, VALUE node_set) ; - -#endif diff --git a/ext/nokogiri/xml_processing_instruction.c b/ext/nokogiri/xml_processing_instruction.c index cbb68d5ac9..f4a6dc2eb2 100644 --- a/ext/nokogiri/xml_processing_instruction.c +++ b/ext/nokogiri/xml_processing_instruction.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlProcessingInstruction; /* * call-seq: @@ -7,7 +9,8 @@ * Create a new ProcessingInstruction element on the +document+ with +name+ * and +content+ */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr xml_doc; xmlNodePtr node; @@ -22,35 +25,30 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) Data_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewDocPI( - xml_doc, - (const xmlChar *)StringValueCStr(name), - (const xmlChar *)StringValueCStr(content) - ); + xml_doc, + (const xmlChar *)StringValueCStr(name), + (const xmlChar *)StringValueCStr(content) + ); - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - rb_node = 
Nokogiri_wrap_xml_node(klass, node); + rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) rb_yield(rb_node); + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } -VALUE cNokogiriXmlProcessingInstruction; -void init_xml_processing_instruction() +void +noko_init_xml_processing_instruction(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - + assert(cNokogiriXmlNode); /* * ProcessingInstruction represents a ProcessingInstruction node in an xml * document. */ - VALUE klass = rb_define_class_under(xml, "ProcessingInstruction", node); - - cNokogiriXmlProcessingInstruction = klass; + cNokogiriXmlProcessingInstruction = rb_define_class_under(mNokogiriXml, "ProcessingInstruction", cNokogiriXmlNode); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlProcessingInstruction, "new", new, -1); } diff --git a/ext/nokogiri/xml_processing_instruction.h b/ext/nokogiri/xml_processing_instruction.h deleted file mode 100644 index f639630bff..0000000000 --- a/ext/nokogiri/xml_processing_instruction.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION -#define NOKOGIRI_XML_PROCESSING_INSTRUCTION - -#include - -void init_xml_processing_instruction(); - -extern VALUE cNokogiriXmlProcessingInstruction; -#endif diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index aea150d0cb..d099158a41 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -1,13 +1,15 @@ -#include +#include -static void dealloc(xmlTextReaderPtr reader) +VALUE cNokogiriXmlReader; + +static void +dealloc(xmlTextReaderPtr reader) { - NOKOGIRI_DEBUG_START(reader); xmlFreeTextReader(reader); - NOKOGIRI_DEBUG_END(reader); } -static int has_attributes(xmlTextReaderPtr reader) +static int +has_attributes(xmlTextReaderPtr reader) { /* * 
this implementation of xmlTextReaderHasAttributes explicitly includes @@ -16,47 +18,40 @@ static int has_attributes(xmlTextReaderPtr reader) */ xmlNodePtr node ; node = xmlTextReaderCurrentNode(reader); - if (node == NULL) - return(0); + if (node == NULL) { + return (0); + } if ((node->type == XML_ELEMENT_NODE) && - ((node->properties != NULL) || (node->nsDef != NULL))) - return(1); - return(0); + ((node->properties != NULL) || (node->nsDef != NULL))) { + return (1); + } + return (0); } -static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) +// TODO: merge this function into the `namespaces` method implementation +static void +Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { xmlNsPtr ns; - static char buffer[XMLNS_BUFFER_LEN] ; - char *key ; - size_t keylen ; + VALUE key; - if (node->type != XML_ELEMENT_NODE) return ; + if (node->type != XML_ELEMENT_NODE) { return ; } ns = node->nsDef; while (ns != NULL) { - keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ; - if (keylen > XMLNS_BUFFER_LEN) { - key = (char*)malloc(keylen) ; - } else { - key = buffer ; - } - + key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding()); if (ns->prefix) { - sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix); - } else { - sprintf(key, "%s", XMLNS_PREFIX); + rb_str_cat_cstr(key, ":"); + rb_str_cat_cstr(key, (const char *)ns->prefix); } + key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding()); rb_hash_aset(attr_hash, - NOKOGIRI_STR_NEW2(key), - (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil) - ); - if (key != buffer) { - free(key); - } + key, + (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil) + ); ns = ns->next ; } } @@ -68,15 +63,16 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) * * Was an attribute generated from the default value in the DTD or schema? 
*/ -static VALUE default_eh(VALUE self) +static VALUE +default_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = xmlTextReaderIsDefault(reader); - if(eh == 0) return Qfalse; - if(eh == 1) return Qtrue; + if (eh == 0) { return Qfalse; } + if (eh == 1) { return Qtrue; } return Qnil; } @@ -87,15 +83,16 @@ static VALUE default_eh(VALUE self) * * Does this node have a text value? */ -static VALUE value_eh(VALUE self) +static VALUE +value_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = xmlTextReaderHasValue(reader); - if(eh == 0) return Qfalse; - if(eh == 1) return Qtrue; + if (eh == 0) { return Qfalse; } + if (eh == 1) { return Qtrue; } return Qnil; } @@ -106,15 +103,16 @@ static VALUE value_eh(VALUE self) * * Does this node have attributes? */ -static VALUE attributes_eh(VALUE self) +static VALUE +attributes_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = has_attributes(reader); - if(eh == 0) return Qfalse; - if(eh == 1) return Qtrue; + if (eh == 0) { return Qfalse; } + if (eh == 1) { return Qtrue; } return Qnil; } @@ -125,52 +123,136 @@ static VALUE attributes_eh(VALUE self) * * Get a hash of namespaces for this Node */ -static VALUE namespaces(VALUE self) +static VALUE +rb_xml_reader_namespaces(VALUE rb_reader) { - xmlTextReaderPtr reader; - xmlNodePtr ptr; - VALUE attr ; + VALUE rb_namespaces = rb_hash_new() ; + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + VALUE rb_errors; - Data_Get_Struct(self, xmlTextReader, reader); + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (! has_attributes(c_reader)) { + return rb_namespaces ; + } - attr = rb_hash_new() ; + rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0); - if (! 
has_attributes(reader)) - return attr ; + xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher); + c_node = xmlTextReaderExpand(c_reader); + xmlSetStructuredErrorFunc(NULL, NULL); - ptr = xmlTextReaderExpand(reader); - if(ptr == NULL) return Qnil; + if (c_node == NULL) { + if (RARRAY_LEN(rb_errors) > 0) { + VALUE rb_error = rb_ary_entry(rb_errors, 0); + VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0); + rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError)); + } + return Qnil; + } - Nokogiri_xml_node_namespaces(ptr, attr); + Nokogiri_xml_node_namespaces(c_node, rb_namespaces); - return attr ; + return rb_namespaces ; } /* - * call-seq: - * attribute_nodes - * - * Get a list of attributes for this Node + :call-seq: attribute_nodes() → Array + + Get the attributes of the current node as an Array of XML:Attr + + ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri. + + See related: #attribute_hash, #attributes */ -static VALUE attribute_nodes(VALUE self) +static VALUE +rb_xml_reader_attribute_nodes(VALUE rb_reader) { - xmlTextReaderPtr reader; - xmlNodePtr ptr; - VALUE attr ; + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + VALUE attr_nodes; + int j; - Data_Get_Struct(self, xmlTextReader, reader); + // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598 + // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c + NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); - attr = rb_ary_new() ; + if (! has_attributes(c_reader)) { + return rb_ary_new() ; + } - if (! 
has_attributes(reader)) - return attr ; + c_node = xmlTextReaderExpand(c_reader); + if (c_node == NULL) { + return Qnil; + } - ptr = xmlTextReaderExpand(reader); - if(ptr == NULL) return Qnil; + attr_nodes = noko_xml_node_attrs(c_node); - Nokogiri_xml_node_properties(ptr, attr); + /* ensure that the Reader won't be GCed as long as a node is referenced */ + for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) { + rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader); + } - return attr ; + return attr_nodes; +} + +/* + :call-seq: attribute_hash() → Hash + + Get the attributes of the current node as a Hash of names and values. + + See related: #attributes and #namespaces + */ +static VALUE +rb_xml_reader_attribute_hash(VALUE rb_reader) +{ + VALUE rb_attributes = rb_hash_new(); + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + xmlAttrPtr c_property; + VALUE rb_errors; + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (!has_attributes(c_reader)) { + return rb_attributes; + } + + rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0); + + xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher); + c_node = xmlTextReaderExpand(c_reader); + xmlSetStructuredErrorFunc(NULL, NULL); + + if (c_node == NULL) { + if (RARRAY_LEN(rb_errors) > 0) { + VALUE rb_error = rb_ary_entry(rb_errors, 0); + VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0); + rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError)); + } + return Qnil; + } + + c_property = c_node->properties; + while (c_property != NULL) { + VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name); + VALUE rb_value = Qnil; + xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property); + + if (c_value) { + rb_value = NOKOGIRI_STR_NEW2(c_value); + xmlFree(c_value); + } + + rb_hash_aset(rb_attributes, rb_name, rb_value); + + c_property = c_property->next; + } + + return rb_attributes; } /* @@ -179,7 +261,8 @@ static VALUE attribute_nodes(VALUE self) * * 
Get the value of attribute at +index+ */ -static VALUE attribute_at(VALUE self, VALUE index) +static VALUE +attribute_at(VALUE self, VALUE index) { xmlTextReaderPtr reader; xmlChar *value; @@ -187,14 +270,14 @@ static VALUE attribute_at(VALUE self, VALUE index) Data_Get_Struct(self, xmlTextReader, reader); - if(NIL_P(index)) return Qnil; + if (NIL_P(index)) { return Qnil; } index = rb_Integer(index); value = xmlTextReaderGetAttributeNo( - reader, - (int)NUM2INT(index) - ); - if(value == NULL) return Qnil; + reader, + (int)NUM2INT(index) + ); + if (value == NULL) { return Qnil; } rb_value = NOKOGIRI_STR_NEW2(value); xmlFree(value); @@ -207,7 +290,8 @@ static VALUE attribute_at(VALUE self, VALUE index) * * Get the value of attribute named +name+ */ -static VALUE reader_attribute(VALUE self, VALUE name) +static VALUE +reader_attribute(VALUE self, VALUE name) { xmlTextReaderPtr reader; xmlChar *value ; @@ -215,11 +299,11 @@ static VALUE reader_attribute(VALUE self, VALUE name) Data_Get_Struct(self, xmlTextReader, reader); - if(NIL_P(name)) return Qnil; + if (NIL_P(name)) { return Qnil; } name = StringValue(name) ; - value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name)); - if(value == NULL) return Qnil; + value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name)); + if (value == NULL) { return Qnil; } rb_value = NOKOGIRI_STR_NEW2(value); xmlFree(value); @@ -232,16 +316,17 @@ static VALUE reader_attribute(VALUE self, VALUE name) * * Get the number of attributes for the current node */ -static VALUE attribute_count(VALUE self) +static VALUE +attribute_count(VALUE self) { xmlTextReaderPtr reader; int count; Data_Get_Struct(self, xmlTextReader, reader); count = xmlTextReaderAttributeCount(reader); - if(count == -1) return Qnil; + if (count == -1) { return Qnil; } - return INT2NUM((long)count); + return INT2NUM(count); } /* @@ -250,16 +335,17 @@ static VALUE attribute_count(VALUE self) * * Get the depth of the node */ -static VALUE 
depth(VALUE self) +static VALUE +depth(VALUE self) { xmlTextReaderPtr reader; int depth; Data_Get_Struct(self, xmlTextReader, reader); depth = xmlTextReaderDepth(reader); - if(depth == -1) return Qnil; + if (depth == -1) { return Qnil; } - return INT2NUM((long)depth); + return INT2NUM(depth); } /* @@ -268,14 +354,15 @@ static VALUE depth(VALUE self) * * Get the XML version of the document being read */ -static VALUE xml_version(VALUE self) +static VALUE +xml_version(VALUE self) { xmlTextReaderPtr reader; const char *version; Data_Get_Struct(self, xmlTextReader, reader); version = (const char *)xmlTextReaderConstXmlVersion(reader); - if(version == NULL) return Qnil; + if (version == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(version); } @@ -286,14 +373,15 @@ static VALUE xml_version(VALUE self) * * Get the xml:lang scope within which the node resides. */ -static VALUE lang(VALUE self) +static VALUE +lang(VALUE self) { xmlTextReaderPtr reader; const char *lang; Data_Get_Struct(self, xmlTextReader, reader); lang = (const char *)xmlTextReaderConstXmlLang(reader); - if(lang == NULL) return Qnil; + if (lang == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(lang); } @@ -304,14 +392,15 @@ static VALUE lang(VALUE self) * * Get the text value of the node if present. Returns a utf-8 encoded string. */ -static VALUE value(VALUE self) +static VALUE +value(VALUE self) { xmlTextReaderPtr reader; const char *value; Data_Get_Struct(self, xmlTextReader, reader); value = (const char *)xmlTextReaderConstValue(reader); - if(value == NULL) return Qnil; + if (value == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(value); } @@ -322,14 +411,15 @@ static VALUE value(VALUE self) * * Get the shorthand reference to the namespace associated with the node. 
*/ -static VALUE prefix(VALUE self) +static VALUE +prefix(VALUE self) { xmlTextReaderPtr reader; const char *prefix; Data_Get_Struct(self, xmlTextReader, reader); prefix = (const char *)xmlTextReaderConstPrefix(reader); - if(prefix == NULL) return Qnil; + if (prefix == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(prefix); } @@ -340,14 +430,15 @@ static VALUE prefix(VALUE self) * * Get the URI defining the namespace associated with the node */ -static VALUE namespace_uri(VALUE self) +static VALUE +namespace_uri(VALUE self) { xmlTextReaderPtr reader; const char *uri; Data_Get_Struct(self, xmlTextReader, reader); uri = (const char *)xmlTextReaderConstNamespaceUri(reader); - if(uri == NULL) return Qnil; + if (uri == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(uri); } @@ -358,14 +449,15 @@ static VALUE namespace_uri(VALUE self) * * Get the local name of the node */ -static VALUE local_name(VALUE self) +static VALUE +local_name(VALUE self) { xmlTextReaderPtr reader; const char *name; Data_Get_Struct(self, xmlTextReader, reader); name = (const char *)xmlTextReaderConstLocalName(reader); - if(name == NULL) return Qnil; + if (name == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(name); } @@ -376,14 +468,15 @@ static VALUE local_name(VALUE self) * * Get the name of the node. Returns a utf-8 encoded string. 
*/ -static VALUE name(VALUE self) +static VALUE +name(VALUE self) { xmlTextReaderPtr reader; const char *name; Data_Get_Struct(self, xmlTextReader, reader); name = (const char *)xmlTextReaderConstName(reader); - if(name == NULL) return Qnil; + if (name == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(name); } @@ -394,16 +487,24 @@ static VALUE name(VALUE self) * * Get the xml:base of the node */ -static VALUE base_uri(VALUE self) +static VALUE +rb_xml_reader_base_uri(VALUE rb_reader) { - xmlTextReaderPtr reader; - const char * base_uri; + VALUE rb_base_uri; + xmlTextReaderPtr c_reader; + xmlChar *c_base_uri; - Data_Get_Struct(self, xmlTextReader, reader); - base_uri = (const char *)xmlTextReaderBaseUri(reader); - if (base_uri == NULL) return Qnil; + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + c_base_uri = xmlTextReaderBaseUri(c_reader); + if (c_base_uri == NULL) { + return Qnil; + } - return NOKOGIRI_STR_NEW2(base_uri); + rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri); + xmlFree(c_base_uri); + + return rb_base_uri; } /* @@ -412,11 +513,12 @@ static VALUE base_uri(VALUE self) * * Get the state of the reader */ -static VALUE state(VALUE self) +static VALUE +state(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); - return INT2NUM((long)xmlTextReaderReadState(reader)); + return INT2NUM(xmlTextReaderReadState(reader)); } /* @@ -425,11 +527,12 @@ static VALUE state(VALUE self) * * Get the type of readers current node */ -static VALUE node_type(VALUE self) +static VALUE +node_type(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); - return INT2NUM((long)xmlTextReaderNodeType(reader)); + return INT2NUM(xmlTextReaderNodeType(reader)); } /* @@ -438,7 +541,8 @@ static VALUE node_type(VALUE self) * * Move the Reader forward through the XML document. 
*/ -static VALUE read_more(VALUE self) +static VALUE +read_more(VALUE self) { xmlTextReaderPtr reader; xmlErrorPtr error; @@ -453,14 +557,15 @@ static VALUE read_more(VALUE self) ret = xmlTextReaderRead(reader); xmlSetStructuredErrorFunc(NULL, NULL); - if(ret == 1) return self; - if(ret == 0) return Qnil; + if (ret == 1) { return self; } + if (ret == 0) { return Qnil; } error = xmlGetLastError(); - if(error) + if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); - else + } else { rb_raise(rb_eRuntimeError, "Error pulling: %d", ret); + } return Qnil; } @@ -472,10 +577,11 @@ static VALUE read_more(VALUE self) * Read the contents of the current node, including child nodes and markup. * Returns a utf-8 encoded string. */ -static VALUE inner_xml(VALUE self) +static VALUE +inner_xml(VALUE self) { xmlTextReaderPtr reader; - xmlChar* value; + xmlChar *value; VALUE str; Data_Get_Struct(self, xmlTextReader, reader); @@ -483,8 +589,8 @@ static VALUE inner_xml(VALUE self) value = xmlTextReaderReadInnerXml(reader); str = Qnil; - if(value) { - str = NOKOGIRI_STR_NEW2((char*)value); + if (value) { + str = NOKOGIRI_STR_NEW2((char *)value); xmlFree(value); } @@ -498,7 +604,8 @@ static VALUE inner_xml(VALUE self) * Read the current node and its contents, including child nodes and markup. * Returns a utf-8 encoded string. 
*/ -static VALUE outer_xml(VALUE self) +static VALUE +outer_xml(VALUE self) { xmlTextReaderPtr reader; xmlChar *value; @@ -508,8 +615,8 @@ static VALUE outer_xml(VALUE self) value = xmlTextReaderReadOuterXml(reader); - if(value) { - str = NOKOGIRI_STR_NEW2((char*)value); + if (value) { + str = NOKOGIRI_STR_NEW2((char *)value); xmlFree(value); } return str; @@ -521,31 +628,32 @@ static VALUE outer_xml(VALUE self) * * Create a new reader that parses +string+ */ -static VALUE from_memory(int argc, VALUE *argv, VALUE klass) +static VALUE +from_memory(int argc, VALUE *argv, VALUE klass) { VALUE rb_buffer, rb_url, encoding, rb_options; xmlTextReaderPtr reader; - const char * c_url = NULL; - const char * c_encoding = NULL; + const char *c_url = NULL; + const char *c_encoding = NULL; int c_options = 0; VALUE rb_reader, args[3]; rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options); - if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil"); - if (RTEST(rb_url)) c_url = StringValueCStr(rb_url); - if (RTEST(encoding)) c_encoding = StringValueCStr(encoding); - if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options); + if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); } + if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); } + if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); } + if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); } reader = xmlReaderForMemory( - StringValuePtr(rb_buffer), - (int)RSTRING_LEN(rb_buffer), - c_url, - c_encoding, - c_options - ); - - if(reader == NULL) { + StringValuePtr(rb_buffer), + (int)RSTRING_LEN(rb_buffer), + c_url, + c_encoding, + c_options + ); + + if (reader == NULL) { xmlFreeTextReader(reader); rb_raise(rb_eRuntimeError, "couldn't create a parser"); } @@ -565,32 +673,33 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass) * * Create a new reader that parses +io+ */ -static VALUE from_io(int argc, VALUE *argv, VALUE klass) +static VALUE 
+from_io(int argc, VALUE *argv, VALUE klass) { VALUE rb_io, rb_url, encoding, rb_options; xmlTextReaderPtr reader; - const char * c_url = NULL; - const char * c_encoding = NULL; + const char *c_url = NULL; + const char *c_encoding = NULL; int c_options = 0; VALUE rb_reader, args[3]; rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options); - if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil"); - if (RTEST(rb_url)) c_url = StringValueCStr(rb_url); - if (RTEST(encoding)) c_encoding = StringValueCStr(encoding); - if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options); + if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); } + if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); } + if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); } + if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); } reader = xmlReaderForIO( - (xmlInputReadCallback)io_read_callback, - (xmlInputCloseCallback)io_close_callback, - (void *)rb_io, - c_url, - c_encoding, - c_options - ); - - if(reader == NULL) { + (xmlInputReadCallback)noko_io_read, + (xmlInputCloseCallback)noko_io_close, + (void *)rb_io, + c_url, + c_encoding, + c_options + ); + + if (reader == NULL) { xmlFreeTextReader(reader); rb_raise(rb_eRuntimeError, "couldn't create a parser"); } @@ -610,59 +719,76 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass) * * Returns true if the current node is empty, otherwise false. 
*/ -static VALUE empty_element_p(VALUE self) +static VALUE +empty_element_p(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); - if(xmlTextReaderIsEmptyElement(reader)) + if (xmlTextReaderIsEmptyElement(reader)) { return Qtrue; + } return Qfalse; } -VALUE cNokogiriXmlReader; - -void init_xml_reader() +static VALUE +rb_xml_reader_encoding(VALUE rb_reader) { - VALUE module = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(module, "XML"); + xmlTextReaderPtr c_reader; + const char *parser_encoding; + VALUE constructor_encoding; + + constructor_encoding = rb_iv_get(rb_reader, "@encoding"); + if (RTEST(constructor_encoding)) { + return constructor_encoding; + } + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader); + if (parser_encoding == NULL) { return Qnil; } + return NOKOGIRI_STR_NEW2(parser_encoding); +} + +void +noko_init_xml_reader(void) +{ /* * The Reader parser allows you to effectively pull parse an XML document. * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each * node. Note that you may only iterate over the document once! 
*/ - VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject); - - cNokogiriXmlReader = klass; - - rb_define_singleton_method(klass, "from_memory", from_memory, -1); - rb_define_singleton_method(klass, "from_io", from_io, -1); - - rb_define_method(klass, "read", read_more, 0); - rb_define_method(klass, "inner_xml", inner_xml, 0); - rb_define_method(klass, "outer_xml", outer_xml, 0); - rb_define_method(klass, "state", state, 0); - rb_define_method(klass, "node_type", node_type, 0); - rb_define_method(klass, "name", name, 0); - rb_define_method(klass, "local_name", local_name, 0); - rb_define_method(klass, "namespace_uri", namespace_uri, 0); - rb_define_method(klass, "prefix", prefix, 0); - rb_define_method(klass, "value", value, 0); - rb_define_method(klass, "lang", lang, 0); - rb_define_method(klass, "xml_version", xml_version, 0); - rb_define_method(klass, "depth", depth, 0); - rb_define_method(klass, "attribute_count", attribute_count, 0); - rb_define_method(klass, "attribute", reader_attribute, 1); - rb_define_method(klass, "namespaces", namespaces, 0); - rb_define_method(klass, "attribute_at", attribute_at, 1); - rb_define_method(klass, "empty_element?", empty_element_p, 0); - rb_define_method(klass, "attributes?", attributes_eh, 0); - rb_define_method(klass, "value?", value_eh, 0); - rb_define_method(klass, "default?", default_eh, 0); - rb_define_method(klass, "base_uri", base_uri, 0); - - rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0); + cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject); + + rb_undef_alloc_func(cNokogiriXmlReader); + + rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1); + rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1); + + rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1); + rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); + rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 
0); + rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); + rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0); + rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); + rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0); + rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0); + rb_define_method(cNokogiriXmlReader, "depth", depth, 0); + rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0); + rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0); + rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0); + rb_define_method(cNokogiriXmlReader, "lang", lang, 0); + rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0); + rb_define_method(cNokogiriXmlReader, "name", name, 0); + rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0); + rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0); + rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0); + rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0); + rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0); + rb_define_method(cNokogiriXmlReader, "read", read_more, 0); + rb_define_method(cNokogiriXmlReader, "state", state, 0); + rb_define_method(cNokogiriXmlReader, "value", value, 0); + rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0); + rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0); } diff --git a/ext/nokogiri/xml_reader.h b/ext/nokogiri/xml_reader.h deleted file mode 100644 index 3b2f8717d8..0000000000 --- a/ext/nokogiri/xml_reader.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_READER -#define NOKOGIRI_XML_READER - -#include - -void init_xml_reader(); - -extern VALUE cNokogiriXmlReader; - -#endif diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index e17b11ae8b..943b0a7cd5 100644 --- 
a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -1,10 +1,11 @@ -#include +#include -static void dealloc(xmlRelaxNGPtr schema) +VALUE cNokogiriXmlRelaxNG; + +static void +dealloc(xmlRelaxNGPtr schema) { - NOKOGIRI_DEBUG_START(schema); xmlRelaxNGFree(schema); - NOKOGIRI_DEBUG_END(schema); } /* @@ -13,7 +14,8 @@ static void dealloc(xmlRelaxNGPtr schema) * * Validate a Nokogiri::XML::Document against this RelaxNG schema. */ -static VALUE validate_document(VALUE self, VALUE document) +static VALUE +validate_document(VALUE self, VALUE document) { xmlDocPtr doc; xmlRelaxNGPtr schema; @@ -27,7 +29,7 @@ static VALUE validate_document(VALUE self, VALUE document) valid_ctxt = xmlRelaxNGNewValidCtxt(schema); - if(NULL == valid_ctxt) { + if (NULL == valid_ctxt) { /* we have a problem */ rb_raise(rb_eRuntimeError, "Could not create a validation context"); } @@ -53,16 +55,25 @@ static VALUE validate_document(VALUE self, VALUE document) * * Create a new RelaxNG from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE +read_memory(int argc, VALUE *argv, VALUE klass) { - xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE content; + VALUE parse_options; + xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; - VALUE errors = rb_ary_new(); + VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS @@ -78,18 +89,20 @@ static VALUE read_memory(VALUE klass, VALUE content) 
xmlSetStructuredErrorFunc(NULL, NULL); xmlRelaxNGFreeParserCtxt(ctx); - if(NULL == schema) { + if (NULL == schema) { xmlErrorPtr error = xmlGetLastError(); - if(error) + if (error) { Nokogiri_error_raise(NULL, error); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -100,18 +113,26 @@ static VALUE read_memory(VALUE klass, VALUE content) * * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE +from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; xmlDocPtr doc; xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); Data_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. 
*/ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } ctx = xmlRelaxNGNewDocParserCtxt(doc); @@ -129,33 +150,34 @@ static VALUE from_document(VALUE klass, VALUE document) schema = xmlRelaxNGParse(ctx); xmlSetStructuredErrorFunc(NULL, NULL); + xmlRelaxNGFreeParserCtxt(ctx); - if(NULL == schema) { + if (NULL == schema) { xmlErrorPtr error = xmlGetLastError(); - if(error) + if (error) { Nokogiri_error_raise(NULL, error); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } -VALUE cNokogiriXmlRelaxNG; -void init_xml_relax_ng() +void +noko_init_xml_relax_ng(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema); + assert(cNokogiriXmlSchema); + cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema); - cNokogiriXmlRelaxNG = klass; + rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1); + rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1); - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); - rb_define_private_method(klass, "validate_document", validate_document, 1); + rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1); } diff --git a/ext/nokogiri/xml_relax_ng.h b/ext/nokogiri/xml_relax_ng.h deleted file mode 100644 index 265978df2f..0000000000 --- a/ext/nokogiri/xml_relax_ng.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_RELAX_NG -#define NOKOGIRI_XML_RELAX_NG - -#include - -void 
init_xml_relax_ng(); - -extern VALUE cNokogiriXmlRelaxNG; -#endif diff --git a/ext/nokogiri/xml_sax_parser.c b/ext/nokogiri/xml_sax_parser.c index 1a5f6c5f51..3ff9906503 100644 --- a/ext/nokogiri/xml_sax_parser.c +++ b/ext/nokogiri/xml_sax_parser.c @@ -1,23 +1,23 @@ -#include +#include -int vasprintf (char **strp, const char *fmt, va_list ap); -void vasprintf_free (void *p); +VALUE cNokogiriXmlSaxParser ; static ID id_start_document, id_end_document, id_start_element, id_end_element; static ID id_start_element_namespace, id_end_element_namespace; static ID id_comment, id_characters, id_xmldecl, id_error, id_warning; -static ID id_cdata_block, id_cAttribute; +static ID id_cdata_block; static ID id_processing_instruction; -static void start_document(void * ctx) +static void +start_document(void *ctx) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx); - if(NULL != ctxt && ctxt->html != 1) { - if(ctxt->standalone != -1) { /* -1 means there was no declaration */ + if (NULL != ctxt && ctxt->html != 1) { + if (ctxt->standalone != -1) { /* -1 means there was no declaration */ VALUE encoding = Qnil ; VALUE standalone = Qnil; VALUE version; @@ -29,8 +29,7 @@ static void start_document(void * ctx) version = ctxt->version ? 
NOKOGIRI_STR_NEW2(ctxt->version) : Qnil; - switch(ctxt->standalone) - { + switch (ctxt->standalone) { case 0: standalone = NOKOGIRI_STR_NEW2("no"); break; @@ -46,138 +45,140 @@ static void start_document(void * ctx) rb_funcall(doc, id_start_document, 0); } -static void end_document(void * ctx) +static void +end_document(void *ctx) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); rb_funcall(doc, id_end_document, 0); } -static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts) +static void +start_element(void *ctx, const xmlChar *name, const xmlChar **atts) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); VALUE attributes = rb_ary_new(); - const xmlChar * attr; + const xmlChar *attr; int i = 0; - if(atts) { - while((attr = atts[i]) != NULL) { - const xmlChar * val = atts[i+1]; + if (atts) { + while ((attr = atts[i]) != NULL) { + const xmlChar *val = atts[i + 1]; VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil; rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value)); - i+=2; + i += 2; } } - rb_funcall( doc, - id_start_element, - 2, - NOKOGIRI_STR_NEW2(name), - attributes - ); + rb_funcall(doc, + id_start_element, + 2, + NOKOGIRI_STR_NEW2(name), + attributes + ); } -static void end_element(void * ctx, const xmlChar *name) +static void +end_element(void *ctx, const xmlChar *name) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name)); } -static VALUE attributes_as_list( - VALUE self, - int nb_attributes, - const xmlChar ** attributes) +static VALUE +attributes_as_array(int attributes_len, const xmlChar **c_attributes) { - VALUE list = rb_ary_new2((long)nb_attributes); + VALUE rb_array = rb_ary_new2((long)attributes_len); + VALUE cNokogiriXmlSaxParserAttribute; - VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute); - if (attributes) { + 
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute")); + if (c_attributes) { /* Each attribute is an array of [localname, prefix, URI, value, end] */ int i; - for (i = 0; i < nb_attributes * 5; i += 5) { - VALUE argv[4], attribute; + for (i = 0; i < attributes_len * 5; i += 5) { + VALUE rb_constructor_args[4], rb_attribute; - argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */ - argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */ - argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */ + rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */ + rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */ + rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */ /* value */ - argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3], - (attributes[i+4] - attributes[i+3])); + rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3], + (c_attributes[i + 4] - c_attributes[i + 3])); - attribute = rb_class_new_instance(4, argv, attr_klass); - rb_ary_push(list, attribute); + rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute); + rb_ary_push(rb_array, rb_attribute); } } - return list; + return rb_array; } static void -start_element_ns ( - void * ctx, - const xmlChar * localname, - const xmlChar * prefix, - const xmlChar * uri, +start_element_ns( + void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *uri, int nb_namespaces, - const xmlChar ** namespaces, + const xmlChar **namespaces, int nb_attributes, int nb_defaulted, - const xmlChar ** attributes) + const xmlChar **attributes) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); - VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes); + VALUE attribute_ary = attributes_as_array(nb_attributes, attributes); VALUE ns_list = rb_ary_new2((long)nb_namespaces); if (namespaces) { int i; - for (i 
= 0; i < nb_namespaces * 2; i += 2) - { + for (i = 0; i < nb_namespaces * 2; i += 2) { rb_ary_push(ns_list, - rb_ary_new3((long)2, - RBSTR_OR_QNIL(namespaces[i + 0]), - RBSTR_OR_QNIL(namespaces[i + 1]) - ) - ); + rb_ary_new3((long)2, + RBSTR_OR_QNIL(namespaces[i + 0]), + RBSTR_OR_QNIL(namespaces[i + 1]) + ) + ); } } - rb_funcall( doc, - id_start_element_namespace, - 5, - NOKOGIRI_STR_NEW2(localname), - attribute_list, - RBSTR_OR_QNIL(prefix), - RBSTR_OR_QNIL(uri), - ns_list - ); + rb_funcall(doc, + id_start_element_namespace, + 5, + NOKOGIRI_STR_NEW2(localname), + attribute_ary, + RBSTR_OR_QNIL(prefix), + RBSTR_OR_QNIL(uri), + ns_list + ); } /** * end_element_ns was borrowed heavily from libxml-ruby. */ static void -end_element_ns ( - void * ctx, - const xmlChar * localname, - const xmlChar * prefix, - const xmlChar * uri) +end_element_ns( + void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *uri) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); rb_funcall(doc, id_end_element_namespace, 3, - NOKOGIRI_STR_NEW2(localname), - RBSTR_OR_QNIL(prefix), - RBSTR_OR_QNIL(uri) - ); + NOKOGIRI_STR_NEW2(localname), + RBSTR_OR_QNIL(prefix), + RBSTR_OR_QNIL(uri) + ); } -static void characters_func(void * ctx, const xmlChar * ch, int len) +static void +characters_func(void *ctx, const xmlChar *ch, int len) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); @@ -185,7 +186,8 @@ static void characters_func(void * ctx, const xmlChar * ch, int len) rb_funcall(doc, id_characters, 1, str); } -static void comment_func(void * ctx, const xmlChar * value) +static void +comment_func(void *ctx, const xmlChar *value) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); @@ -193,41 +195,52 @@ static void comment_func(void * ctx, const xmlChar * value) rb_funcall(doc, id_comment, 1, str); } -static void warning_func(void * ctx, const char *msg, ...) 
+PRINTFLIKE_DECL(2, 3) +static void +warning_func(void *ctx, const char *msg, ...) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); - char * message; - VALUE ruby_message; + VALUE rb_message; +#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES + /* It is not currently possible to pass var args from native + functions to sulong, so we work around the issue here. */ + rb_message = rb_sprintf("warning_func: %s", msg); +#else va_list args; va_start(args, msg); - vasprintf(&message, msg, args); + rb_message = rb_vsprintf(msg, args); va_end(args); +#endif - ruby_message = NOKOGIRI_STR_NEW2(message); - vasprintf_free(message); - rb_funcall(doc, id_warning, 1, ruby_message); + rb_funcall(doc, id_warning, 1, rb_message); } -static void error_func(void * ctx, const char *msg, ...) +PRINTFLIKE_DECL(2, 3) +static void +error_func(void *ctx, const char *msg, ...) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); - char * message; - VALUE ruby_message; + VALUE rb_message; +#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES + /* It is not currently possible to pass var args from native + functions to sulong, so we work around the issue here. 
*/ + rb_message = rb_sprintf("error_func: %s", msg); +#else va_list args; va_start(args, msg); - vasprintf(&message, msg, args); + rb_message = rb_vsprintf(msg, args); va_end(args); +#endif - ruby_message = NOKOGIRI_STR_NEW2(message); - vasprintf_free(message); - rb_funcall(doc, id_error, 1, ruby_message); + rb_funcall(doc, id_error, 1, rb_message); } -static void cdata_block(void * ctx, const xmlChar * value, int len) +static void +cdata_block(void *ctx, const xmlChar *value, int len) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); @@ -235,7 +248,8 @@ static void cdata_block(void * ctx, const xmlChar * value, int len) rb_funcall(doc, id_cdata_block, 1, string); } -static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content) +static void +processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content) { VALUE rb_content; VALUE self = NOKOGIRI_SAX_SELF(ctx); @@ -243,26 +257,24 @@ static void processing_instruction(void * ctx, const xmlChar * name, const xmlCh rb_content = content ? 
NOKOGIRI_STR_NEW2(content) : Qnil; - rb_funcall( doc, - id_processing_instruction, - 2, - NOKOGIRI_STR_NEW2(name), - rb_content - ); + rb_funcall(doc, + id_processing_instruction, + 2, + NOKOGIRI_STR_NEW2(name), + rb_content + ); } -static void deallocate(xmlSAXHandlerPtr handler) +static void +deallocate(xmlSAXHandlerPtr handler) { - NOKOGIRI_DEBUG_START(handler); - free(handler); - NOKOGIRI_DEBUG_END(handler); + ruby_xfree(handler); } -static VALUE allocate(VALUE klass) +static VALUE +allocate(VALUE klass) { - xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler)); - - xmlSetStructuredErrorFunc(NULL, NULL); + xmlSAXHandlerPtr handler = ruby_xcalloc((size_t)1, sizeof(xmlSAXHandler)); handler->startDocument = start_document; handler->endDocument = end_document; @@ -281,29 +293,23 @@ static VALUE allocate(VALUE klass) return Data_Wrap_Struct(klass, NULL, deallocate, handler); } -VALUE cNokogiriXmlSaxParser ; -void init_xml_sax_parser() +void +noko_init_xml_sax_parser(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE sax = rb_define_module_under(xml, "SAX"); - VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject); - - cNokogiriXmlSaxParser = klass; + cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject); - rb_define_alloc_func(klass, allocate); + rb_define_alloc_func(cNokogiriXmlSaxParser, allocate); id_start_document = rb_intern("start_document"); - id_end_document = rb_intern("end_document"); - id_start_element = rb_intern("start_element"); - id_end_element = rb_intern("end_element"); - id_comment = rb_intern("comment"); - id_characters = rb_intern("characters"); - id_xmldecl = rb_intern("xmldecl"); - id_error = rb_intern("error"); - id_warning = rb_intern("warning"); - id_cdata_block = rb_intern("cdata_block"); - id_cAttribute = rb_intern("Attribute"); + id_end_document = rb_intern("end_document"); + id_start_element = 
rb_intern("start_element"); + id_end_element = rb_intern("end_element"); + id_comment = rb_intern("comment"); + id_characters = rb_intern("characters"); + id_xmldecl = rb_intern("xmldecl"); + id_error = rb_intern("error"); + id_warning = rb_intern("warning"); + id_cdata_block = rb_intern("cdata_block"); id_start_element_namespace = rb_intern("start_element_namespace"); id_end_element_namespace = rb_intern("end_element_namespace"); id_processing_instruction = rb_intern("processing_instruction"); diff --git a/ext/nokogiri/xml_sax_parser.h b/ext/nokogiri/xml_sax_parser.h deleted file mode 100644 index ca35991b0b..0000000000 --- a/ext/nokogiri/xml_sax_parser.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef NOKOGIRI_XML_SAX_PARSER -#define NOKOGIRI_XML_SAX_PARSER - -#include - -void init_xml_sax_parser(); - -extern VALUE cNokogiriXmlSaxParser ; - -typedef struct _nokogiriSAXTuple { - xmlParserCtxtPtr ctxt; - VALUE self; -} nokogiriSAXTuple; - -typedef nokogiriSAXTuple * nokogiriSAXTuplePtr; - -#define NOKOGIRI_SAX_SELF(_ctxt) \ - ((nokogiriSAXTuplePtr)(_ctxt))->self - -#define NOKOGIRI_SAX_CTXT(_ctxt) \ - ((nokogiriSAXTuplePtr)(_ctxt))->ctxt - -#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) \ - nokogiri_sax_tuple_new(_ctxt, _self) - -static inline nokogiriSAXTuplePtr -nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self) -{ - nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple)); - tuple->self = self; - tuple->ctxt = ctxt; - return tuple; -} - -#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) \ - free(_tuple) \ - -#endif - diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 57aa74064f..f9461a3c7c 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -1,16 +1,14 @@ -#include +#include VALUE cNokogiriXmlSaxParserContext ; -static void deallocate(xmlParserCtxtPtr ctxt) -{ - NOKOGIRI_DEBUG_START(handler); +static ID id_read; +static void +deallocate(xmlParserCtxtPtr ctxt) +{ ctxt->sax = 
NULL; - xmlFreeParserCtxt(ctxt); - - NOKOGIRI_DEBUG_END(handler); } /* @@ -22,19 +20,23 @@ static void deallocate(xmlParserCtxtPtr ctxt) static VALUE parse_io(VALUE klass, VALUE io, VALUE encoding) { - xmlParserCtxtPtr ctxt; - xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding); - - ctxt = xmlCreateIOParserCtxt(NULL, NULL, - (xmlInputReadCallback)io_read_callback, - (xmlInputCloseCallback)io_close_callback, - (void *)io, enc); - if (ctxt->sax) { - xmlFree(ctxt->sax); - ctxt->sax = NULL; - } - - return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); + xmlParserCtxtPtr ctxt; + xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding); + + if (!rb_respond_to(io, id_read)) { + rb_raise(rb_eTypeError, "argument expected to respond to :read"); + } + + ctxt = xmlCreateIOParserCtxt(NULL, NULL, + (xmlInputReadCallback)noko_io_read, + (xmlInputCloseCallback)noko_io_close, + (void *)io, enc); + if (ctxt->sax) { + xmlFree(ctxt->sax); + ctxt->sax = NULL; + } + + return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); } /* @@ -43,7 +45,8 @@ parse_io(VALUE klass, VALUE io, VALUE encoding) * * Parse file given +filename+ */ -static VALUE parse_file(VALUE klass, VALUE filename) +static VALUE +parse_file(VALUE klass, VALUE filename) { xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename)); return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); @@ -58,41 +61,43 @@ static VALUE parse_file(VALUE klass, VALUE filename) static VALUE parse_memory(VALUE klass, VALUE data) { - xmlParserCtxtPtr ctxt; + xmlParserCtxtPtr ctxt; + + Check_Type(data, T_STRING); - if (NIL_P(data)) - rb_raise(rb_eArgError, "data cannot be nil"); - if (!(int)RSTRING_LEN(data)) - rb_raise(rb_eRuntimeError, "data cannot be empty"); + if (!(int)RSTRING_LEN(data)) { + rb_raise(rb_eRuntimeError, "data cannot be empty"); + } - ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data), - (int)RSTRING_LEN(data)); - if (ctxt->sax) { - xmlFree(ctxt->sax); - ctxt->sax = NULL; - } + ctxt = 
xmlCreateMemoryParserCtxt(StringValuePtr(data), + (int)RSTRING_LEN(data)); + if (ctxt->sax) { + xmlFree(ctxt->sax); + ctxt->sax = NULL; + } - return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); + return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); } static VALUE parse_doc(VALUE ctxt_val) { - xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val; - xmlParseDocument(ctxt); - return Qnil; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val; + xmlParseDocument(ctxt); + return Qnil; } static VALUE parse_doc_finalize(VALUE ctxt_val) { - xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val; - if (NULL != ctxt->myDoc) - xmlFreeDoc(ctxt->myDoc); + if (NULL != ctxt->myDoc) { + xmlFreeDoc(ctxt->myDoc); + } - NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData); - return Qnil; + NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData); + return Qnil; } /* @@ -104,25 +109,29 @@ parse_doc_finalize(VALUE ctxt_val) static VALUE parse_with(VALUE self, VALUE sax_handler) { - xmlParserCtxtPtr ctxt; - xmlSAXHandlerPtr sax; + xmlParserCtxtPtr ctxt; + xmlSAXHandlerPtr sax; + + if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) { + rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser"); + } - if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) - rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser"); + Data_Get_Struct(self, xmlParserCtxt, ctxt); + Data_Get_Struct(sax_handler, xmlSAXHandler, sax); - Data_Get_Struct(self, xmlParserCtxt, ctxt); - Data_Get_Struct(sax_handler, xmlSAXHandler, sax); + /* Free the sax handler since we'll assign our own */ + if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) { + xmlFree(ctxt->sax); + } - /* Free the sax handler since we'll assign our own */ - if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) - xmlFree(ctxt->sax); + ctxt->sax = sax; + ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); - ctxt->sax = sax; - 
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + xmlSetStructuredErrorFunc(NULL, NULL); - rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); - return Qnil; + return Qnil; } /* @@ -132,15 +141,17 @@ parse_with(VALUE self, VALUE sax_handler) * Should this parser replace entities? & will get converted to '&' if * set to true */ -static VALUE set_replace_entities(VALUE self, VALUE value) +static VALUE +set_replace_entities(VALUE self, VALUE value) { xmlParserCtxtPtr ctxt; Data_Get_Struct(self, xmlParserCtxt, ctxt); - if(Qfalse == value) + if (Qfalse == value) { ctxt->replaceEntities = 0; - else + } else { ctxt->replaceEntities = 1; + } return value; } @@ -152,15 +163,17 @@ static VALUE set_replace_entities(VALUE self, VALUE value) * Should this parser replace entities? & will get converted to '&' if * set to true */ -static VALUE get_replace_entities(VALUE self) +static VALUE +get_replace_entities(VALUE self) { xmlParserCtxtPtr ctxt; Data_Get_Struct(self, xmlParserCtxt, ctxt); - if(0 == ctxt->replaceEntities) + if (0 == ctxt->replaceEntities) { return Qfalse; - else + } else { return Qtrue; + } } /* @@ -168,7 +181,8 @@ static VALUE get_replace_entities(VALUE self) * * Get the current line the parser context is processing. */ -static VALUE line(VALUE self) +static VALUE +line(VALUE self) { xmlParserCtxtPtr ctxt; xmlParserInputPtr io; @@ -176,8 +190,9 @@ static VALUE line(VALUE self) Data_Get_Struct(self, xmlParserCtxt, ctxt); io = ctxt->input; - if(io) + if (io) { return INT2NUM(io->line); + } return Qnil; } @@ -187,7 +202,8 @@ static VALUE line(VALUE self) * * Get the current column the parser context is processing. 
*/ -static VALUE column(VALUE self) +static VALUE +column(VALUE self) { xmlParserCtxtPtr ctxt; xmlParserInputPtr io; @@ -195,8 +211,9 @@ static VALUE column(VALUE self) Data_Get_Struct(self, xmlParserCtxt, ctxt); io = ctxt->input; - if(io) + if (io) { return INT2NUM(io->col); + } return Qnil; } @@ -208,15 +225,17 @@ static VALUE column(VALUE self) * Should this parser recover from structural errors? It will not stop processing * file on structural errors if set to true */ -static VALUE set_recovery(VALUE self, VALUE value) +static VALUE +set_recovery(VALUE self, VALUE value) { xmlParserCtxtPtr ctxt; Data_Get_Struct(self, xmlParserCtxt, ctxt); - if(value == Qfalse) + if (value == Qfalse) { ctxt->recovery = 0; - else + } else { ctxt->recovery = 1; + } return value; } @@ -228,35 +247,37 @@ static VALUE set_recovery(VALUE self, VALUE value) * Should this parser recover from structural errors? It will not stop processing * file on structural errors if set to true */ -static VALUE get_recovery(VALUE self) +static VALUE +get_recovery(VALUE self) { xmlParserCtxtPtr ctxt; Data_Get_Struct(self, xmlParserCtxt, ctxt); - if(ctxt->recovery == 0) + if (ctxt->recovery == 0) { return Qfalse; - else + } else { return Qtrue; + } } -void init_xml_sax_parser_context() +void +noko_init_xml_sax_parser_context(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE sax = rb_define_module_under(xml, "SAX"); - VALUE klass = rb_define_class_under(sax, "ParserContext", rb_cObject); - - cNokogiriXmlSaxParserContext = klass; - - rb_define_singleton_method(klass, "io", parse_io, 2); - rb_define_singleton_method(klass, "memory", parse_memory, 1); - rb_define_singleton_method(klass, "file", parse_file, 1); - - rb_define_method(klass, "parse_with", parse_with, 1); - rb_define_method(klass, "replace_entities=", set_replace_entities, 1); - rb_define_method(klass, "replace_entities", get_replace_entities, 0); - rb_define_method(klass, 
"recovery=", set_recovery, 1); - rb_define_method(klass, "recovery", get_recovery, 0); - rb_define_method(klass, "line", line, 0); - rb_define_method(klass, "column", column, 0); + cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject); + + rb_undef_alloc_func(cNokogiriXmlSaxParserContext); + + rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2); + rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1); + rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1); + + rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1); + rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1); + rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0); + rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1); + rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0); + rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0); + rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0); + + id_read = rb_intern("read"); } diff --git a/ext/nokogiri/xml_sax_parser_context.h b/ext/nokogiri/xml_sax_parser_context.h deleted file mode 100644 index a1556ed512..0000000000 --- a/ext/nokogiri/xml_sax_parser_context.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_SAX_PARSER_CONTEXT -#define NOKOGIRI_XML_SAX_PARSER_CONTEXT - -#include - -extern VALUE cNokogiriXmlSaxParserContext; - -void init_xml_sax_parser_context(); - -#endif diff --git a/ext/nokogiri/xml_sax_push_parser.c b/ext/nokogiri/xml_sax_push_parser.c index dac0a24db5..5013b8f3ee 100644 --- a/ext/nokogiri/xml_sax_push_parser.c +++ b/ext/nokogiri/xml_sax_push_parser.c @@ -1,16 +1,18 @@ -#include +#include -static void deallocate(xmlParserCtxtPtr ctx) +VALUE cNokogiriXmlSaxPushParser ; + +static void +deallocate(xmlParserCtxtPtr ctx) { - 
NOKOGIRI_DEBUG_START(ctx); if (ctx != NULL) { NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData); xmlFreeParserCtxt(ctx); } - NOKOGIRI_DEBUG_END(ctx); } -static VALUE allocate(VALUE klass) +static VALUE +allocate(VALUE klass) { return Data_Wrap_Struct(klass, NULL, deallocate, NULL); } @@ -21,10 +23,11 @@ static VALUE allocate(VALUE klass) * * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle */ -static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) +static VALUE +native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) { xmlParserCtxtPtr ctx; - const char * chunk = NULL; + const char *chunk = NULL; int size = 0; @@ -35,6 +38,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } + xmlSetStructuredErrorFunc(NULL, NULL); + if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { if (!(ctx->options & XML_PARSE_RECOVER)) { xmlErrorPtr e = xmlCtxtGetLastError(ctx); @@ -51,10 +56,11 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) * * Initialize the push parser with +xml_sax+ using +filename+ */ -static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename) +static VALUE +initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename) { xmlSAXHandlerPtr sax; - const char * filename = NULL; + const char *filename = NULL; xmlParserCtxtPtr ctx; Data_Get_Struct(_xml_sax, xmlSAXHandler, sax); @@ -79,7 +85,8 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename) return self; } -static VALUE get_options(VALUE self) +static VALUE +get_options(VALUE self) { xmlParserCtxtPtr ctx; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -87,7 +94,8 @@ static VALUE get_options(VALUE self) return INT2NUM(ctx->options); } -static VALUE set_options(VALUE self, VALUE options) +static VALUE +set_options(VALUE self, VALUE options) { xmlParserCtxtPtr ctx; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -106,7 +114,8 @@ static VALUE 
set_options(VALUE self, VALUE options) * Should this parser replace entities? & will get converted to '&' if * set to true */ -static VALUE get_replace_entities(VALUE self) +static VALUE +get_replace_entities(VALUE self) { xmlParserCtxtPtr ctx; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -125,7 +134,8 @@ static VALUE get_replace_entities(VALUE self) * Should this parser replace entities? & will get converted to '&' if * set to true */ -static VALUE set_replace_entities(VALUE self, VALUE value) +static VALUE +set_replace_entities(VALUE self, VALUE value) { xmlParserCtxtPtr ctx; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -139,21 +149,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value) return value; } -VALUE cNokogiriXmlSaxPushParser ; -void init_xml_sax_push_parser() +void +noko_init_xml_sax_push_parser(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE sax = rb_define_module_under(xml, "SAX"); - VALUE klass = rb_define_class_under(sax, "PushParser", rb_cObject); - - cNokogiriXmlSaxPushParser = klass; - - rb_define_alloc_func(klass, allocate); - rb_define_private_method(klass, "initialize_native", initialize_native, 2); - rb_define_private_method(klass, "native_write", native_write, 2); - rb_define_method(klass, "options", get_options, 0); - rb_define_method(klass, "options=", set_options, 1); - rb_define_method(klass, "replace_entities", get_replace_entities, 0); - rb_define_method(klass, "replace_entities=", set_replace_entities, 1); + cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject); + + rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate); + + rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0); + rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1); + rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0); + rb_define_method(cNokogiriXmlSaxPushParser, 
"replace_entities=", set_replace_entities, 1); + + rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2); + rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2); } diff --git a/ext/nokogiri/xml_sax_push_parser.h b/ext/nokogiri/xml_sax_push_parser.h deleted file mode 100644 index 51d8bc95b9..0000000000 --- a/ext/nokogiri/xml_sax_push_parser.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_SAX_PUSH_PARSER -#define NOKOGIRI_XML_SAX_PUSH_PARSER - -#include - -void init_xml_sax_push_parser(); - -extern VALUE cNokogiriXmlSaxPushParser ; -#endif diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index da2774ba8c..2f71dc876e 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -1,10 +1,11 @@ -#include +#include -static void dealloc(xmlSchemaPtr schema) +VALUE cNokogiriXmlSchema; + +static void +dealloc(xmlSchemaPtr schema) { - NOKOGIRI_DEBUG_START(schema); xmlSchemaFree(schema); - NOKOGIRI_DEBUG_END(schema); } /* @@ -13,7 +14,8 @@ static void dealloc(xmlSchemaPtr schema) * * Validate a Nokogiri::XML::Document against this Schema. */ -static VALUE validate_document(VALUE self, VALUE document) +static VALUE +validate_document(VALUE self, VALUE document) { xmlDocPtr doc; xmlSchemaPtr schema; @@ -21,13 +23,13 @@ static VALUE validate_document(VALUE self, VALUE document) VALUE errors; Data_Get_Struct(self, xmlSchema, schema); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); errors = rb_ary_new(); valid_ctxt = xmlSchemaNewValidCtxt(schema); - if(NULL == valid_ctxt) { + if (NULL == valid_ctxt) { /* we have a problem */ rb_raise(rb_eRuntimeError, "Could not create a validation context"); } @@ -53,7 +55,8 @@ static VALUE validate_document(VALUE self, VALUE document) * * Validate a file against this Schema. 
*/ -static VALUE validate_file(VALUE self, VALUE rb_filename) +static VALUE +validate_file(VALUE self, VALUE rb_filename) { xmlSchemaPtr schema; xmlSchemaValidCtxtPtr valid_ctxt; @@ -61,13 +64,13 @@ static VALUE validate_file(VALUE self, VALUE rb_filename) VALUE errors; Data_Get_Struct(self, xmlSchema, schema); - filename = (const char*)StringValueCStr(rb_filename) ; + filename = (const char *)StringValueCStr(rb_filename) ; errors = rb_ary_new(); valid_ctxt = xmlSchemaNewValidCtxt(schema); - if(NULL == valid_ctxt) { + if (NULL == valid_ctxt) { /* we have a problem */ rb_raise(rb_eRuntimeError, "Could not create a validation context"); } @@ -93,15 +96,28 @@ static VALUE validate_file(VALUE self, VALUE rb_filename) * * Create a new Schema from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE +read_memory(int argc, VALUE *argv, VALUE klass) { + VALUE content; + VALUE parse_options; + int parse_options_int; + xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; - xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE errors; VALUE rb_schema; - VALUE errors = rb_ary_new(); + int scanned_args = 0; + xmlExternalEntityLoader old_loader = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); + + ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS @@ -112,45 +128,97 @@ static VALUE read_memory(VALUE klass, VALUE content) ); #endif - schema = xmlSchemaParse(ctx); + if (parse_options_int & 
XML_PARSE_NONET) { + old_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader); + } + + schema = xmlSchemaParse(ctx); + + if (old_loader) { + xmlSetExternalEntityLoader(old_loader); + } xmlSetStructuredErrorFunc(NULL, NULL); xmlSchemaFreeParserCtxt(ctx); - if(NULL == schema) { + if (NULL == schema) { xmlErrorPtr error = xmlGetLastError(); - if(error) + if (error) { Nokogiri_error_raise(NULL, error); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } +/* Schema creation will remove and deallocate "blank" nodes. + * If those blank nodes have been exposed to Ruby, they could get freed + * out from under the VALUE pointer. This function checks to see if any of + * those nodes have been exposed to Ruby, and if so we should raise an exception. 
+ */ +static int +has_blank_nodes_p(VALUE cache) +{ + long i; + + if (NIL_P(cache)) { + return 0; + } + + for (i = 0; i < RARRAY_LEN(cache); i++) { + xmlNodePtr node; + VALUE element = rb_ary_entry(cache, i); + Noko_Node_Get_Struct(element, xmlNode, node); + if (xmlIsBlankNode(node)) { + return 1; + } + } + + return 0; +} + /* * call-seq: * from_document(doc) * * Create a new Schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE +from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; + int parse_options_int; xmlDocPtr doc; xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + xmlExternalEntityLoader old_loader = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. 
*/ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); + + if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) { + rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous"); + } ctx = xmlSchemaNewDocParserCtxt(doc); @@ -165,41 +233,50 @@ static VALUE from_document(VALUE klass, VALUE document) ); #endif + if (parse_options_int & XML_PARSE_NONET) { + old_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader); + } + schema = xmlSchemaParse(ctx); + if (old_loader) { + xmlSetExternalEntityLoader(old_loader); + } + xmlSetStructuredErrorFunc(NULL, NULL); xmlSchemaFreeParserCtxt(ctx); - if(NULL == schema) { + if (NULL == schema) { xmlErrorPtr error = xmlGetLastError(); - if(error) + if (error) { Nokogiri_error_raise(NULL, error); - else + } else { rb_raise(rb_eRuntimeError, "Could not parse document"); + } return Qnil; } rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; return Qnil; } -VALUE cNokogiriXmlSchema; -void init_xml_schema() +void +noko_init_xml_schema(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - VALUE klass = rb_define_class_under(xml, "Schema", rb_cObject); + cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject); - cNokogiriXmlSchema = klass; + rb_undef_alloc_func(cNokogiriXmlSchema); - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); + rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1); + rb_define_singleton_method(cNokogiriXmlSchema, 
"from_document", from_document, -1); - rb_define_private_method(klass, "validate_document", validate_document, 1); - rb_define_private_method(klass, "validate_file", validate_file, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1); } diff --git a/ext/nokogiri/xml_schema.h b/ext/nokogiri/xml_schema.h deleted file mode 100644 index 1451c18610..0000000000 --- a/ext/nokogiri/xml_schema.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_SCHEMA -#define NOKOGIRI_XML_SCHEMA - -#include - -void init_xml_schema(); - -extern VALUE cNokogiriXmlSchema; -#endif diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0b240f05a5..a5f0e1ad71 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -1,38 +1,64 @@ -#include +#include -void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error) +VALUE cNokogiriXmlSyntaxError; + +void +Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state) +{ + /* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */ + handler_state->user_data = xmlStructuredErrorContext; + handler_state->handler = xmlStructuredError; +} + +void +Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler) +{ + Nokogiri_structured_error_func_save(handler_state); + xmlSetStructuredErrorFunc(user_data, handler); +} + +void +Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state) +{ + xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler); +} + +void +Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error) { VALUE list = (VALUE)ctx; Check_Type(list, T_ARRAY); rb_ary_push(list, Nokogiri_wrap_xml_syntax_error(error)); } -void Nokogiri_error_raise(void * ctx, xmlErrorPtr error) +void 
+Nokogiri_error_raise(void *ctx, xmlErrorPtr error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } -VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) +VALUE +Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) { VALUE msg, e, klass; klass = cNokogiriXmlSyntaxError; if (error && error->domain == XML_FROM_XPATH) { - VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath")); - klass = rb_const_get(xpath, rb_intern("SyntaxError")); + klass = cNokogiriXmlXpathSyntaxError; } msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil; e = rb_class_new_instance( - 1, - &msg, - klass - ); + 1, + &msg, + klass + ); - if (error) - { + if (error) { rb_iv_set(e, "@domain", INT2NUM(error->domain)); rb_iv_set(e, "@code", INT2NUM(error->code)); rb_iv_set(e, "@level", INT2NUM((short)error->level)); @@ -48,17 +74,12 @@ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) return e; } -VALUE cNokogiriXmlSyntaxError; -void init_xml_syntax_error() +void +noko_init_xml_syntax_error(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - + assert(cNokogiriSyntaxError); /* * The XML::SyntaxError is raised on parse errors */ - VALUE syntax_error_mommy = rb_define_class_under(nokogiri, "SyntaxError", rb_eStandardError); - VALUE klass = rb_define_class_under(xml, "SyntaxError", syntax_error_mommy); - cNokogiriXmlSyntaxError = klass; - + cNokogiriXmlSyntaxError = rb_define_class_under(mNokogiriXml, "SyntaxError", cNokogiriSyntaxError); } diff --git a/ext/nokogiri/xml_syntax_error.h b/ext/nokogiri/xml_syntax_error.h deleted file mode 100644 index 58475cb852..0000000000 --- a/ext/nokogiri/xml_syntax_error.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef NOKOGIRI_XML_SYNTAX_ERROR -#define NOKOGIRI_XML_SYNTAX_ERROR - -#include - -void init_xml_syntax_error(); -VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); -void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error); -NORETURN(void 
Nokogiri_error_raise(void * ctx, xmlErrorPtr error)); - -extern VALUE cNokogiriXmlSyntaxError; -#endif - diff --git a/ext/nokogiri/xml_text.c b/ext/nokogiri/xml_text.c index 971a31e879..9c8c592fa4 100644 --- a/ext/nokogiri/xml_text.c +++ b/ext/nokogiri/xml_text.c @@ -1,4 +1,6 @@ -#include +#include + +VALUE cNokogiriXmlText ; /* * call-seq: @@ -6,7 +8,8 @@ * * Create a new Text element on the +document+ with +content+ */ -static VALUE new(int argc, VALUE *argv, VALUE klass) +static VALUE +new (int argc, VALUE *argv, VALUE klass) { xmlDocPtr doc; xmlNodePtr node; @@ -17,36 +20,29 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &string, &document, &rest); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); node = xmlNewText((xmlChar *)StringValueCStr(string)); node->doc = doc->doc; - nokogiri_root_node(node); + noko_xml_document_pin_node(node); - rb_node = Nokogiri_wrap_xml_node(klass, node) ; + rb_node = noko_xml_node_wrap(klass, node) ; rb_obj_call_init(rb_node, argc, argv); - if(rb_block_given_p()) rb_yield(rb_node); + if (rb_block_given_p()) { rb_yield(rb_node); } return rb_node; } -VALUE cNokogiriXmlText ; -void init_xml_text() +void +noko_init_xml_text(void) { - VALUE nokogiri = rb_define_module("Nokogiri"); - VALUE xml = rb_define_module_under(nokogiri, "XML"); - /* */ - VALUE node = rb_define_class_under(xml, "Node", rb_cObject); - VALUE char_data = rb_define_class_under(xml, "CharacterData", node); - + assert(cNokogiriXmlCharacterData); /* * Wraps Text nodes. 
*/ - VALUE klass = rb_define_class_under(xml, "Text", char_data); - - cNokogiriXmlText = klass; + cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData); - rb_define_singleton_method(klass, "new", new, -1); + rb_define_singleton_method(cNokogiriXmlText, "new", new, -1); } diff --git a/ext/nokogiri/xml_text.h b/ext/nokogiri/xml_text.h deleted file mode 100644 index b5dd9afafc..0000000000 --- a/ext/nokogiri/xml_text.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NOKOGIRI_XML_TEXT -#define NOKOGIRI_XML_TEXT - -#include - -void init_xml_text(); - -extern VALUE cNokogiriXmlText ; -#endif diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index cb1be24b72..4a34e65e4c 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -1,29 +1,127 @@ -#include +#include -int vasprintf (char **strp, const char *fmt, va_list ap); +VALUE cNokogiriXmlXpathContext; + +/* + * these constants have matching declarations in + * ext/java/nokogiri/internals/NokogiriNamespaceContext.java + */ +static const xmlChar *NOKOGIRI_PREFIX = (const xmlChar *)"nokogiri"; +static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/default_ns/ruby/extensions_functions"; +static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin"; +static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins"; -static void deallocate(xmlXPathContextPtr ctx) +static void +xml_xpath_context_deallocate(xmlXPathContextPtr ctx) { - NOKOGIRI_DEBUG_START(ctx); xmlXPathFreeContext(ctx); - NOKOGIRI_DEBUG_END(ctx); } +/* find a CSS class in an HTML element's `class` attribute */ +static const xmlChar * +builtin_css_class(const xmlChar *str, const xmlChar *val) +{ + int val_len; + + if (str == NULL) { return (NULL); } + if (val == NULL) { return (NULL); } + + val_len = xmlStrlen(val); + if (val_len == 0) { return (str); } + + while (*str != 0) { + if 
((*str == *val) && !xmlStrncmp(str, val, val_len)) { + const xmlChar *next_byte = str + val_len; + + /* only match if the next byte is whitespace or end of string */ + if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) { + return ((const xmlChar *)str); + } + } + + /* advance str to whitespace */ + while ((*str != 0) && !IS_BLANK_CH(*str)) { + str++; + } + + /* advance str to start of next word or end of string */ + while ((*str != 0) && IS_BLANK_CH(*str)) { + str++; + } + } + + return (NULL); +} + +/* xmlXPathFunction to wrap builtin_css_class() */ +static void +xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs) +{ + xmlXPathObjectPtr hay, needle; + + CHECK_ARITY(2); + + CAST_TO_STRING; + needle = valuePop(ctxt); + if ((needle == NULL) || (needle->type != XPATH_STRING)) { + xmlXPathFreeObject(needle); + XP_ERROR(XPATH_INVALID_TYPE); + } + + CAST_TO_STRING; + hay = valuePop(ctxt); + if ((hay == NULL) || (hay->type != XPATH_STRING)) { + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); + XP_ERROR(XPATH_INVALID_TYPE); + } + + if (builtin_css_class(hay->stringval, needle->stringval)) { + valuePush(ctxt, xmlXPathNewBoolean(1)); + } else { + valuePush(ctxt, xmlXPathNewBoolean(0)); + } + + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); +} + + +/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */ +static void +xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs) +{ + xmlXPathObjectPtr element_name; + + assert(ctxt->context->node); + + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + element_name = valuePop(ctxt); + + valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval))); + + xmlXPathFreeObject(element_name); +} + + /* * call-seq: * register_ns(prefix, uri) * * Register the namespace with +prefix+ and +uri+. 
*/ -static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri) +static VALUE +rb_xml_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri) { xmlXPathContextPtr ctx; Data_Get_Struct(self, xmlXPathContext, ctx); - xmlXPathRegisterNs( ctx, - (const xmlChar *)StringValueCStr(prefix), - (const xmlChar *)StringValueCStr(uri) - ); + xmlXPathRegisterNs(ctx, + (const xmlChar *)StringValueCStr(prefix), + (const xmlChar *)StringValueCStr(uri) + ); return self; } @@ -33,83 +131,103 @@ static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri) * * Register the variable +name+ with +value+. */ -static VALUE register_variable(VALUE self, VALUE name, VALUE value) +static VALUE +rb_xml_xpath_context_register_variable(VALUE self, VALUE name, VALUE value) { - xmlXPathContextPtr ctx; - xmlXPathObjectPtr xmlValue; - Data_Get_Struct(self, xmlXPathContext, ctx); + xmlXPathContextPtr ctx; + xmlXPathObjectPtr xmlValue; + Data_Get_Struct(self, xmlXPathContext, ctx); - xmlValue = xmlXPathNewCString(StringValueCStr(value)); + xmlValue = xmlXPathNewCString(StringValueCStr(value)); - xmlXPathRegisterVariable( ctx, - (const xmlChar *)StringValueCStr(name), - xmlValue - ); + xmlXPathRegisterVariable(ctx, + (const xmlChar *)StringValueCStr(name), + xmlValue + ); - return self; + return self; } -void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name) + +/* + * convert an XPath object into a Ruby object of the appropriate type. + * returns Qundef if no conversion was possible. 
+ */ +static VALUE +xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx) { - int i; - VALUE result, doc; + VALUE rb_retval; + + assert(ctx->doc); + assert(DOC_RUBY_OBJECT_TEST(ctx->doc)); + + switch (c_xpath_object->type) { + case XPATH_STRING: + rb_retval = NOKOGIRI_STR_NEW2(c_xpath_object->stringval); + xmlFree(c_xpath_object->stringval); + return rb_retval; + + case XPATH_NODESET: + return noko_xml_node_set_wrap(c_xpath_object->nodesetval, + DOC_RUBY_OBJECT(ctx->doc)); + + case XPATH_NUMBER: + return rb_float_new(c_xpath_object->floatval); + + case XPATH_BOOLEAN: + return (c_xpath_object->boolval == 1) ? Qtrue : Qfalse; + + default: + return Qundef; + } +} + +void +Nokogiri_marshal_xpath_funcall_and_return_values( + xmlXPathParserContextPtr ctx, + int argc, + VALUE rb_xpath_handler, + const char *method_name +) +{ + VALUE rb_retval; VALUE *argv; - VALUE node_set = Qnil; - xmlNodeSetPtr xml_node_set = NULL; - xmlXPathObjectPtr obj; + VALUE rb_node_set = Qnil; + xmlNodeSetPtr c_node_set = NULL; + xmlXPathObjectPtr c_xpath_object; assert(ctx->context->doc); assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc)); - argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE)); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_register_address(&argv[i]); + argv = (VALUE *)ruby_xcalloc((size_t)argc, sizeof(VALUE)); + for (int j = 0 ; j < argc ; ++j) { + rb_gc_register_address(&argv[j]); } - doc = DOC_RUBY_OBJECT(ctx->context->doc); - - if (nargs > 0) { - i = nargs - 1; - do { - obj = valuePop(ctx); - switch(obj->type) { - case XPATH_STRING: - argv[i] = NOKOGIRI_STR_NEW2(obj->stringval); - break; - case XPATH_BOOLEAN: - argv[i] = obj->boolval == 1 ? 
Qtrue : Qfalse; - break; - case XPATH_NUMBER: - argv[i] = rb_float_new(obj->floatval); - break; - case XPATH_NODESET: - argv[i] = Nokogiri_wrap_xml_node_set(obj->nodesetval, doc); - break; - default: - argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj)); - } - xmlXPathFreeNodeSetList(obj); - } while(i-- > 0); + for (int j = argc - 1 ; j >= 0 ; --j) { + c_xpath_object = valuePop(ctx); + argv[j] = xpath2ruby(c_xpath_object, ctx->context); + if (argv[j] == Qundef) { + argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object)); + } + xmlXPathFreeNodeSetList(c_xpath_object); } - result = rb_funcall2(handler, rb_intern((const char*)function_name), nargs, argv); + rb_retval = rb_funcall2(rb_xpath_handler, rb_intern((const char *)method_name), argc, argv); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_unregister_address(&argv[i]); + for (int j = 0 ; j < argc ; ++j) { + rb_gc_unregister_address(&argv[j]); } - free(argv); + ruby_xfree(argv); - switch(TYPE(result)) { + switch (TYPE(rb_retval)) { case T_FLOAT: case T_BIGNUM: case T_FIXNUM: - xmlXPathReturnNumber(ctx, NUM2DBL(result)); + xmlXPathReturnNumber(ctx, NUM2DBL(rb_retval)); break; case T_STRING: - xmlXPathReturnString( - ctx, - xmlCharStrdup(StringValueCStr(result)) - ); + xmlXPathReturnString(ctx, xmlCharStrdup(StringValueCStr(rb_retval))); break; case T_TRUE: xmlXPathReturnTrue(ctx); @@ -119,66 +237,76 @@ void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr c break; case T_NIL: break; - case T_ARRAY: - { - VALUE args[2]; - args[0] = doc; - args[1] = result; - node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet); - Data_Get_Struct(node_set, xmlNodeSet, xml_node_set); - xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); - } + case T_ARRAY: { + VALUE construct_args[2] = { DOC_RUBY_OBJECT(ctx->context->doc), rb_retval }; + rb_node_set = rb_class_new_instance(2, construct_args, cNokogiriXmlNodeSet); + Data_Get_Struct(rb_node_set, xmlNodeSet, c_node_set); + 
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, c_node_set)); + } break; case T_DATA: - if(rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) { - Data_Get_Struct(result, xmlNodeSet, xml_node_set); + if (rb_obj_is_kind_of(rb_retval, cNokogiriXmlNodeSet)) { + Data_Get_Struct(rb_retval, xmlNodeSet, c_node_set); /* Copy the node set, otherwise it will get GC'd. */ - xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); + xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, c_node_set)); break; } default: rb_raise(rb_eRuntimeError, "Invalid return type"); - } + } } -static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs) +static void +method_caller(xmlXPathParserContextPtr ctx, int argc) { - VALUE handler = Qnil; - const char *function = NULL ; + VALUE rb_xpath_handler = Qnil; + const char *method_name = NULL ; assert(ctx); assert(ctx->context); assert(ctx->context->userData); assert(ctx->context->function); - handler = (VALUE)(ctx->context->userData); - function = (const char*)(ctx->context->function); + rb_xpath_handler = (VALUE)(ctx->context->userData); + method_name = (const char *)(ctx->context->function); - Nokogiri_marshal_xpath_funcall_and_return_values(ctx, nargs, handler, function); + Nokogiri_marshal_xpath_funcall_and_return_values(ctx, argc, rb_xpath_handler, method_name); } -static xmlXPathFunction lookup( void *ctx, - const xmlChar * name, - const xmlChar* ns_uri ) +static xmlXPathFunction +handler_lookup(void *ctx, const xmlChar *c_name, const xmlChar *c_ns_uri) { - VALUE xpath_handler = (VALUE)ctx; - if(rb_respond_to(xpath_handler, rb_intern((const char *)name))) - return ruby_funcall; + VALUE rb_xpath_handler = (VALUE)ctx; + if (rb_respond_to(rb_xpath_handler, rb_intern((const char *)c_name))) { + return method_caller; + } return NULL; } -NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)); -static void xpath_generic_exception_handler(void * ctx, const char *msg, ...) 
+PRINTFLIKE_DECL(2, 3) +static void +generic_exception_pusher(void *ctx, const char *msg, ...) { - char * message; + VALUE rb_errors = (VALUE)ctx; + VALUE rb_message; + VALUE rb_exception; + Check_Type(rb_errors, T_ARRAY); + +#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES + /* It is not currently possible to pass var args from native + functions to sulong, so we work around the issue here. */ + rb_message = rb_sprintf("generic_exception_pusher: %s", msg); +#else va_list args; va_start(args, msg); - vasprintf(&message, msg, args); + rb_message = rb_vsprintf(msg, args); va_end(args); +#endif - rb_raise(rb_eRuntimeError, "%s", message); + rb_exception = rb_exc_new_str(cNokogiriXmlXpathSyntaxError, rb_message); + rb_ary_push(rb_errors, rb_exception); } /* @@ -187,68 +315,50 @@ static void xpath_generic_exception_handler(void * ctx, const char *msg, ...) * * Evaluate the +search_path+ returning an XML::XPath object. */ -static VALUE evaluate(int argc, VALUE *argv, VALUE self) +static VALUE +rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self) { VALUE search_path, xpath_handler; - VALUE thing = Qnil; + VALUE retval = Qnil; xmlXPathContextPtr ctx; xmlXPathObjectPtr xpath; xmlChar *query; + VALUE errors = rb_ary_new(); Data_Get_Struct(self, xmlXPathContext, ctx); - if(rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) + if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) { xpath_handler = Qnil; + } query = (xmlChar *)StringValueCStr(search_path); - if(Qnil != xpath_handler) { + if (Qnil != xpath_handler) { /* FIXME: not sure if this is the correct place to shove private data. 
*/ ctx->userData = (void *)xpath_handler; - xmlXPathRegisterFuncLookup(ctx, lookup, (void *)xpath_handler); + xmlXPathRegisterFuncLookup(ctx, handler_lookup, (void *)xpath_handler); } - xmlResetLastError(); - xmlSetStructuredErrorFunc(NULL, Nokogiri_error_raise); - - /* For some reason, xmlXPathEvalExpression will blow up with a generic error */ - /* when there is a non existent function. */ - xmlSetGenericErrorFunc(NULL, xpath_generic_exception_handler); + xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); + xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher); xpath = xmlXPathEvalExpression(query, ctx); + xmlSetStructuredErrorFunc(NULL, NULL); xmlSetGenericErrorFunc(NULL, NULL); - if(xpath == NULL) { - xmlErrorPtr error = xmlGetLastError(); - rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); + if (xpath == NULL) { + rb_exc_raise(rb_ary_entry(errors, 0)); } - assert(ctx->doc); - assert(DOC_RUBY_OBJECT_TEST(ctx->doc)); - - switch(xpath->type) { - case XPATH_STRING: - thing = NOKOGIRI_STR_NEW2(xpath->stringval); - xmlFree(xpath->stringval); - break; - case XPATH_NODESET: - thing = Nokogiri_wrap_xml_node_set(xpath->nodesetval, - DOC_RUBY_OBJECT(ctx->doc)); - break; - case XPATH_NUMBER: - thing = rb_float_new(xpath->floatval); - break; - case XPATH_BOOLEAN: - thing = xpath->boolval == 1 ? Qtrue : Qfalse; - break; - default: - thing = Nokogiri_wrap_xml_node_set(NULL, DOC_RUBY_OBJECT(ctx->doc)); + retval = xpath2ruby(xpath, ctx); + if (retval == Qundef) { + retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc)); } xmlXPathFreeNodeSetList(xpath); - return thing; + return retval; } /* @@ -257,42 +367,47 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self) * * Create a new XPathContext with +node+ as the reference point. 
*/ -static VALUE new(VALUE klass, VALUE nodeobj) +static VALUE +rb_xml_xpath_context_new(VALUE klass, VALUE nodeobj) { xmlNodePtr node; xmlXPathContextPtr ctx; VALUE self; - xmlXPathInit(); + Noko_Node_Get_Struct(nodeobj, xmlNode, node); - Data_Get_Struct(nodeobj, xmlNode, node); +#if LIBXML_VERSION < 21000 + /* deprecated in 40483d0 */ + xmlXPathInit(); +#endif ctx = xmlXPathNewContext(node->doc); ctx->node = node; - self = Data_Wrap_Struct(klass, 0, deallocate, ctx); - /*rb_iv_set(self, "@xpath_handler", Qnil); */ + + xmlXPathRegisterNs(ctx, NOKOGIRI_PREFIX, NOKOGIRI_URI); + xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI); + xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI, + xpath_builtin_css_class); + xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI, + xpath_builtin_local_name_is); + + self = Data_Wrap_Struct(klass, 0, xml_xpath_context_deallocate, ctx); return self; } -VALUE cNokogiriXmlXpathContext; -void init_xml_xpath_context(void) +void +noko_init_xml_xpath_context(void) { - VALUE module = rb_define_module("Nokogiri"); - - /* - * Nokogiri::XML - */ - VALUE xml = rb_define_module_under(module, "XML"); - /* * XPathContext is the entry point for searching a Document by using XPath. 
*/ - VALUE klass = rb_define_class_under(xml, "XPathContext", rb_cObject); + cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject); + + rb_undef_alloc_func(cNokogiriXmlXpathContext); - cNokogiriXmlXpathContext = klass; + rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1); - rb_define_singleton_method(klass, "new", new, 1); - rb_define_method(klass, "evaluate", evaluate, -1); - rb_define_method(klass, "register_variable", register_variable, 2); - rb_define_method(klass, "register_ns", register_ns, 2); + rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1); + rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2); + rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2); } diff --git a/ext/nokogiri/xml_xpath_context.h b/ext/nokogiri/xml_xpath_context.h deleted file mode 100644 index a2cf1f31a2..0000000000 --- a/ext/nokogiri/xml_xpath_context.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NOKOGIRI_XML_XPATH_CONTEXT -#define NOKOGIRI_XML_XPATH_CONTEXT - -#include - -void init_xml_xpath_context(); -void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name) ; - -extern VALUE cNokogiriXmlXpathContext; -#endif diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c index e98302eb07..fef9fafa65 100644 --- a/ext/nokogiri/xslt_stylesheet.c +++ b/ext/nokogiri/xslt_stylesheet.c @@ -1,46 +1,43 @@ -#include +#include -#include -#include -#include -#include - -VALUE xslt; - -int vasprintf (char **strp, const char *fmt, va_list ap); -void vasprintf_free (void *p); +VALUE cNokogiriXsltStylesheet ; -static void mark(nokogiriXsltStylesheetTuple *wrapper) +static void +mark(nokogiriXsltStylesheetTuple *wrapper) { rb_gc_mark(wrapper->func_instances); } -static void 
dealloc(nokogiriXsltStylesheetTuple *wrapper) +static void +dealloc(nokogiriXsltStylesheetTuple *wrapper) { - xsltStylesheetPtr doc = wrapper->ss; - - NOKOGIRI_DEBUG_START(doc); - xsltFreeStylesheet(doc); /* commented out for now. */ - NOKOGIRI_DEBUG_END(doc); - - free(wrapper); + xsltStylesheetPtr doc = wrapper->ss; + xsltFreeStylesheet(doc); + ruby_xfree(wrapper); } -static void xslt_generic_error_handler(void * ctx, const char *msg, ...) +PRINTFLIKE_DECL(2, 3) +static void +xslt_generic_error_handler(void *ctx, const char *msg, ...) { - char * message; + VALUE message; +#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES + /* It is not currently possible to pass var args from native + functions to sulong, so we work around the issue here. */ + message = rb_sprintf("xslt_generic_error_handler: %s", msg); +#else va_list args; va_start(args, msg); - vasprintf(&message, msg, args); + message = rb_vsprintf(msg, args); va_end(args); +#endif - rb_str_cat2((VALUE)ctx, message); - - vasprintf_free(message); + rb_str_concat((VALUE)ctx, message); } -VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss) +VALUE +Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss) { VALUE self; nokogiriXsltStylesheetTuple *wrapper; @@ -61,29 +58,29 @@ VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss) * * Parse a stylesheet from +document+. 
*/ -static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj) +static VALUE +parse_stylesheet_doc(VALUE klass, VALUE xmldocobj) { - xmlDocPtr xml, xml_cpy; - VALUE errstr, exception; - xsltStylesheetPtr ss ; - Data_Get_Struct(xmldocobj, xmlDoc, xml); - exsltRegisterAll(); + xmlDocPtr xml, xml_cpy; + VALUE errstr, exception; + xsltStylesheetPtr ss ; + Data_Get_Struct(xmldocobj, xmlDoc, xml); - errstr = rb_str_new(0, 0); - xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); + errstr = rb_str_new(0, 0); + xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); - xml_cpy = xmlCopyDoc(xml, 1); /* 1 => recursive */ - ss = xsltParseStylesheetDoc(xml_cpy); + xml_cpy = xmlCopyDoc(xml, 1); /* 1 => recursive */ + ss = xsltParseStylesheetDoc(xml_cpy); - xsltSetGenericErrorFunc(NULL, NULL); + xsltSetGenericErrorFunc(NULL, NULL); - if (!ss) { - xmlFreeDoc(xml_cpy); - exception = rb_exc_new3(rb_eRuntimeError, errstr); - rb_exc_raise(exception); - } + if (!ss) { + xmlFreeDoc(xml_cpy); + exception = rb_exc_new3(rb_eRuntimeError, errstr); + rb_exc_raise(exception); + } - return Nokogiri_wrap_xslt_stylesheet(ss); + return Nokogiri_wrap_xslt_stylesheet(ss); } @@ -93,144 +90,244 @@ static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj) * * Serialize +document+ to an xml string. */ -static VALUE serialize(VALUE self, VALUE xmlobj) -{ - xmlDocPtr xml ; - nokogiriXsltStylesheetTuple *wrapper; - xmlChar* doc_ptr ; - int doc_len ; - VALUE rval ; - - Data_Get_Struct(xmlobj, xmlDoc, xml); - Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper); - xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss); - rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len); - xmlFree(doc_ptr); - return rval ; -} - -static void swallow_superfluous_xml_errors(void * userdata, xmlErrorPtr error, ...) 
+static VALUE +serialize(VALUE self, VALUE xmlobj) { + xmlDocPtr xml ; + nokogiriXsltStylesheetTuple *wrapper; + xmlChar *doc_ptr ; + int doc_len ; + VALUE rval ; + + Data_Get_Struct(xmlobj, xmlDoc, xml); + Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper); + xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss); + rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len); + xmlFree(doc_ptr); + return rval ; } /* - * call-seq: - * transform(document, params = []) + * call-seq: + * transform(document) + * transform(document, params = {}) + * + * Apply an XSLT stylesheet to an XML::Document. + * + * [Parameters] + * - +document+ (Nokogiri::XML::Document) the document to be transformed. + * - +params+ (Hash, Array) strings used as XSLT parameters. + * + * [Returns] Nokogiri::XML::Document + * + * *Example* of basic transformation: + * + * xslt = <<~XSLT + * + * + * + * + * + * + * + *

    + *
      + * + *
    1. + *
      + *
    + * + * + *
    + * XSLT + * + * xml = <<~XML + * + * + * + * EMP0001 + * Accountant + * + * + * EMP0002 + * Developer + * + * + * XML * - * Apply an XSLT stylesheet to an XML::Document. - * +params+ is an array of strings used as XSLT parameters. - * returns Nokogiri::XML::Document + * doc = Nokogiri::XML::Document.parse(xml) + * stylesheet = Nokogiri::XSLT.parse(xslt) * - * Example: + * ⚠ Note that the +h1+ element is empty because no param has been provided! * - * doc = Nokogiri::XML(File.read(ARGV[0])) - * xslt = Nokogiri::XSLT(File.read(ARGV[1])) - * puts xslt.transform(doc, ['key', 'value']) + * stylesheet.transform(doc).to_xml + * # => "\n" + + * # "

    \n" + + * # "
      \n" + + * # "
    1. EMP0001
    2. \n" + + * # "
    3. EMP0002
    4. \n" + + * # "
    \n" + + * # "\n" * + * *Example* of using an input parameter hash: + * + * ⚠ The title is populated, but note how we need to quote-escape the value. + * + * stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml + * # => "\n" + + * # "

    Employee List

    \n" + + * # "
      \n" + + * # "
    1. EMP0001
    2. \n" + + * # "
    3. EMP0002
    4. \n" + + * # "
    \n" + + * # "\n" + * + * *Example* using the XSLT.quote_params helper method to safely quote-escape strings: + * + * stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml + * # => "\n" + + * # "

    Aaron's List

    \n" + + * # "
      \n" + + * # "
    1. EMP0001
    2. \n" + + * # "
    3. EMP0002
    4. \n" + + * # "
    \n" + + * # "\n" + * + * *Example* using an array of XSLT parameters + * + * You can also use an array if you want to. + * + * stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml + * # => "\n" + + * # "

    Employee List

    \n" + + * # "
      \n" + + * # "
    1. EMP0001
    2. \n" + + * # "
    3. EMP0002
    4. \n" + + * # "
    \n" + + * # "\n" + * + * Or pass an array to XSLT.quote_params: + * + * stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml + * # => "\n" + + * # "

    Aaron's List

    \n" + + * # "
      \n" + + * # "
    1. EMP0001
    2. \n" + + * # "
    3. EMP0002
    4. \n" + + * # "
    \n" + + * # "\n" + * + * See: Nokogiri::XSLT.quote_params */ -static VALUE transform(int argc, VALUE* argv, VALUE self) +static VALUE +transform(int argc, VALUE *argv, VALUE self) { - VALUE xmldoc, paramobj, errstr, exception ; - xmlDocPtr xml ; - xmlDocPtr result ; - nokogiriXsltStylesheetTuple *wrapper; - const char** params ; - long param_len, j ; - int parse_error_occurred ; - - rb_scan_args(argc, argv, "11", &xmldoc, ¶mobj); - if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; } - if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument)) - rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document"); - - /* handle hashes as arguments. */ - if(T_HASH == TYPE(paramobj)) { - paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0); - paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0); - } - - Check_Type(paramobj, T_ARRAY); - - Data_Get_Struct(xmldoc, xmlDoc, xml); - Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper); - - param_len = RARRAY_LEN(paramobj); - params = calloc((size_t)param_len+1, sizeof(char*)); - for (j = 0 ; j < param_len ; j++) { - VALUE entry = rb_ary_entry(paramobj, j); - const char * ptr = StringValueCStr(entry); - params[j] = ptr; - } - params[param_len] = 0 ; - - errstr = rb_str_new(0, 0); - xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); - xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); - - result = xsltApplyStylesheet(wrapper->ss, xml, params); - free(params); - - xsltSetGenericErrorFunc(NULL, NULL); - xmlSetGenericErrorFunc(NULL, NULL); - - parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0)); - - if (parse_error_occurred) { - exception = rb_exc_new3(rb_eRuntimeError, errstr); - rb_exc_raise(exception); - } - - return Nokogiri_wrap_xml_document((VALUE)0, result) ; + VALUE xmldoc, paramobj, errstr, exception ; + xmlDocPtr xml ; + xmlDocPtr result ; + nokogiriXsltStylesheetTuple *wrapper; + const char **params ; + long param_len, j ; + int 
parse_error_occurred ; + + rb_scan_args(argc, argv, "11", &xmldoc, ¶mobj); + if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; } + if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument)) { + rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document"); + } + + /* handle hashes as arguments. */ + if (T_HASH == TYPE(paramobj)) { + paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0); + paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0); + } + + Check_Type(paramobj, T_ARRAY); + + Data_Get_Struct(xmldoc, xmlDoc, xml); + Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper); + + param_len = RARRAY_LEN(paramobj); + params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *)); + for (j = 0 ; j < param_len ; j++) { + VALUE entry = rb_ary_entry(paramobj, j); + const char *ptr = StringValueCStr(entry); + params[j] = ptr; + } + params[param_len] = 0 ; + + errstr = rb_str_new(0, 0); + xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); + xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); + + result = xsltApplyStylesheet(wrapper->ss, xml, params); + ruby_xfree(params); + + xsltSetGenericErrorFunc(NULL, NULL); + xmlSetGenericErrorFunc(NULL, NULL); + + parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0)); + + if (parse_error_occurred) { + exception = rb_exc_new3(rb_eRuntimeError, errstr); + rb_exc_raise(exception); + } + + return noko_xml_document_wrap((VALUE)0, result) ; } -static void method_caller(xmlXPathParserContextPtr ctxt, int nargs) +static void +method_caller(xmlXPathParserContextPtr ctxt, int nargs) { - VALUE handler; - const char *function_name; - xsltTransformContextPtr transform; - const xmlChar *functionURI; + VALUE handler; + const char *function_name; + xsltTransformContextPtr transform; + const xmlChar *functionURI; - transform = xsltXPathGetTransformContext(ctxt); - functionURI = ctxt->context->functionURI; - handler = (VALUE)xsltGetExtData(transform, functionURI); - function_name = 
(const char*)(ctxt->context->function); + transform = xsltXPathGetTransformContext(ctxt); + functionURI = ctxt->context->functionURI; + handler = (VALUE)xsltGetExtData(transform, functionURI); + function_name = (const char *)(ctxt->context->function); - Nokogiri_marshal_xpath_funcall_and_return_values(ctxt, nargs, handler, (const char*)function_name); + Nokogiri_marshal_xpath_funcall_and_return_values(ctxt, nargs, handler, (const char *)function_name); } -static void * initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri) +static void * +initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri) { - VALUE modules = rb_iv_get(xslt, "@modules"); - VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri)); - VALUE args = { Qfalse }; - VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, args); - VALUE inst; - nokogiriXsltStylesheetTuple *wrapper; - int i; - - for(i = 0; i < RARRAY_LEN(methods); i++) { - VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i)); - xsltRegisterExtFunction(ctxt, - (unsigned char *)StringValueCStr(method_name), uri, method_caller); - } - - Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple, - wrapper); - inst = rb_class_new_instance(0, NULL, obj); - rb_ary_push(wrapper->func_instances, inst); - - return (void *)inst; + VALUE modules = rb_iv_get(mNokogiriXslt, "@modules"); + VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri)); + VALUE args = { Qfalse }; + VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, args); + VALUE inst; + nokogiriXsltStylesheetTuple *wrapper; + int i; + + for (i = 0; i < RARRAY_LEN(methods); i++) { + VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i)); + xsltRegisterExtFunction(ctxt, + (unsigned char *)StringValueCStr(method_name), uri, method_caller); + } + + Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple, + wrapper); + inst = rb_class_new_instance(0, NULL, obj); + 
rb_ary_push(wrapper->func_instances, inst); + + return (void *)inst; } -static void shutdownFunc(xsltTransformContextPtr ctxt, - const xmlChar *uri, void *data) +static void +shutdownFunc(xsltTransformContextPtr ctxt, + const xmlChar *uri, void *data) { - nokogiriXsltStylesheetTuple *wrapper; + nokogiriXsltStylesheetTuple *wrapper; - Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple, - wrapper); + Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple, + wrapper); - rb_ary_clear(wrapper->func_instances); + rb_ary_clear(wrapper->func_instances); } /* @@ -239,32 +336,28 @@ static void shutdownFunc(xsltTransformContextPtr ctxt, * * Register a class that implements custom XSLT transformation functions. */ -static VALUE registr(VALUE self, VALUE uri, VALUE obj) +static VALUE +registr(VALUE self, VALUE uri, VALUE obj) { - VALUE modules = rb_iv_get(self, "@modules"); - if(NIL_P(modules)) rb_raise(rb_eRuntimeError, "wtf! @modules isn't set"); + VALUE modules = rb_iv_get(self, "@modules"); + if (NIL_P(modules)) { rb_raise(rb_eRuntimeError, "wtf! 
@modules isn't set"); } - rb_hash_aset(modules, uri, obj); - xsltRegisterExtModule((unsigned char *)StringValueCStr(uri), initFunc, shutdownFunc); - return self; + rb_hash_aset(modules, uri, obj); + xsltRegisterExtModule((unsigned char *)StringValueCStr(uri), initFunc, shutdownFunc); + return self; } -VALUE cNokogiriXsltStylesheet ; -void init_xslt_stylesheet() +void +noko_init_xslt_stylesheet(void) { - VALUE nokogiri; - VALUE klass; - - nokogiri = rb_define_module("Nokogiri"); - xslt = rb_define_module_under(nokogiri, "XSLT"); - klass = rb_define_class_under(xslt, "Stylesheet", rb_cObject); + rb_define_singleton_method(mNokogiriXslt, "register", registr, 2); + rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new()); - rb_iv_set(xslt, "@modules", rb_hash_new()); + cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject); - cNokogiriXsltStylesheet = klass; + rb_undef_alloc_func(cNokogiriXsltStylesheet); - rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1); - rb_define_singleton_method(xslt, "register", registr, 2); - rb_define_method(klass, "serialize", serialize, 1); - rb_define_method(klass, "transform", transform, -1); + rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1); + rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1); + rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1); } diff --git a/ext/nokogiri/xslt_stylesheet.h b/ext/nokogiri/xslt_stylesheet.h deleted file mode 100644 index 95a14e0591..0000000000 --- a/ext/nokogiri/xslt_stylesheet.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef NOKOGIRI_XSLT_STYLESHEET -#define NOKOGIRI_XSLT_STYLESHEET - -#include - -void init_xslt_stylesheet(); - -extern VALUE cNokogiriXsltStylesheet ; - -typedef struct _nokogiriXsltStylesheetTuple { - xsltStylesheetPtr ss; - VALUE func_instances; -} nokogiriXsltStylesheetTuple; -#endif diff --git a/gumbo-parser/.gitignore 
b/gumbo-parser/.gitignore new file mode 100644 index 0000000000..3d04bd296b --- /dev/null +++ b/gumbo-parser/.gitignore @@ -0,0 +1,3 @@ +build +googletest +src/*.o diff --git a/gumbo-parser/CHANGES.md b/gumbo-parser/CHANGES.md new file mode 100644 index 0000000000..277b3a2bbe --- /dev/null +++ b/gumbo-parser/CHANGES.md @@ -0,0 +1,63 @@ +## Gumbo 0.10.1 (2015-04-30) + +Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage. + +## Gumbo 0.10.0 (2015-04-30) + +* Full support for `