diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..7c2ced0d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,163 @@ +# This file was automatically generated by sbt-github-actions using the +# githubWorkflowGenerate task. You should add and commit this file to +# your git repository. It goes without saying that you shouldn't edit +# this file by hand! Instead, if you wish to make changes, you should +# change your sbt build configuration to revise the workflow description +# to meet your needs, then regenerate this file. + +name: Continuous Integration + +on: + pull_request: + branches: [master, backport/v*] + types: [opened, reopened, synchronize, ready_for_review] + push: + branches: [master, backport/v*] + +env: + SBT: ./sbt + JABBA_INDEX: 'https://github.com/1Jo1/jabba/raw/support-graalvm-java-8-and-11/index.json' + REPO_SLUG: ${{ github.repository }} + ENCRYPTION_PASSWORD: ${{ secrets.ENCRYPTION_PASSWORD }} + GITHUB_ACTOR: precog-bot + GITHUB_TOKEN: ${{ secrets.PRECOG_GITHUB_TOKEN }} + +jobs: + build: + name: Build and Test + if: '!(github.event_name == ''pull_request'' && github.event.pull_request.draft)' + strategy: + matrix: + os: [ubuntu-latest] + scala: [2.12.10] + java: [adopt@1.8, graalvm8@20.1.0] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout current branch (full) + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Java and Scala + uses: olafurpg/setup-scala@v10 + with: + java-version: ${{ matrix.java }} + + - name: Cache sbt + uses: actions/cache@v2 + with: + path: | + ~/.sbt + ~/.ivy2/cache + ~/.coursier/cache/v1 + ~/.cache/coursier/v1 + ~/AppData/Local/Coursier/Cache/v1 + ~/Library/Caches/Coursier/v1 + key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}-${{ hashFiles('.versions.json') }} + + - name: Common sbt setup + if: env.ENCRYPTION_PASSWORD != null + run: $SBT ++${{ matrix.scala }} transferCommonResources exportSecretsForActions + + - name: Decode testCredentials + run: base64 -d testCredentials.json.b64 > testCredentials.json + + - name: Check that workflows are up to date + run: $SBT ++${{ matrix.scala }} githubWorkflowCheck + + - run: $SBT ++${{ matrix.scala }} ci + + - name: Compress target directories + run: tar cf targets.tar target datasource/target project/target + + - name: Upload target directories + uses: actions/upload-artifact@v2 + with: + name: target-${{ matrix.os }}-${{ matrix.scala }}-${{ matrix.java }} + path: targets.tar + + publish: + name: Publish Artifacts + needs: [build] + if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/backport/v') || github.ref == 'refs/heads/master') + strategy: + matrix: + os: [ubuntu-latest] + scala: [2.12.10] + java: [adopt@1.8] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout current branch (full) + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Java and Scala + uses: olafurpg/setup-scala@v10 + with: + java-version: ${{ matrix.java }} + + - name: Cache sbt + uses: actions/cache@v2 + with: + path: | + ~/.sbt + ~/.ivy2/cache + ~/.coursier/cache/v1 + ~/.cache/coursier/v1 + ~/AppData/Local/Coursier/Cache/v1 + ~/Library/Caches/Coursier/v1 + key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}-${{ hashFiles('.versions.json') }} + + - name: Download target directories (2.12.10) + uses: actions/download-artifact@v2 + with: + name: target-${{ 
matrix.os }}-2.12.10-${{ matrix.java }}
+          path: targets.tar
+
+      - name: Inflate target directories (2.12.10)
+        run: |
+          tar xf targets.tar
+          rm targets.tar
+
+      - name: Common sbt setup
+        run: $SBT ++${{ matrix.scala }} transferCommonResources transferPublishAndTagResources exportSecretsForActions
+
+      - run: ./scripts/commonSetup
+
+      - name: Publish artifacts and create tag
+        run: ./scripts/publishAndTag ${{ github.repository }}
+
+  auto-merge:
+    name: Auto Merge
+    needs: [build]
+    if: 'github.event_name == ''pull_request'' && contains(github.head_ref, ''version-bump'') && contains(github.event.pull_request.labels.*.name, ''version: revision'')'
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        scala: [2.12.10]
+        java: [adopt@1.8]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout current branch (fast)
+        uses: actions/checkout@v2
+
+      - name: Setup Java and Scala
+        uses: olafurpg/setup-scala@v10
+        with:
+          java-version: ${{ matrix.java }}
+
+      - name: Common sbt setup
+        run: $SBT ++${{ matrix.scala }} transferCommonResources exportSecretsForActions
+
+      - name: Fetch the latest sdmerge
+        run: |
+          curl -L https://github.com/precog/devtools/raw/master/bin/sdmerge > /tmp/sdmerge
+          chmod +x /tmp/sdmerge
+
+      - name: Self-merge
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          git config --global user.email "bot@precog.com"
+          git config --global user.name "Precog Bot"
+          /tmp/sdmerge $GITHUB_REPOSITORY $PR_NUMBER
\ No newline at end of file
diff --git a/.github/workflows/clean.yml b/.github/workflows/clean.yml
new file mode 100644
index 00000000..b535fcc1
--- /dev/null
+++ b/.github/workflows/clean.yml
@@ -0,0 +1,59 @@
+# This file was automatically generated by sbt-github-actions using the
+# githubWorkflowGenerate task. You should add and commit this file to
+# your git repository. It goes without saying that you shouldn't edit
+# this file by hand! Instead, if you wish to make changes, you should
+# change your sbt build configuration to revise the workflow description
+# to meet your needs, then regenerate this file.
+
+name: Clean
+
+on: push
+
+jobs:
+  delete-artifacts:
+    name: Delete Artifacts
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Delete artifacts
+        run: |
+          # The repository API URL; credentials come from the GITHUB_TOKEN env var above.
+          REPO=${GITHUB_API_URL}/repos/${{ github.repository }}
+
+          # A shortcut to call GitHub API.
+          ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; }
+
+          # A temporary file which receives HTTP response headers.
+          TMPFILE=/tmp/tmp.$$
+
+          # An associative array, key: artifact name, value: number of artifacts of that name.
+          declare -A ARTCOUNT
+
+          # Process all artifacts on this repository, loop on returned "pages".
+          URL=$REPO/actions/artifacts
+          while [[ -n "$URL" ]]; do
+
+            # Get current page, get response headers in a temporary file.
+            JSON=$(ghapi --dump-header $TMPFILE "$URL")
+
+            # Get URL of next page. Will be empty if we are at the last page.
+            URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<//' -e 's/>.*//')
+            rm -f $TMPFILE
+
+            # Number of artifacts on this page:
+            COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') ))
+
+            # Loop on all artifacts on this page.
+            for ((i=0; $i < $COUNT; i++)); do
+
+              # Get name of artifact and count instances of this name.
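+              # (The ".artifacts[$i].name?" filter uses jq's optional operator, so a
+              # missing field yields null instead of aborting the script.)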
+ name=$(jq <<<$JSON -r ".artifacts[$i].name?") + ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) + + id=$(jq <<<$JSON -r ".artifacts[$i].id?") + size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) + printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size + ghapi -X DELETE $REPO/actions/artifacts/$id + done + done \ No newline at end of file diff --git a/.gitignore b/.gitignore index 698aeefa..e94fc51d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,19 @@ tmp *.sw* tags testCredentials.json + +# sbt-slamdata common resources +/scripts/bumpDependentProject +/scripts/checkAndAutoMerge +/scripts/commonSetup +/credentials.bintray.enc +/credentials.sonatype.enc +/scripts/discordTravisPost +/scripts/isRevision +/scripts/listLabels +/scripts/closePR +/pgppassphrase.sbt.enc +/scripts/publishAndTag +/pubring.pgp.enc +/scripts/readVersion +/secring.pgp.enc diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f8980090..00000000 --- a/.travis.yml +++ /dev/null @@ -1,57 +0,0 @@ -language: scala -scala: 2.12.4 -jdk: oraclejdk8 -dist: trusty -sudo: false - -# Just protecting against automated scrapers -before_script: - - base64 -d testCredentials.json.b64 > testCredentials.json - -script: - - sbt test datasource/assembly assembleDatasource - -branches: - except: - - /^v\d+\.\d+\.\d+$/ # don't redundantly build tags - -cache: - directories: - - $HOME/.coursier/cache - - $HOME/.ivy2/cache - - $HOME/.sbt - -before_cache: - - find "$HOME/.sbt/" -name '*.lock' -print0 | xargs -0 rm - - find "$HOME/.ivy2/" -name 'ivydata-*.properties' -print0 | xargs -0 rm - -env: - global: - - SBT=./sbt - # ENCRYPTION_PASSWORD - - secure: "gcVd+nxNvnls9pZbLUvmLDWqU9t5/34kfDFQOv8iOj3QPiS4FYPPoA/N66g/YgTOuA/yeqZoJwQw8HfJwBqc+fjBczQz37taC5wYAjnNLJYzE6iF2dbYl15g8lad2q68rvFSOcbdLMB73fDNBuZN1WDq2Go6RSxZsYZXgbXoZKmGtgod4Qs/ERPkTt5mvy5nplS/mYVWWj7fbHt9dB2PErCdxB+Gb+txlcBUNbI5MhIcfjsnlKUudgGCkTjtid9HOw/kkY+o7MVBMjIRPVJRKNK2pJRQA+SWJf1tWHk3vhrqzgrZMIrbkbaTQjxPn7TuXItWlKcd7eONe9ByPwpGml/mZvfq3BFegqCezeWaDNiKqyjxuilCUNV5C6WNOiAbWvpkkt79hhqXrA9uAxk6IeGW4e6LjNhWffZsUBd4pX1MocUKX7c+PM0oBvn9H9SJgVl7tWlnCxoGQzpbU7HT7/rmFldofbLz3fRNRmEx6XzTNV3LI3472oQxH8yanyBRUJmXwdxvm+/EL39KcOF3qwevbVCTvn7VkSEYaujOPNvt8mEpw/Yk2cf5/ISG3tA1mrzihQ9PV/NBfbxkek2B1i1Sf5ImMpL7PrsoogbCbD4NbKNh0CWvIR9/EJrRpKkZOLKUF+qaeH81tH/QMl9Z9b9zIJJ1NfKTYCUG2X34srY=" - # GITHUB_TOKEN - - secure: "UP8462/zizkkV6svHsXJyJHhQpm8d/3H0nkRH1b6/Jf63woaZ0CwByA96JJEB/Co+P8b4ekaNqfwQVyrUqlP0r5oba4pbZPhJtXnP0TTtmv5dOd+MXGbw65di6FufXwsUFP03fFGJgeazX+QVxJfbq3Oq5idaSF1B+ee9P62Mzt9bZBvK5YlwhTyqHd9ZnZYoIAPZlWfJCNCu+YY/pe0tim0k9blTAUnxWGw6HvZYYdEg/38RJuvehr8LZ71S5b56mjI841hfdI3L56xTXvYunjyvkgFsFzk+JpKslumPRmWxJnAmPTxQsYnTYtE+u41f/apULbJZVNgUPOJo/3b8UlhjYCnD7gWOJ3PZ6B160hz7xSVwvJZDgQAd4IgDmZ+DUai4kkdPgS+9rAKT95V8j5oe/uMHgxCBRUCKx2ODhseX/6fbbn3Q7hMyjbyYT29XgSjDioeWjADpBVWWSlpVzlUf9Z6aLYpTVZqhyp1pidLzLBm6Fu9BX0n7gxWK6rP55qhQm8GrmhGyCGAY7OEyyIoTokEX7A8v8UsE++R2zhd+sPeLLibkO2ob1a+yj6t9DGahG9I34bExo5+tb3hsvA2ofesNatry4YFQCzIja+m7i4z54XL8VwQnhxjTStxurq3rj6ls/XmnSqo9D3PqO9A/VC41KNIY9NtwBt4Z00=" - # GITHUB_ACCESS_TOKEN - - secure: 
"kLgfd9j3H8Ed8Odel0k52M0L1w0RMRUybHO2akPZH74g0kW15IP4qoFXgA1AJnNTVyj0R1QJYg2fxSBeDCyadkf3af76fDyVFBdpT98Ahb5qhq815Ow+howPHyuWhGmDxuI/SuReFDoztK1d9gzY5lqjg+WyCm0A65l/PNXUQaLwAV2M9bFXgYYrbXJLSBdhK2mVLLJVW6B/gTLr3eG2pt8cgscTtP6qSx31q1xZYi7Ffzt/NFq5s3q6a3ckCjqptgwoyDKw0J1rn/mDdAHGrcLRht6S9vU/ChkABpVta9xQNZuORYoDod3aAgEGBxzR/uklBo6XnqU330SEDSBtH9Rt0M6Fr83CARPe920pDXOKZ1oY1w/tZsBH397PAWBX6YpMKq9UrkG9Yv5c6mSdpMhq9gkUky9WzZvS12yXbPomSugh8caxuaQTb5WR0KdBlFB4MOVduEac7DLVDzxGCcSRbB2ORE4oNjm7oDSQmmYWHuvlCqp0ZLpVxaflNFusWBRbvc6Mmkstf0m1yvV/TvV3FMx7Z3SujrA+xXDKRoVrsCP1dz/c1jN4gQ+YQ0L2LmT8OFU7RcL3GwgS4Akla4x4dbk7BlqrXtiL1I4YcTpMRuqDsnr7IX1fgXozagYEhRUMFW+Wl6K3IKYM3uwJL2Z8Ks1oOk1ln9FPavTIGCk=" - -deploy: - provider: releases - api_key: - secure: "W7dalSaVSxIveCTs6H+gZjETdrp26MJr5/q5uncg2R3uEJ3WteejYahLih9DoYAekyegJznxF3pyc9Jha+j9n77BaFVzhYJEOaz8C1tBaw8oxfts3yohfAluxZ/oZ7sweCiIoFUv0Txb3aCPanwOj9u1mL/7ogFfyuN3O74HxPmeKTczjLVZklHnnqp/1gtdkrshKVuwpprwlq8gdZbZ9pOr9Lrm0AjcKjMXk0gze8pfBC5gDQZTdHJobxBsIQq+uwpQegEwc4hwzAtWKjzCxRuuQM63IoANo+f1PEAlbEPNyku02cjqNikVVxKSSEiNgn2MdS9/8/PRnCOi4ubzYqRIeiipynlcjcyfj/DeAL277+9IZwpl8e1TW0emrwkZXrK+P++i9LAWVByOa2rkJTj0iVfJanR1NKbehUeUo73YKE+eRmwpuP2KtvnKxgYTOuNYwbmYHVlcVc3zsJTCQqbe6cwhOmDOAve4knp7ceELRdBQ5nPaq93VIOWQMjd9PwuH/VJaVczS1aTJZHF//un8BYqlFbGHjGR5hB1eylSLlZDY2otzM6SdpsnNaC4qv+jVRD1GrfNiOoUVCBnbSQrr5lvr0i/BkRViQrluf6cWI0OnekY2os1IOewJcAjqwWCT6qKxj2LSVrDPxlgXobfaaZC5dZioKlii84GW7lg=" - file_glob: false - file: - - .targets/datasource/scala-2.12/quasar-s3-$VERSION-explode.tar.gz - - .targets/datasource/scala-2.12/quasar-s3-assembly-$VERSION.jar - skip_cleanup: true - script: - - ./scripts/version - on: - tags: false - #all_branches: true - # Omit tagged builds and publish on master and backport/* - #condition: $TRAVIS_TAG = '' && ($TRAVIS_BRANCH == "master" || $TRAVIS_BRANCH == backport/*) - - -before_deploy: - - scripts/lwcPublishAndTag diff --git a/.versions.json b/.versions.json new file mode 100644 index 00000000..e91ccb79 --- /dev/null +++ b/.versions.json @@ -0,0 +1,3 @@ +{ + "precog-quasar": "214.1.4" +} \ No newline at end of file diff --git a/CLA.md b/CLA.md deleted file mode 100644 index f6990e48..00000000 --- a/CLA.md +++ /dev/null @@ -1,85 +0,0 @@ -# Quasar Contributor License Agreement - -Thank you for your interest in contributing to the Quasar open source project. - -This contributor agreement ("Agreement") describes the terms and conditions under which you may Submit a Contribution to Us. By Submitting a Contribution to Us, you accept the terms and conditions in the Agreement. If you do not accept the terms and conditions in the Agreement, you must not Submit any Contribution to Us. - -This is a legally binding document, so please read it carefully before accepting the terms and conditions. If you accept this Agreement, the then-current version of this Agreement shall apply each time you Submit a Contribution. The Agreement may cover more than one software project managed by Us. - -## 1. Definitions - -"We" or "Us" means SlamData, Inc, a corporation registered in the state of Delaware, whose file number is 5368278 under request number 130885478 for authentication number 0591375. - -"You" means the individual or entity who Submits a Contribution to Us. - -"Contribution" means any work of authorship that is Submitted by You to Us in which You own or assert ownership of the Copyright. You may not Submit a Contribution if you do not own the Copyright in the entire work of authorship. 
- -"Copyright" means all rights protecting works of authorship owned or controlled by You, including copyright, moral and neighboring rights, as appropriate, for the full term of their existence including any extensions by You. - -"Material" means the work of authorship which is made available by Us to third parties. When this Agreement covers more than one software project, the Material means the work of authorship to which the Contribution was Submitted. After You Submit the Contribution, it may be included in the Material. - -"Submit" means any form of electronic, verbal, or written communication sent to Us or our representatives, including but not limited to electronic mailing lists, electronic mail, source code control systems, pull requests, and issue tracking systems that are managed by, or on behalf of, Us for the purpose of discussing and improving the Material, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." - -"Submission Date" means the date on which You Submit a Contribution to Us. - -"Effective Date" means the earliest date You execute this Agreement by Submitting a Contribution to Us. - -## 2. Grant of Rights - -### 2.1 Copyright License - -2.1.1. You retain ownership of the Copyright in Your Contribution and have the same rights to use or license the Contribution which You would have had without entering into the Agreement. - -2.1.2. To the maximum extent permitted by the relevant law, You grant to Us a perpetual, worldwide, non-exclusive, transferable, royalty-free, irrevocable license under the Copyright covering the Contribution, with the right to sublicense such rights through multiple tiers of sublicensees, to reproduce, modify, display, perform and distribute the Contribution as part of the Material; provided that this license is conditioned upon compliance with Section 2.3. - -### 2.2 Patent License - -For patent claims including, without limitation, method, process, and apparatus claims which You own, control or have the right to grant, now or in the future, You grant to Us a perpetual, worldwide, non-exclusive, transferable, royalty-free, irrevocable patent license, with the right to sublicense these rights to multiple tiers of sublicensees, to make, have made, use, sell, offer for sale, import and otherwise transfer the Contribution and the Contribution in combination with the Material (and portions of such combination). This license is granted only to the extent that the exercise of the licensed rights infringes such patent claims; and provided that this license is conditioned upon compliance with Section 2.3. - -### 2.3 Outbound License - -Based on the grant of rights in Sections 2.1 and 2.2, if We include Your Contribution in a Material, We may license the Contribution under any license, including copyleft, permissive, commercial, or proprietary licenses. As a condition on the exercise of this right, We agree to also license the Contribution under the terms of the license or licenses which We are using for the Material on the Submission Date. - -### 2.4 Moral Rights. - -If moral rights apply to the Contribution, to the maximum extent permitted by law, You waive and agree not to assert such moral rights against Us or our successors in interest, or any of our licensees, either direct or indirect. - -### 2.5 Our Rights. - -You acknowledge that We are not obligated to use Your Contribution as part of the Material and may decide to include any Contribution We consider appropriate. 
- -### 2.6 Reservation of Rights. - -Any rights not expressly licensed under this section are expressly reserved by You. - -## 3. Agreement - -You confirm that: - -a. You have the legal authority to enter into this Agreement. - -b. You own the Copyright and patent claims covering the Contribution which are required to grant the rights under Section 2. - -c. The grant of rights under Section 2 does not violate any grant of rights which You have made to third parties, including Your employer. If You are an employee, You have had Your employer approve this Agreement or sign the Entity version of this document. If You are less than eighteen years old, please have Your parents or guardian sign the Agreement. - -d. You have followed the instructions in, if You do not own the Copyright in the entire work of authorship Submitted. - -## 4. Disclaimer - -EXCEPT FOR THE EXPRESS WARRANTIES IN SECTION 3, THE CONTRIBUTION IS PROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIES INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED BY YOU TO US. TO THE EXTENT THAT ANY SUCH WARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATION TO THE MINIMUM PERIOD PERMITTED BY LAW. - -## 5. Consequential Damage Waiver - -TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL YOU BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF ANTICIPATED SAVINGS, LOSS OF DATA, INDIRECT, SPECIAL, INCIDENTAL, CONSEQUENTIAL AND EXEMPLARY DAMAGES ARISING OUT OF THIS AGREEMENT REGARDLESS OF THE LEGAL OR EQUITABLE THEORY (CONTRACT, TORT OR OTHERWISE) UPON WHICH THE CLAIM IS BASED. - -## 6. Miscellaneous - -6.1. This Agreement will be governed by and construed in accordance with the laws of the state of Colorado, in the United States of America, excluding its conflicts of law provisions. Under certain circumstances, the governing law in this section might be superseded by the United Nations Convention on Contracts for the International Sale of Goods ("UN Convention") and the parties intend to avoid the application of the UN Convention to this Agreement and, thus, exclude the application of the UN Convention in its entirety to this Agreement. - -6.2. This Agreement sets out the entire agreement between You and Us for Your Contributions to Us and overrides all other agreements or understandings. - -6.3. If You or We assign the rights or obligations received through this Agreement to a third party, as a condition of the assignment, that third party must agree in writing to abide by all the rights and obligations in the Agreement. - -6.4. The failure of either party to require performance by the other party of any provision of this Agreement in one situation shall not affect the right of a party to require such performance at any time in the future. A waiver of performance under a provision in one situation shall not be considered a waiver of the performance of the provision in the future or a waiver of the provision in its entirety. - -6.5. If any provision of this Agreement is found void and unenforceable, such provision will be replaced to the extent possible with a provision that comes closest to the meaning of the original provision and which is enforceable. The terms and conditions set forth in this Agreement shall apply notwithstanding any failure of essential purpose of this Agreement or any limited remedy to the maximum extent possible under law. 
\ No newline at end of file
diff --git a/README.md b/README.md
index 1ca73712..ee393313 100644
--- a/README.md
+++ b/README.md
@@ -1,79 +1,66 @@
 # Quasar S3 Datasource
 
-[![Build Status](https://travis-ci.org/slamdata/quasar-s3.svg?branch=master)](https://travis-ci.org/slamdata/quasar-s3)
+## Usage
 
-A datasource for the Quasar open source analytics engine, that
-provides access to Amazon S3.
-
-## How to use this
-
-1. Clone this repository (`git@github.com:slamdata/quasar-s3.git`)
-2. At the root of the repo, run `./sbt assembleDatasource`. This will generate a tarball with a loadable `slamdata-backend` plugin
-in `.targets/datasource/scala-2.12/quasar-s3-<version>-explode.tar.gz`
-3. Extract the tarball to SlamData Backend's plugin directory. By default that is `$HOME/.config/slamdata/plugin/`
-4. Run SlamData backend and the datasource should be available
+```sbt
+libraryDependencies += "com.precog" %% "quasar-datasource-s3" % <version>
+```
 
 ## Configuration
 
-You can create a new S3 datasource after you've loaded this plugin into
-Quasar. Refer to the previous section for instructions on how to do
-that. In order to create a datasource, you will need to send a POST
-request to `/datasource` including a JSON
-document specifying the datasource's configuration. The format of the
-JSON document can be found in [`slamdata-backend`'s
-documentation.](https://github.com/slamdata/slamdata-backend#applicationvndslamdatadatasource).
+The configuration of the S3 datasource has the following JSON format:
 
-The connector-specific configuration needs to specify at least a
-bucket URI and the JSON parsing to use when decoding JSON files stored
-in S3. An example of a JSON configuration to create a datasource that
-parses line-delimited JSON:
-
-```json
-{
-  "bucket": "https://yourbucket.s3.amazonaws.com",
-  "jsonParsing": "lineDelimited"
-}
 ```
-
-As another example, this is a JSON configuration to parse array
-JSON:
-
-```json
 {
-  "bucket": "https://yourbucket.s3.amazonaws.com",
-  "jsonParsing": "array"
+  "bucket": String,
+  "format": {
+    "type": "json" | "separated-values",
+    // for "json"
+    "precise": Boolean,
+    "variant": "array-wrapped" | "line-delimited",
+    // for "separated-values", each of the following strings must be exactly one character
+    "header": Boolean,
+    // The first character of the row delimiter
+    "row1": String,
+    // The second character of the row delimiter; an empty string if the row delimiter is a single character
+    "row2": String,
+    // Column separator (single character)
+    "record": String,
+    "openQuote": String,
+    "closeQuote": String,
+    "escape": String
+  },
+  ["compressionScheme": "gzip" | "zip"]
+  ["credentials": Object]
 }
 ```
 
+* `bucket` the URL of the S3 bucket to use, e.g. `https://yourbucket.s3.amazonaws.com`
+* `format` the format of the resources in the bucket. CSV/TSV, array-wrapped JSON, and line-delimited JSON are supported (a CSV sketch follows below)
+* `compressionScheme` (optional, default = empty) the compression scheme that the resources in the bucket are assumed
+  to be compressed with. Currently `"gzip"` and `"zip"` are supported.
+  If omitted, the resources are not assumed to be compressed.
+* `credentials` (optional, default = empty) S3 credentials to use for access in case the bucket is not public.
+  The object has the following format: `{ "accessKey": String, "secretKey": String, "region": String }`.
+  The `credentials` section can be omitted completely for public buckets, but for private buckets the section needs
+  to be there with all three fields specified.
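+
+For example, a hypothetical `separated-values` configuration for standard CSV
+(CRLF row breaks, comma-separated records, double-quote quoting and escaping)
+might look like the following sketch, built from the field descriptions above;
+the field values are illustrative, not defaults:
+
+```
+{
+  "bucket": "https://yourbucket.s3.amazonaws.com",
+  "format": {
+    "type": "separated-values",
+    "header": true,
+    "row1": "\r",
+    "row2": "\n",
+    "record": ",",
+    "openQuote": "\"",
+    "closeQuote": "\"",
+    "escape": "\""
+  }
+}
+```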
-
-Along with the request, you also need to specify a `Content-Type` header:
-
-```
-Content-Type: application/vnd.slamdata.datasource"
-```
-
-### Secure buckets
-
-If your bucket is not public you need to include a `credentials`
-subdocument with the credentials you use to access the bucket. For
-example:
+Example:
 
 ```
 {
-  "bucket":"https://some.bucket.uri",
-  "jsonParsing":"array",
+  "bucket": "https://yourbucket.s3.amazonaws.com",
+  "format": {"type": "json", "variant": "line-delimited", "precise": false},
+  "compressionScheme": "gzip",
   "credentials": {
-    "accessKey":"some access key",
-    "secretKey":"super secret key",
-    "region":"us-east-1"
+    "accessKey": "some access key",
+    "secretKey": "super secret key",
+    "region": "us-east-1"
   }
 }
 ```
 
-`accessKey`, `secretKey`, and `region` are all mandatory. You may omit
-`credentials` entirely if your bucket is public. As with public
-buckets, you need to include a `Content-Type` header. Refer to the previous
-section for an example.
-
 ### Running the test suite for secure buckets
 
 You need to decode the base64-encoded credentials.
@@ -90,25 +77,5 @@ For BSD (or macOS) `base64`:
 base64 -D -i testCredentials.json.b64 -o testCredentials.json
 ```
 
-After this, you should be able to run the `SecureS3DataSourceSpec` spec
-
-
-## Thanks to Sponsors
-
-YourKit supports open source projects with its full-featured Java Profiler. YourKit, LLC is the creator of YourKit Java Profiler and YourKit .NET Profiler, innovative and intelligent tools for profiling Java and .NET applications.
-
-## Legal
-
-Copyright © 2014 - 2018 SlamData Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
+After this, you should be able to run the `SecureS3DatasourceSpec` spec
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/build.sbt b/build.sbt
index 89dceee3..a5296052 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,169 +1,66 @@
-import github.GithubPlugin._
-
-import scala.Predef._
-import quasar.s3.project._
-
-import java.lang.{Integer, String, Throwable}
-import scala.{Boolean, List, Predef, None, Some, StringContext, sys, Unit}, Predef.{any2ArrowAssoc, assert, augmentString}
 import scala.collection.Seq
-import scala.collection.immutable.Map
-
-import sbt._, Keys._
-import sbt.std.Transform.DummyTaskMap
-import sbt.TestFrameworks.Specs2
-import sbtrelease._, ReleaseStateTransformations._, Utilities._
-
-val BothScopes = "test->test;compile->compile"
-
-// Exclusive execution settings
-lazy val ExclusiveTests = config("exclusive") extend Test
-
-def exclusiveTasks(tasks: Scoped*) =
-  tasks.flatMap(inTask(_)(tags := Seq((ExclusiveTest, 1))))
-
-lazy val buildSettings = Seq(
-
-  // NB: Some warts are disabled in specific projects.
Here’s why: - // • AsInstanceOf – wartremover/wartremover#266 - // • others – simply need to be reviewed & fixed - wartremoverWarnings in (Compile, compile) --= Seq( - Wart.Any, // - see wartremover/wartremover#263 - Wart.PublicInference, // - creates many compile errors when enabled - needs to be enabled incrementally - Wart.ImplicitParameter, // - creates many compile errors when enabled - needs to be enabled incrementally - Wart.ImplicitConversion, // - see mpilquist/simulacrum#35 - Wart.Nothing), // - see wartremover/wartremover#263 - // Normal tests exclude those tagged in Specs2 with 'exclusive'. - testOptions in Test := Seq(Tests.Argument(Specs2, "exclude", "exclusive", "showtimes")), - // Exclusive tests include only those tagged with 'exclusive'. - testOptions in ExclusiveTests := Seq(Tests.Argument(Specs2, "include", "exclusive", "showtimes")), - - console := { (console in Test).value }, // console alias test:console - assemblyMergeStrategy in assembly := { - case PathList("META-INF", xs @ _*) => MergeStrategy.discard - case x => MergeStrategy.first - }) - -val targetSettings = Seq( - target := { - import java.io.File - - val root = (baseDirectory in ThisBuild).value.getAbsolutePath - val ours = baseDirectory.value.getAbsolutePath - new File(root + File.separator + ".targets" + File.separator + ours.substring(root.length)) - } -) +ThisBuild / scalaVersion := "2.12.10" -// In Travis, the processor count is reported as 32, but only ~2 cores are -// actually available to run. -concurrentRestrictions in Global := { - val maxTasks = 2 - if (isTravisBuild.value) - // Recreate the default rules with the task limit hard-coded: - Seq(Tags.limitAll(maxTasks), Tags.limit(Tags.ForkedTestGroup, 1)) - else - (concurrentRestrictions in Global).value -} +ThisBuild / githubRepository := "quasar-datasource-s3" -// Tasks tagged with `ExclusiveTest` should be run exclusively. -concurrentRestrictions in Global += Tags.exclusive(ExclusiveTest) +publishAsOSSProject in ThisBuild := true -lazy val publishSettings = Seq( - performMavenCentralSync := false, - organizationName := "SlamData Inc.", - organizationHomepage := Some(url("http://quasar-analytics.org")), - homepage := Some(url("https://github.com/slamdata/quasar-s3")), - scmInfo := Some( - ScmInfo( - url("https://github.com/slamdata/quasar-s3"), - "scm:git@github.com:slamdata/quasar-s3.git" - ) - )) +homepage in ThisBuild := Some(url("https://github.com/precog/quasar-datasource-s3")) -lazy val assemblySettings = Seq( - test in assembly := {}, +scmInfo in ThisBuild := Some(ScmInfo( + url("https://github.com/precog/quasar-datasource-s3"), + "scm:git@github.com:precog/quasar-datasource-s3.git")) - assemblyExcludedJars in assembly := { - val cp = (fullClasspath in assembly).value - cp filter { attributedFile => - val file = attributedFile.data +ThisBuild / githubWorkflowBuildPreamble += + WorkflowStep.Run( + List("base64 -d testCredentials.json.b64 > testCredentials.json"), + name = Some("Decode testCredentials")) - val excludeByName: Boolean = file.getName.matches("""scala-library-2\.12\.\d+\.jar""") - val excludeByPath: Boolean = file.getPath.contains("org/typelevel") - - excludeByName && excludeByPath - } - } -) - -// Build and publish a project, excluding its tests. 
-lazy val commonSettings = buildSettings ++ publishSettings ++ assemblySettings - -// not doing this causes NoSuchMethodErrors when using coursier -lazy val excludeTypelevelScalaLibrary = - Seq(excludeDependencies += "org.typelevel" % "scala-library") - -// Include to also publish a project's tests -lazy val publishTestsSettings = Seq( - publishArtifact in (Test, packageBin) := true -) - -lazy val githubReleaseSettings = - githubSettings ++ Seq( - GithubKeys.assets := Seq(assembly.value), - GithubKeys.repoSlug := "slamdata/quasar-s3", - GithubKeys.releaseName := "quasar " + GithubKeys.tag.value, - releaseVersionFile := file("version.sbt"), - releaseUseGlobalVersion := true, - releaseProcess := Seq[ReleaseStep]( - checkSnapshotDependencies, - inquireVersions, - runTest, - setReleaseVersion, - commitReleaseVersion, - pushChanges) - ) - -lazy val isCIBuild = settingKey[Boolean]("True when building in any automated environment (e.g. Travis)") -lazy val isIsolatedEnv = settingKey[Boolean]("True if running in an isolated environment") -lazy val exclusiveTestTag = settingKey[String]("Tag for exclusive execution tests") - -lazy val sideEffectTestFSConfig = taskKey[Unit]("Rewrite the JVM environment to contain the filesystem classpath information for integration tests") - -def createBackendEntry(childPath: Seq[File], parentPath: Seq[File]): Seq[File] = - (childPath.toSet -- parentPath.toSet).toSeq - -lazy val root = project.in(file(".")) - .settings(commonSettings) +lazy val root = project + .in(file(".")) .settings(noPublishSettings) - .settings(aggregate in assembly := false) - .settings(excludeTypelevelScalaLibrary) - .aggregate(datasource) - .enablePlugins(AutomateHeaderPlugin) + .aggregate(core) -// common components +val http4sVersion = "0.21.24" +val scalaXmlVersion = "1.1.0" -// Quasar needs to know where the DataSourceModule for the connector is -lazy val manifestSettings = - packageOptions in (Compile, packageBin) += - Package.ManifestAttributes("DataSource-Module" -> "quasar.physical.s3.S3DataSourceModule$") +val catsEffectVersion = "2.5.1" +val shimsVersion = "2.0.0" +val specsVersion = "4.10.6" -/** Lightweight connector module. - */ -lazy val datasource = project - .settings(name := "quasar-s3") - .settings(commonSettings) - .settings(targetSettings) - .settings(resolvers += Resolver.bintrayRepo("slamdata-inc", "maven-public")) +lazy val core = project + .in(file("datasource")) + .settings(addCompilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1")) .settings( - libraryDependencies ++= Dependencies.datasource, - wartremoverWarnings in (Compile, compile) --= Seq( - Wart.AsInstanceOf, - Wart.Equals, - Wart.Overloading)) - .settings(githubReleaseSettings) - .settings(excludeTypelevelScalaLibrary) - .settings(AssembleDatasource.setAssemblyKey) - .settings(manifestSettings) - .enablePlugins(AutomateHeaderPlugin) - + name := "quasar-datasource-s3", + + quasarPluginName := "s3", + + quasarPluginQuasarVersion := managedVersions.value("precog-quasar"), + + quasarPluginDatasourceFqcn := Some("quasar.physical.s3.S3DatasourceModule$"), + + /** Specify managed dependencies here instead of with `libraryDependencies`. + * Do not include quasar libs, they will be included based on the value of + * `datasourceQuasarVersion`. 
+ */ + quasarPluginDependencies ++= Seq( + "org.slf4s" %% "slf4s-api" % "1.7.25", + "org.http4s" %% "http4s-scala-xml" % http4sVersion, + "org.http4s" %% "http4s-async-http-client" % http4sVersion, + "org.scala-lang.modules" %% "scala-xml" % scalaXmlVersion, + "com.codecommit" %% "shims" % shimsVersion, + "org.typelevel" %% "cats-effect" % catsEffectVersion + ), + + libraryDependencies ++= Seq( + "com.precog" %% "quasar-foundation" % managedVersions.value("precog-quasar") % Test classifier "tests", + "org.http4s" %% "http4s-dsl" % http4sVersion % Test, + "org.specs2" %% "specs2-core" % specsVersion % Test, + "org.specs2" %% "specs2-scalaz" % specsVersion % Test, + "org.specs2" %% "specs2-scalacheck" % specsVersion % Test + )) + .enablePlugins(QuasarPlugin) + .evictToLocal("QUASAR_PATH", "connector", true) + .evictToLocal("QUASAR_PATH", "api", true) diff --git a/datasource/src/main/scala/quasar/physical/s3/AsyncHttpClientBuilder.scala b/datasource/src/main/scala/quasar/physical/s3/AsyncHttpClientBuilder.scala new file mode 100644 index 00000000..79bbda98 --- /dev/null +++ b/datasource/src/main/scala/quasar/physical/s3/AsyncHttpClientBuilder.scala @@ -0,0 +1,89 @@ +/* + * Copyright 2020 Precog Data + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import quasar.concurrent.NamedDaemonThreadFactory +import quasar.contrib.proxy.Search + +import org.asynchttpclient.proxy.{ProxyServer, ProxyServerSelector} +import org.asynchttpclient.uri.Uri +import org.asynchttpclient.{AsyncHttpClientConfig, DefaultAsyncHttpClientConfig} + +import org.http4s.client.Client +import org.http4s.client.asynchttpclient.AsyncHttpClient + +import org.slf4s.Logging + +import java.net.{InetSocketAddress, ProxySelector} +import java.net.Proxy +import java.net.Proxy.{Type => ProxyType} + +import scala.collection.JavaConverters._ + +import cats.effect.{ConcurrentEffect, Resource} + +object AsyncHttpClientBuilder extends Logging { + def apply[F[_]: ConcurrentEffect]: Resource[F, Client[F]] = + Resource.eval(Search[F]).flatMap(selector => + AsyncHttpClient.resource(mkConfig(selector))) + + def mkConfig[F[_]](proxySelector: ProxySelector): AsyncHttpClientConfig = + new DefaultAsyncHttpClientConfig.Builder() + .setMaxConnectionsPerHost(200) + .setMaxConnections(400) + .setRequestTimeout(Int.MaxValue) + .setReadTimeout(Int.MaxValue) + .setConnectTimeout(Int.MaxValue) + .setProxyServerSelector(ProxyVoleProxyServerSelector(proxySelector)) + .setThreadFactory(new NamedDaemonThreadFactory("http4s-async-http-client-worker")) + .build() + + private[s3] def sortProxies(proxies: List[Proxy]): List[Proxy] = + proxies.sortWith((l, r) => (l.`type`, r.`type`) match { + case (ProxyType.HTTP, ProxyType.DIRECT) => true + case (ProxyType.SOCKS, ProxyType.DIRECT) => true + case _ => false + }) + + private case class ProxyVoleProxyServerSelector(selector: ProxySelector) + extends ProxyServerSelector { + def select(uri: Uri): ProxyServer = { + ProxySelector.setDefault(selector) // NB: I don't think this is necessary + + Option(selector) + .flatMap(s => Option(s.select(uri.toJavaNetURI))) + .flatMap(proxies0 => { + val proxies = proxies0.asScala.toList + log.debug(s"Found proxies: $proxies") + + val sortedProxies = sortProxies(proxies) + log.debug(s"Prioritized proxies as: $sortedProxies") + + sortedProxies.headOption + }) + .flatMap(server => Option(server.address)) + .map(_.asInstanceOf[InetSocketAddress]) // because Java + .map(uriToProxyServer) + .orNull // because Java x2 + } + + private def uriToProxyServer(u: InetSocketAddress): ProxyServer = + (new ProxyServer.Builder(u.getHostName, u.getPort)).build + } +} diff --git a/datasource/src/main/scala/quasar/physical/s3/RequestSigning.scala b/datasource/src/main/scala/quasar/physical/s3/RequestSigning.scala index cecc1969..d73641ee 100644 --- a/datasource/src/main/scala/quasar/physical/s3/RequestSigning.scala +++ b/datasource/src/main/scala/quasar/physical/s3/RequestSigning.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,18 +17,21 @@ package quasar.physical.s3 import slamdata.Predef._ -import java.net.URLEncoder +import quasar.physical.s3.impl.s3EncodeQueryParams + import java.nio.charset.StandardCharsets import java.security.MessageDigest -import java.time.LocalDateTime +import java.time.{LocalDateTime, OffsetDateTime, ZoneOffset} import java.time.format.DateTimeFormatter import javax.crypto.Mac import javax.crypto.spec.SecretKeySpec -import cats.effect.Sync + +import cats.effect.{Bracket, Effect, Resource, Sync} import cats.implicits._ import fs2.Stream +import org.http4s.client.Client import org.http4s.headers.{Authorization, Date} -import org.http4s.{Header, Headers, Method, Request, Uri} +import org.http4s.{Header, Headers, Method, Request, Response, Uri} /** * Extracted from aws4s: https://github.com/aws4s/aws4s @@ -41,7 +44,7 @@ import org.http4s.{Header, Headers, Method, Request, Uri} object RequestSigning { private val hashAlg = "SHA-256" - private def sha256[F[_]: Sync](payload: Stream[F, Byte]): F[Array[Byte]] = + private def sha256Stream[F[_]: Sync](payload: Stream[F, Byte]): F[Array[Byte]] = payload.chunks.compile.fold(MessageDigest.getInstance(hashAlg))((md, chunk) => { md.update(chunk.toArray); md }).map(_.digest) private def sha256(payload: Array[Byte]): Array[Byte] = { @@ -56,10 +59,7 @@ object RequestSigning { } private def renderCanonicalQueryString(queryParams: Map[String, String]): String = - queryParams.toSeq - .sortBy(_._1) - .map({ case (k, v) => k + "=" + URLEncoder.encode(v, StandardCharsets.UTF_8.toString) }) - .mkString("&") + s3EncodeQueryParams(queryParams) private def hmacSha256(data: String, key: Array[Byte]): Array[Byte] = { val macAlg = "HmacSHA256" @@ -109,9 +109,9 @@ final case class RequestSigning( import RequestSigning._ def signedHeaders[F[_]: Sync](req: Request[F]): F[Headers] = - signedHeaders(req.uri.path, req.method, req.params, req.headers, req.body) + signHeaders(req.uri.path, req.method, req.params, req.headers, req.body) - def signedHeaders[F[_]: Sync](path: Uri.Path, method: Method, queryParams: Map[String, String], headers: Headers, payload: Stream[F, Byte]): F[Headers] = { + def signHeaders[F[_]: Sync](path: Uri.Path, method: Method, queryParams: Map[String, String], headers: Headers, payload: Stream[F, Byte]): F[Headers] = { val now: LocalDateTime = clock val credentialsNow = credentials @@ -120,7 +120,7 @@ final case class RequestSigning( Headers(credentialsNow.sessionToken.toList map xAmzSecurityTokenHeader) val extraDateHeaders: Headers = - if (!headers.iterator.exists(_.name == Date.name)) Headers(xAmzDateHeader(now)) else Headers() + if (!headers.iterator.exists(_.name === Date.name)) Headers.of(xAmzDateHeader(now)) else Headers.of() val signedHeaders = headers ++ extraDateHeaders ++ extraSecurityHeaders @@ -129,7 +129,7 @@ final case class RequestSigning( val sha256Payload: F[String] = payloadSigning match { case PayloadSigning.Unsigned => "UNSIGNED-PAYLOAD".pure[F] - case PayloadSigning.Signed => sha256(payload) map base16 + case PayloadSigning.Signed => sha256Stream(payload) map base16 } sha256Payload map { payloadHash => @@ -202,3 +202,40 @@ object PayloadSigning { /** Payload is not signed. Use only if consuming the payload twice would be problematic. 
*/ case object Unsigned extends PayloadSigning } + +object AwsV4Signing { + def apply[F[_]: Bracket[?[_], Throwable]: Effect](conf: S3Config)(client: Client[F]): Client[F] = { + def signRequest: Request[F] => F[Request[F]] = + conf.credentials match { + case Some(creds) => { + val requestSigning = for { + time <- Effect[F].delay(OffsetDateTime.now()) + datetime <- Effect[F].catchNonFatal( + LocalDateTime.ofEpochSecond(time.toEpochSecond, 0, ZoneOffset.UTC)) + signing = RequestSigning( + Credentials(creds.accessKey, creds.secretKey, None), + creds.region, + ServiceName.S3, + PayloadSigning.Signed, + datetime) + } yield signing + + req => { + // Requests that require signing also require `host` to always be present + val req0 = req.uri.host match { + case Some(host) => req.withHeaders(Headers.of(Header("host", host.value))) + case None => req + } + + requestSigning >>= (_.signedHeaders[F](req0).map(req0.withHeaders(_))) + } + } + case None => req => req.pure[F] + } + + def signAndSubmit: Request[F] => Resource[F, Response[F]] = + (req => Resource.suspend(signRequest(req).map(client.run(_)))) + + Client(signAndSubmit) + } +} diff --git a/datasource/src/main/scala/quasar/physical/s3/S3Config.scala b/datasource/src/main/scala/quasar/physical/s3/S3Config.scala index 2a6714b9..bf6fcc70 100644 --- a/datasource/src/main/scala/quasar/physical/s3/S3Config.scala +++ b/datasource/src/main/scala/quasar/physical/s3/S3Config.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,16 +17,24 @@ package quasar.physical.s3 import slamdata.Predef._ -import argonaut.{DecodeJson, DecodeResult, EncodeJson, Json} +import quasar.connector.{CompressionScheme, DataFormat} + +import cats.implicits._ +import argonaut._ , Argonaut._ import org.http4s.Uri -import cats.syntax.apply._ -import cats.syntax.flatMap._ -import cats.instances.option._ -import scalaz.syntax.show._ -import slamdata.Predef._ -import shims._ -final case class S3Config(bucket: Uri, parsing: S3JsonParsing, credentials: Option[S3Credentials]) +final case class S3Config( + bucket: Uri, + format: DataFormat, + credentials: Option[S3Credentials]) { self => + + def reconfigure(patch: S3Config): Either[S3Config, S3Config] = patch.credentials match { + case Some(_) => patch.sanitize.asLeft[S3Config] + case None => self.copy(bucket = patch.bucket, format = patch.format).asRight[S3Config] + } + + def sanitize: S3Config = self.copy(credentials = credentials as S3Config.RedactedCreds) +} final case class AccessKey(value: String) final case class SecretKey(value: String) @@ -35,109 +43,56 @@ final case class Region(name: String) final case class S3Credentials(accessKey: AccessKey, secretKey: SecretKey, region: Region) object S3Config { - /* Example configuration for public buckets with line-delimited JSON: - * { - * "bucket": "", - * "jsonParsing": "lineDelimited" - * } - * - * Example configuration for public buckets with array JSON: - * { - * "bucket": "", - * "jsonParsing": "array" - * } - * - * Example configuration for a secure bucket with array JSON: - * { - * "bucket":"https://some.bucket.uri", - * "jsonParsing":"array", - * "credentials": { - * "accessKey":"some access key", - * "secretKey":"super secret key", - * "region":"us-east-1" - * } - * } - * - * Example configuration for a secure bucket with line-delimited JSON: - * { - * "bucket":"https://some.bucket.uri", - * 
"jsonParsing":"lineDelimited", - * "credentials": { - * "accessKey":"some access key", - * "secretKey":"super secret key", - * "region":"us-east-1" - * } - * } - * - */ - private val parseStrings = - Map[String, S3JsonParsing]( - "array" -> S3JsonParsing.JsonArray, - "lineDelimited" -> S3JsonParsing.LineDelimited) + private val Redacted = "" + private val RedactedCreds = S3Credentials(AccessKey(Redacted), SecretKey(Redacted), Region(Redacted)) private val failureMsg = "Failed to parse configuration for S3 connector." - implicit val decodeJson: DecodeJson[S3Config] = - DecodeJson { c => - val b = c.get[String]("bucket").toOption >>= (Uri.fromString(_).toOption) - val jp = c.get[String]("jsonParsing").toOption >>= (parseStrings.get(_)) - - (c.downField("credentials").success, b, jp) match { - case (Some(_), Some(bk), Some(p)) => { - val creds = DecodeJson.of[S3Credentials].decode(c) - - creds.toOption match { - case Some(creds0) => DecodeResult.ok(S3Config(bk, p, Some(creds0))) - case None => DecodeResult.fail(creds.message.getOrElse(failureMsg), c.history) - } - } - - case (None, Some(bk), Some(p)) => - DecodeResult.ok(S3Config(bk, p, None)) - - case _ => - DecodeResult.fail(failureMsg, c.history) - } - } - - implicit val encodeJson: EncodeJson[S3Config] = - EncodeJson(config => config.credentials.fold( - Json.obj( - "bucket" -> Json.jString(config.bucket.renderString), - "jsonParsing" -> Json.jString(config.parsing.shows))) - (creds => Json.obj( - "bucket" -> Json.jString(config.bucket.renderString), - "jsonParsing" -> Json.jString(config.parsing.shows), - "credentials" -> Json.obj( - "accessKey" -> Json.jString(creds.accessKey.value), - "secretKey" -> Json.jString(creds.secretKey.value), - "region" -> Json.jString(creds.region.name))))) + implicit val uriCodec: CodecJson[Uri] = CodecJson( + u => Json.jString(u.renderString), + optionDecoder(_.as[String].toOption.flatMap(Uri.fromString(_).toOption), "Uri").decode(_)) + + val legacyDecodeFlatFormat: DecodeJson[DataFormat] = DecodeJson { c => c.as[String].flatMap { + case "array" => DecodeResult.ok(DataFormat.json) + case "lineDelimited" => DecodeResult.ok(DataFormat.ldjson) + case other => DecodeResult.fail(s"Unrecognized parsing format: $other", c.history) + }} + + val legacyDecodeDataFormat: DecodeJson[DataFormat] = DecodeJson( c => for { + parsing <- (c --\ "jsonParsing").as(legacyDecodeFlatFormat) + compressionScheme <- (c --\ "compressionScheme").as[Option[CompressionScheme]] + } yield compressionScheme match { + case None => parsing + case Some(_) => DataFormat.gzipped(parsing) + }) + + implicit val configCodec: CodecJson[S3Config] = CodecJson({ (config: S3Config) => + ("bucket" := config.bucket) ->: + ("credentials" := config.credentials) ->: + config.format.asJson + }, (c => for { + format <- c.as[DataFormat] ||| c.as(legacyDecodeDataFormat) + + bucket <- (c --\ "bucket").as[Uri] + credentials <- (c --\ "credentials").as[Option[S3Credentials]] + } yield S3Config(bucket, format, credentials))).setName(failureMsg) } object S3Credentials { - private val incompleteCredsMsg = - "The 'credentials' key must include 'accessKey', 'secretKey', and 'region'" - - implicit val decodeJson: DecodeJson[S3Credentials] = - DecodeJson { c => - val creds = c.downField("credentials") - val akey = creds.get[String]("accessKey").map(AccessKey(_)).toOption - val skey = creds.get[String]("secretKey").map(SecretKey(_)).toOption - val rkey = creds.get[String]("region").map(Region(_)).toOption - - (akey, skey, rkey).mapN(S3Credentials(_, _, _)) match { - case 
Some(creds0) => DecodeResult.ok(creds0) - case None => DecodeResult.fail(incompleteCredsMsg, c.history) - } - } - - implicit val encodeJson: EncodeJson[S3Credentials] = - EncodeJson { creds => - Json.obj( - "accessKey" -> Json.jString(creds.accessKey.value), - "secretKey" -> Json.jString(creds.secretKey.value), - "region" -> Json.jString(creds.region.name)) - } + implicit val accessKeyCodec: CodecJson[AccessKey] = + CodecJson(_.value.asJson, jdecode1(AccessKey(_)).decode) + + implicit val secretKeyCodec: CodecJson[SecretKey] = + CodecJson(_.value.asJson, jdecode1(SecretKey(_)).decode) + + implicit val regionCodec: CodecJson[Region] = + CodecJson(_.name.asJson, jdecode1(Region(_)).decode) + + implicit val credentialsCodec: CodecJson[S3Credentials] = + casecodec3( + S3Credentials.apply, S3Credentials.unapply)( + "accessKey", "secretKey", "region" + ).setName("Credentials must include 'accessKey', 'secretKey', and 'region'") } diff --git a/datasource/src/main/scala/quasar/physical/s3/S3DataSource.scala b/datasource/src/main/scala/quasar/physical/s3/S3DataSource.scala deleted file mode 100644 index e9d6a28a..00000000 --- a/datasource/src/main/scala/quasar/physical/s3/S3DataSource.scala +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright 2014–2018 SlamData Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package quasar.physical.s3 - -import quasar.api.QueryEvaluator -import quasar.api.datasource.DatasourceType -import quasar.api.resource.ResourcePath.{Leaf, Root} -import quasar.api.resource.{ResourceName, ResourcePath, ResourcePathType} -import quasar.connector.{MonadResourceErr, ResourceError} -import quasar.connector.datasource.LightweightDatasource -import quasar.contrib.pathy.APath -import quasar.contrib.scalaz.MonadError_ - -import slamdata.Predef.{Stream => _, _} - -import java.time.{OffsetDateTime, ZoneOffset, LocalDateTime} - -import cats.effect.Effect -import cats.syntax.applicative._ -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import cats.syntax.option._ -import fs2.Stream -import jawn.Facade -import org.http4s.{Request, Header, Headers} -import org.http4s.client.Client -import pathy.Path -import pathy.Path.{DirName, FileName} -import qdata.QDataEncode -import qdata.json.QDataFacade -import scalaz.{\/-, -\/, OptionT} -import shims._ - -final class S3DataSource[F[_]: Effect: MonadResourceErr]( - client: Client[F], - config: S3Config) - extends LightweightDatasource[F, Stream[F, ?]] { - def kind: DatasourceType = s3.datasourceKind - - def evaluator[R: QDataEncode]: QueryEvaluator[F, ResourcePath, Stream[F, R]] = - new QueryEvaluator[F, ResourcePath, Stream[F, R]] { - implicit val facade: Facade[R] = QDataFacade.qdata[R] - - val MR = MonadError_[F, ResourceError] - - def evaluate(path: ResourcePath): F[Stream[F, R]] = - path match { - case Root => - Stream.empty.covaryAll[F, R].pure[F] - case Leaf(file) => - impl.evaluate[F, R](config.parsing, client, config.bucket, file, signRequest(config)) map { - case None => - Stream.eval(MR.raiseError(ResourceError.pathNotFound(path))) - case Some(s) => s - } - } - } - - def prefixedChildPaths(path: ResourcePath): F[Option[Stream[F, (ResourceName, ResourcePathType)]]] = - impl.children( - client, - config.bucket, - dropEmpty(path.toPath), - signRequest(config)) map { - case None => - none[Stream[F, (ResourceName, ResourcePathType)]] - case Some(paths) => - paths.map { - case -\/(Path.DirName(dn)) => (ResourceName(dn), ResourcePathType.prefix) - case \/-(Path.FileName(fn)) => (ResourceName(fn), ResourcePathType.leafResource) - }.some - } - - def pathIsResource(path: ResourcePath): F[Boolean] = path match { - case Root => false.pure[F] - case Leaf(file) => Path.refineType(dropEmpty(file)) match { - case -\/(_) => false.pure[F] - case \/-(f) => impl.isResource(client, config.bucket, f, signRequest(config)) - } - } - - def isLive: F[Boolean] = - OptionT(prefixedChildPaths(ResourcePath.Root)).isDefined - - // - - private def dropEmpty(path: APath): APath = - Path.peel(path) match { - case Some((d, \/-(FileName(fn)))) if fn.isEmpty => d - case Some((d, -\/(DirName(dn)))) if dn.isEmpty => d - case _ => path - } - - private def signRequest(c: S3Config): Request[F] => F[Request[F]] = - S3DataSource.signRequest(c) -} - -object S3DataSource { - def signRequest[F[_]: Effect](c: S3Config): Request[F] => F[Request[F]] = - c.credentials match { - case Some(creds) => { - val requestSigning = for { - time <- Effect[F].delay(OffsetDateTime.now()) - datetime <- Effect[F].catchNonFatal( - LocalDateTime.ofEpochSecond(time.toEpochSecond, 0, ZoneOffset.UTC)) - signing = RequestSigning( - Credentials(creds.accessKey, creds.secretKey, None), - creds.region, - ServiceName.S3, - PayloadSigning.Signed, - datetime) - } yield signing - - req => { - // Requests that require signing also require `host` to always be present - val req0 = req.uri.host 
match { - case Some(host) => req.withHeaders(Headers(Header("host", host.value))) - case None => req - } - - requestSigning >>= (_.signedHeaders[F](req0).map(req0.withHeaders(_))) - } - } - case None => req => req.pure[F] - } -} diff --git a/datasource/src/main/scala/quasar/physical/s3/S3DataSourceModule.scala b/datasource/src/main/scala/quasar/physical/s3/S3DataSourceModule.scala deleted file mode 100644 index 5feb96d7..00000000 --- a/datasource/src/main/scala/quasar/physical/s3/S3DataSourceModule.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2014–2018 SlamData Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package quasar.physical.s3 - - -import quasar.Disposable -import quasar.api.datasource.DatasourceError -import quasar.api.datasource.DatasourceError.InitializationError -import quasar.api.datasource.DatasourceType -import quasar.api.resource.ResourcePath -import quasar.connector.Datasource -import quasar.connector.LightweightDatasourceModule -import quasar.connector.MonadResourceErr - -import argonaut.{EncodeJson, Json} -import cats.effect.{ConcurrentEffect, Timer} -import fs2.Stream -import org.http4s.client.blaze.Http1Client -import scalaz.{\/, NonEmptyList} -import scalaz.syntax.either._ -import cats.syntax.applicative._ -import cats.syntax.flatMap._ -import cats.syntax.option._ -import shims._ -import slamdata.Predef.{Stream => _, _} - -object S3DataSourceModule extends LightweightDatasourceModule { - def kind: DatasourceType = s3.datasourceKind - - def lightweightDatasource[F[_]: ConcurrentEffect: MonadResourceErr: Timer](config: Json) - : F[InitializationError[Json] \/ Disposable[F, Datasource[F, Stream[F, ?], ResourcePath]]] = { - config.as[S3Config].result match { - case Right(s3Config) => { - Http1Client[F]() flatMap { client => - val s3Ds = new S3DataSource[F](client, s3Config) - val ds: Datasource[F, Stream[F, ?], ResourcePath] = s3Ds - - s3Ds.isLive.ifM({ - Disposable(ds, client.shutdown).right.pure[F] - }, - { - val msg = "Unable to ListObjects at the root of the bucket" - - DatasourceError - .accessDenied[Json, InitializationError[Json]](kind, config, msg) - .left.pure[F] - }) - } - } - - case Left((msg, _)) => - DatasourceError - .invalidConfiguration[Json, InitializationError[Json]](kind, config, NonEmptyList(msg)) - .left.pure[F] - } - } - - def sanitizeConfig(config: Json): Json = { - val redactedCreds = - S3Credentials( - AccessKey(""), - SecretKey(""), - Region("")) - - config.as[S3Config].result.toOption.map((c: S3Config) => - c.credentials.fold(c)(_ => c.copy(credentials = redactedCreds.some))) - .fold(config)(rc => EncodeJson.of[S3Config].encode(rc)) - } -} diff --git a/datasource/src/main/scala/quasar/physical/s3/S3Datasource.scala b/datasource/src/main/scala/quasar/physical/s3/S3Datasource.scala new file mode 100644 index 00000000..d755ce94 --- /dev/null +++ b/datasource/src/main/scala/quasar/physical/s3/S3Datasource.scala @@ -0,0 +1,122 @@ +/* + * Copyright 2020 Precog Data + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import quasar.api.datasource.DatasourceType +import quasar.api.resource.ResourcePath.{Leaf, Root} +import quasar.api.resource.{ResourceName, ResourcePath, ResourcePathType} +import quasar.connector.{MonadResourceErr, QueryResult, ResourceError, ResultData} +import quasar.connector.datasource.{BatchLoader, DatasourceModule, Loader} +import quasar.contrib.scalaz.MonadError_ +import quasar.qscript.InterpretedRead + +import cats.data.{NonEmptyList, OptionT} +import cats.effect.{Resource, Sync} +import cats.syntax.applicative._ +import cats.syntax.eq._ +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import cats.syntax.option._ +import fs2.Stream +import org.http4s.client.Client +import pathy.Path +import scalaz.{\/-, -\/} +import shims._ + +final class S3Datasource[F[_]: Sync: MonadResourceErr]( + client: Client[F], + config: S3Config) + extends DatasourceModule.DS[F] { + + import S3Datasource._ + + def kind: DatasourceType = s3.datasourceKind + + val loaders = NonEmptyList.of(Loader.Batch(BatchLoader.Full { (iRead: InterpretedRead[ResourcePath]) => + iRead.path match { + case Root => + Resource.eval(MonadError_[F, ResourceError].raiseError[QueryResult[F]]( + ResourceError.notAResource(iRead.path))) + + case Leaf(file) => + impl.evaluate[F](client, config.bucket, file) map { bytes => + QueryResult.typed(config.format, ResultData.Continuous(bytes), iRead.stages) + } + } + })) + + def prefixedChildPaths(path: ResourcePath) + : Resource[F, Option[Stream[F, (ResourceName, ResourcePathType.Physical)]]] = + pathIsResource(path) evalMap { + case true => + Stream.empty + .covaryOutput[(ResourceName, ResourcePathType.Physical)] + .covary[F].some.pure[F] // FIXME: static guarantees from pathIsResource + + case false => + impl.children(client, config.bucket, path.toPath) map { + case None => + none[Stream[F, (ResourceName, ResourcePathType.Physical)]] + case Some(paths) => + paths.map { + case -\/(Path.DirName(dn)) => (ResourceName(dn), ResourcePathType.prefix) + case \/-(Path.FileName(fn)) => (ResourceName(fn), ResourcePathType.leafResource) + }.some + } + } + + def pathIsResource(path: ResourcePath): Resource[F, Boolean] = + Resource.eval(path match { + case Root => false.pure[F] + case Leaf(file) => Path.refineType(file) match { + case -\/(_) => false.pure[F] + case \/-(f) => impl.isResource(client, config.bucket, f) + } + }) + + def isLive(maxRedirects: Int): F[Liveness] = + impl.preflightCheck(client, config.bucket, maxRedirects) flatMap { + case Some(newBucket) => + OptionT(impl.children(client, newBucket, Path.rootDir)) + .fold(Liveness.notLive)(_ => + if(newBucket === config.bucket) + Liveness.live + else + Liveness.redirected(config.copy(bucket = newBucket))) + case None => + Liveness.notLive.pure[F] + } +} + +object S3Datasource { + sealed abstract class Liveness + final case class Redirected(conf: S3Config) extends Liveness + final case object Live extends Liveness + final case object NotLive extends Liveness + + 
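// Liveness reports the outcome of the bucket preflight check: Live (the
+  // bucket is reachable as configured), Redirected (it moved permanently;
+  // carries the config updated with the new bucket URI) or NotLive
+  // (unreachable).
+ 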
object Liveness {
+    def live: Liveness = Live
+    def notLive: Liveness = NotLive
+    def redirected(conf: S3Config): Liveness = Redirected(conf)
+  }
+
+  def apply[F[_]: Sync: MonadResourceErr](client: Client[F], config: S3Config)
+      : S3Datasource[F] =
+    new S3Datasource[F](client, config)
+}
diff --git a/datasource/src/main/scala/quasar/physical/s3/S3DatasourceModule.scala b/datasource/src/main/scala/quasar/physical/s3/S3DatasourceModule.scala
new file mode 100644
index 00000000..68e5b6bf
--- /dev/null
+++ b/datasource/src/main/scala/quasar/physical/s3/S3DatasourceModule.scala
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2020 Precog Data
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package quasar.physical.s3
+
+import slamdata.Predef._
+
+import quasar.RateLimiting
+import quasar.api.datasource.{DatasourceError, DatasourceType}
+import quasar.api.datasource.DatasourceError.{ConfigurationError, InitializationError}
+import quasar.connector.{ByteStore, MonadResourceErr, ExternalCredentials}
+import quasar.connector.datasource.{DatasourceModule, Reconfiguration}
+import quasar.physical.s3.S3Datasource.{Live, NotLive, Redirected}
+
+import scala.concurrent.ExecutionContext
+import scala.util.Either
+import java.util.UUID
+
+import argonaut.{Json, Argonaut}, Argonaut._
+import cats.effect.{ConcurrentEffect, ContextShift, Resource, Sync, Timer}
+import cats.kernel.Hash
+import cats.implicits._
+import org.http4s.client.Client
+import org.http4s.client.middleware.FollowRedirect
+import scalaz.NonEmptyList
+import shims._
+
+object S3DatasourceModule extends DatasourceModule {
+
+  private val MaxRedirects = 3
+
+  def kind: DatasourceType = s3.datasourceKind
+
+  @SuppressWarnings(Array("org.wartremover.warts.ImplicitParameter"))
+  def datasource[F[_]: ConcurrentEffect: ContextShift: MonadResourceErr: Timer, A: Hash](
+      config: Json,
+      rateLimiting: RateLimiting[F, A],
+      byteStore: ByteStore[F],
+      getAuth: UUID => F[Option[ExternalCredentials[F]]])(
+      implicit ec: ExecutionContext)
+      : Resource[F, Either[InitializationError[Json], DatasourceModule.DS[F]]] =
+    config.as[S3Config].result match {
+      case Right(s3Config) =>
+        mkClient(s3Config) evalMap { client =>
+          val s3Ds = S3Datasource[F](client, s3Config)
+          // FollowRedirect is not mounted in mkClient because it interferes
+          // with permanent redirect handling
+          val redirectClient = FollowRedirect(MaxRedirects)(client)
+
+          s3Ds.isLive(MaxRedirects) map {
+            case Redirected(newConfig) =>
+              Right(new S3Datasource[F](redirectClient, newConfig))
+
+            case Live =>
+              Right(new S3Datasource[F](redirectClient, s3Config))
+
+            case NotLive =>
+              val msg = "Unable to ListObjects at the root of the bucket"
+              Left(DatasourceError
+                .accessDenied[Json, InitializationError[Json]](kind, sanitizeConfig(config), msg))
+          }
+        }
+
+      case Left((msg, _)) =>
+        DatasourceError
+          .invalidConfiguration[Json, InitializationError[Json]](kind, sanitizeConfig(config), NonEmptyList(msg))
+          .asLeft[DatasourceModule.DS[F]]
+          .pure[Resource[F, ?]]
+    }
+
+  def migrateConfig[F[_]:
Sync](from: Long, to: Long, config: Json): F[Either[ConfigurationError[Json], Json]] = + Sync[F] delay { + config.as[S3Config].result match { + case Left(_) => + Left(DatasourceError.MalformedConfiguration[Json]( + kind, + sanitizeConfig(config), + "Configuration to migrate is malformed.")) + case Right(cfg) => Right(cfg.asJson) + } + } + + def reconfigure(originalJson: Json, patchJson: Json): Either[ConfigurationError[Json], (Reconfiguration, Json)] = { + val back = for { + original <- originalJson.as[S3Config].result.leftMap(_ => + DatasourceError + .MalformedConfiguration[Json]( + kind, + sanitizeConfig(originalJson), + "Source configuration in reconfiguration is malformed.")) + + patch <- patchJson.as[S3Config].result.leftMap(_ => + DatasourceError + .MalformedConfiguration[Json]( + kind, + sanitizeConfig(patchJson), + "Patch configuration in reconfiguration is malformed.")) + + reconfigured <- original.reconfigure(patch).leftMap(c => + DatasourceError.InvalidConfiguration[Json]( + kind, + c.asJson, + NonEmptyList("Patch configuration contains sensitive information."))) + } yield reconfigured.asJson + + back.tupleLeft(Reconfiguration.Reset) + } + + override def sanitizeConfig(config: Json): Json = config.as[S3Config].result match { + case Left(_) => + config + case Right(cfg) => + cfg.sanitize.asJson + } + + @SuppressWarnings(Array("org.wartremover.warts.ImplicitParameter")) + private def mkClient[F[_]: ConcurrentEffect](conf: S3Config) + : Resource[F, Client[F]] = + AsyncHttpClientBuilder[F].map[F, Client[F]](AwsV4Signing(conf)) +} diff --git a/datasource/src/main/scala/quasar/physical/s3/impl/children.scala b/datasource/src/main/scala/quasar/physical/s3/impl/children.scala index 17d26a86..8f324b28 100644 --- a/datasource/src/main/scala/quasar/physical/s3/impl/children.scala +++ b/datasource/src/main/scala/quasar/physical/s3/impl/children.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,27 +25,14 @@ import quasar.contrib.pathy._ import quasar.physical.s3.S3Error import cats.data.{EitherT, OptionT} -import cats.effect.{Sync, Effect} -import cats.instances.either._ -import cats.instances.int._ -import cats.instances.list._ -import cats.instances.option._ -import cats.instances.tuple._ -import cats.syntax.alternative._ -import cats.syntax.applicative._ -import cats.syntax.bifunctor._ -import cats.syntax.either._ -import cats.syntax.eq._ -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import cats.syntax.option._ -import cats.syntax.traverse._ +import cats.effect.Sync +import cats.implicits._ import fs2.Stream -import org.http4s.{MalformedMessageBodyFailure, Status} +import org.http4s.{MalformedMessageBodyFailure, Query, Status} import org.http4s.client.{Client, UnexpectedStatus} import org.http4s.headers.`Content-Type` import org.http4s.scalaxml.{xml => xmlDecoder} -import org.http4s.{Charset, DecodeResult, EntityDecoder, MediaRange, Message, Request, Uri} +import org.http4s.{Charset, DecodeResult, EntityDecoder, Media, MediaRange, Request, Uri} import pathy.Path import pathy.Path.{DirName, FileName} import scala.xml.Elem @@ -53,27 +40,16 @@ import scalaz.{\/-, -\/} import shims._ object children { - // S3 provides a recursive listing (akin to `find` or - // `dirtree`); we filter out children that aren't direct - // children. We can only list 1000 keys, and need pagination - // to do more. 
That's 1000 *recursively listed* keys, so we - // could conceivably list *none* of the direct children of a - // folder without pagination, depending on the order AWS - // sends them in. - // // FIXME: dir should be ADir and pathToDir should be deleted - def apply[F[_]: Effect]( - client: Client[F], - bucket: Uri, - dir: APath, - sign: Request[F] => F[Request[F]]) + def apply[F[_]: Sync](client: Client[F], bucket: Uri, dir: APath) : F[Option[Stream[F, PathSegment]]] = { val msg = "Unexpected failure when streaming a multi-page response for ListBuckets" + val stream0 = - handleS3(fetchResults(client, bucket, dir, None, sign)) map (results => + handleS3(fetchResults(client, bucket, dir, None)) map (results => Stream.iterateEval(results) { case (_, next0) => - handleS3(fetchResults(client, bucket, dir, next0, sign)) + handleS3(fetchResults(client, bucket, dir, next0)) .getOrElseF(Sync[F].raiseError(new Exception(msg))) }) @@ -86,48 +62,38 @@ object children { /// - // converts non-recoverable errors to runtime errors. Also decide - // which errors we want to report as None rather than runtime exceptions. - private def handleS3[F[_]: Sync, A](e: EitherT[F, S3Error, A]): OptionT[F, A] = - OptionT(e.value.flatMap { - case Left(S3Error.NotFound) => none.pure[F] - case Left(S3Error.Forbidden) => none.pure[F] - case Left(S3Error.MalformedResponse) => none.pure[F] - case Left(S3Error.UnexpectedResponse(msg)) => Sync[F].raiseError(new Exception(msg)) - case Right(a) => a.some.pure[F] - }) + private def handleS3[F[_]: Sync, A](e: EitherT[F, S3Error, A]) + : OptionT[F, A] = e.toOption // FIXME parse the results as they arrive using an XML streaming parser, instead of paging // one response at a time - private def fetchResults[F[_]: Effect]( + private def fetchResults[F[_]: Sync]( client: Client[F], bucket: Uri, dir: APath, - next: Option[ContinuationToken], - sign: Request[F] => F[Request[F]]) + next: Option[ContinuationToken]) : EitherT[F, S3Error, (Stream[F, APath], Option[ContinuationToken])] = - listObjects(client, bucket, dir, next, sign) + listObjects(client, bucket, dir, next) .flatMap(extractList(_).toEitherT) .map(_.leftMap(Stream.emits(_))) private def toPathSegment[F[_]](s: Stream[F, APath], dir: APath): Stream[F, PathSegment] = - s.filter(path => Path.parentDir(path) === pathToDir(dir)) - .filter(path => path =!= dir) - .flatMap(p => Stream.emits(Path.peel(p).toList)) - .map(_._2) + s.flatMap(p => Stream.emits(Path.peel(p).toList)).map(_._2) - private def listObjects[F[_]: Effect]( + private def listObjects[F[_]: Sync]( client: Client[F], bucket: Uri, dir: APath, - next: Option[ContinuationToken], - sign: Request[F] => F[Request[F]]) + next: Option[ContinuationToken]) : EitherT[F, S3Error, Elem] = - EitherT(sign(listingRequest(client, bucket, dir, next)).flatMap { r => - Sync[F].recover[Either[S3Error, Elem]](client.expect[Elem](r)(utf8Xml).map(_.asRight)) { - case UnexpectedStatus(Status.Forbidden) => S3Error.Forbidden.asLeft - case MalformedMessageBodyFailure(_, _) => S3Error.MalformedResponse.asLeft - } + EitherT(Sync[F].recover[Either[S3Error, Elem]]( + client.expect(listingRequest(client, bucket, dir, next))(utf8Xml).map(_.asRight)) { + case UnexpectedStatus(Status.Forbidden) => + S3Error.Forbidden.asLeft[Elem] + case UnexpectedStatus(Status.MovedPermanently) => + S3Error.UnexpectedResponse(Status.MovedPermanently.reason).asLeft[Elem] + case MalformedMessageBodyFailure(_, _) => + S3Error.MalformedResponse.asLeft[Elem] }) private def listingRequest[F[_]]( @@ -146,23 +112,25 @@ object 
children {
     val listType = ("list-type", "2").some
     // Converts a pathy Path to an S3 object prefix.
     val objectPrefix = aPathToObjectPrefix(dir)
-    val prefix = objectPrefix.map(("prefix", _))
+    val prefix = objectPrefix.map(("prefix", _)).getOrElse(("prefix", "")).some
+    val startAfter = objectPrefix.map(("start-after", _))
+    val delimiter = ("delimiter", "/").some
     val ct0 = ct.map(_.value).map(("continuation-token", _))
 
-    val queryUri = List(listType, prefix, ct0).unite.foldLeft(listingQuery) {
-      case (uri0, (param, value)) => uri0.withQueryParam(param, value)
-    }
+    val q = Query.fromString(s3EncodeQueryParams(
+      List(listType, delimiter, prefix, startAfter, ct0).unite.toMap))
 
-    Request[F](uri = queryUri)
+    Request[F](uri = listingQuery.copy(query = q))
   }
 
-  // Lists all objects and prefixes from a ListObjects request. This needs to be filtered
+  // Lists all objects and prefixes from a ListObjects request.
   private def extractList(doc: Elem): Either[S3Error, (List[APath], Option[ContinuationToken])] = {
     val noContentMsg = S3Error.UnexpectedResponse("XML received from AWS API has no top-level <Contents>")
     val noKeyCountMsg = S3Error.UnexpectedResponse("XML received from AWS API has no top-level <KeyCount>")
     val noKeyMsg = S3Error.UnexpectedResponse("XML received from AWS API has no <Key> elements under <Contents>")
-    val noParseObjectMsg = S3Error.UnexpectedResponse("Failed to parse object path in S3 API response")
+    val noParseObjectMsg = S3Error.UnexpectedResponse("Failed to parse object paths in S3 API response")
+    val noParsePrefixesMsg = S3Error.UnexpectedResponse("Failed to parse prefixes in S3 API response")
 
     val contents = Either.catchNonFatal(doc \ "Contents").leftMap(_ => noContentMsg)
@@ -178,15 +146,33 @@ object children {
       .map(_.toList)
       .flatMap(_.traverse[Either[S3Error, ?], String](elem =>
         Either.catchNonFatal((elem \ "Key").text).leftMap(_ => noKeyMsg)))
-
     val childPaths =
       children
        .flatMap(_.traverse[Either[S3Error, ?], APath](pth =>
         Either.fromOption(s3NameToPath(pth), noParseObjectMsg)))
 
+    val commonPrefixes = Either.catchNonFatal(doc \ "CommonPrefixes").leftMap(_ => noParsePrefixesMsg)
+    val prefixes: Either[S3Error, List[String]] =
+      commonPrefixes
+        .map(_.toList)
+        .flatMap(_.traverse[Either[S3Error, ?], String](elem =>
+          Either.catchNonFatal((elem \ "Prefix").text).leftMap(_ => noParsePrefixesMsg)))
+    val prefixesPaths =
+      prefixes
+        .flatMap(_.traverse[Either[S3Error, ?], APath](pth =>
+          Either.fromOption(s3NameToPath(pth), noParsePrefixesMsg)))
+
+    val allPaths = (prefixesPaths, childPaths) match {
+      case (Left(_), Left(_)) =>
+        S3Error.UnexpectedResponse("No prefixes or objects in response").asLeft
+      case (Right(listing), Left(_)) => listing.asRight
+      case (Left(_), Right(listing)) => listing.asRight
+      case (Right(listingL), Right(listingR)) => (listingL ++ listingR).asRight
+    }
+
     keyCount.flatMap(kc =>
       if (kc === 0) S3Error.NotFound.asLeft
-      else childPaths.map((_, continuationToken)))
+      else allPaths.map((_, continuationToken)))
   }
 
   private def aPathToObjectPrefix(apath: APath): Option[String] = {
@@ -194,8 +180,8 @@ object children {
     // entire bucket. Otherwise, we have to drop
     // the first `/`, because object prefixes can't
     // begin with `/`.
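+    // NB: the change below prints directories via pathToDir so the prefix
+    // keeps its trailing slash; combined with the "/" delimiter set in
+    // listingRequest, that is what lets ListObjectsV2 group deeper keys
+    // into <CommonPrefixes>.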
- if (apath != Path.rootDir) { - Path.posixCodec.printPath(apath).drop(1).self.some // .replace("/", "%2F") + if (apath =!= Path.rootDir) { + pathToDir(apath).map(Path.posixCodec.printPath(_).drop(1).self) } else { none[String] @@ -231,10 +217,11 @@ object children { new EntityDecoder[F, Elem] { override def consumes: Set[MediaRange] = ev.consumes - override def decode(msg: Message[F], strict: Boolean): DecodeResult[F, Elem] = { - val utf8ContentType = msg.headers.get(`Content-Type`).map(_.withCharset(Charset.`UTF-8`)) + override def decode(media: Media[F], strict: Boolean): DecodeResult[F, Elem] = { + val utf8ContentType = media.headers.get(`Content-Type`).map(_.withCharset(Charset.`UTF-8`)) + val h = utf8ContentType.fold(media.headers)(media.headers.put(_)) - ev.decode(msg.withContentTypeOption(utf8ContentType), strict) + ev.decode(Media[F](media.body, h), strict) } } } diff --git a/datasource/src/main/scala/quasar/physical/s3/impl/evaluate.scala b/datasource/src/main/scala/quasar/physical/s3/impl/evaluate.scala index fd8f177e..81bf8077 100644 --- a/datasource/src/main/scala/quasar/physical/s3/impl/evaluate.scala +++ b/datasource/src/main/scala/quasar/physical/s3/impl/evaluate.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,100 +17,54 @@ package quasar.physical.s3 package impl +import slamdata.Predef._ import quasar.api.resource.ResourcePath import quasar.connector.{MonadResourceErr, ResourceError} import quasar.contrib.pathy._ -import quasar.physical.s3.S3JsonParsing - -import slamdata.Predef._ -import cats.data.OptionT -import cats.effect.{Effect, Sync} +import cats.Monad +import cats.effect.Resource import cats.syntax.applicative._ -import cats.syntax.flatMap._ -import cats.syntax.option._ -import fs2.{Pipe, Stream} -import jawn.{Facade, ParseException} -import jawnfs2._ +import fs2.Stream +import org.http4s.{Request, Status, Uri} import org.http4s.client._ -import org.http4s.{Request, Response, Status, Uri} import pathy.Path import shims._ object evaluate { - def apply[F[_]: Effect, R: Facade]( - jsonParsing: S3JsonParsing, - client: Client[F], - uri: Uri, - file: AFile, - sign: Request[F] => F[Request[F]]) - (implicit MR: MonadResourceErr[F]) - : F[Option[Stream[F, R]]] = { + def apply[F[_]: Monad: MonadResourceErr]( + client: Client[F], uri: Uri, file: AFile) + : Resource[F, Stream[F, Byte]] = { // Convert the pathy Path to a POSIX path, dropping // the first slash, which is what S3 expects for object paths val objectPath = Path.posixCodec.printPath(file).drop(1) - // Put the object path after the bucket URI - val queryUri = appendPathUnencoded(uri, objectPath) + val queryUri = appendPathS3Encoded(uri, objectPath) val request = Request[F](uri = queryUri) - sign(request) >>= { req => - val stream = OptionT( - streamRequest[F, R](client, req) { resp => - resp.body.chunks.map(_.toByteBuffer).through(parse(jsonParsing)) - }) - - stream.map(_.handleErrorWith { - case ParseException(message, _, _, _) => - Stream.eval(MR.raiseError(parseError(file, jsonParsing, message))) - }).value - } + streamRequest[F](client, request, file) } //// - private def parse[F[_], R: Facade](jsonParsing: S3JsonParsing) - : Pipe[F, ByteBuffer, R] = - jsonParsing match { - case S3JsonParsing.JsonArray => unwrapJsonArray[F, ByteBuffer, R] - case S3JsonParsing.LineDelimited => parseJsonStream[F, ByteBuffer, R] - } - - private 
def parseError(path: AFile, parsing: S3JsonParsing, message: String) - : ResourceError = { - val msg: String = - s"Could not parse the file as JSON. Ensure you've configured the correct jsonParsing option for this bucket: $message" + private def streamRequest[F[_]: Monad: MonadResourceErr]( + client: Client[F], req: Request[F], file: AFile) + : Resource[F, Stream[F, Byte]] = + client.run(req).evalMap[F, Stream[F, Byte]](res => res.status match { + case Status.NotFound => + MonadResourceErr[F].raiseError(ResourceError.pathNotFound(ResourcePath.leaf(file))) - val expectedFormat: String = parsing match { - case S3JsonParsing.LineDelimited => "Newline-delimited JSON" - case S3JsonParsing.JsonArray => "Array-wrapped JSON" - } + case Status.Forbidden => + MonadResourceErr[F].raiseError(accessDeniedError(ResourcePath.leaf(file))) - ResourceError.malformedResource( - ResourcePath.Leaf(path), - expectedFormat, - msg) - } + case Status.Ok => + res.body.pure[F] - // there is no method in http4s 0.16.6a that does what we - // want here, so we have to implement it ourselves. - // what we want specifically is to make an HTTP request, - // take the response, if it's a 404 return `None`, - // if it's `Some(resp)` we compute an fs2 stream from - // it using `f` and then call `dispose` on that response - // once we've finished streaming. - private def streamRequest[F[_]: Sync, A]( - client: Client[F], req: Request[F])( - f: Response[F] => Stream[F, A]) - : F[Option[Stream[F, A]]] = - client.open(req).flatMap { - case DisposableResponse(response, dispose) => - response.status match { - case Status.NotFound => none.pure[F] - case Status.Ok => f(response).onFinalize(dispose).some.pure[F] - case s => Sync[F].raiseError(new Exception(s"Unexpected status $s")) - } - } + case other => + MonadResourceErr[F].raiseError(unexpectedStatusError( + ResourcePath.leaf(file), + other)) + }) } diff --git a/datasource/src/main/scala/quasar/physical/s3/impl/isResource.scala b/datasource/src/main/scala/quasar/physical/s3/impl/isResource.scala index 76a16da4..5f79ea81 100644 --- a/datasource/src/main/scala/quasar/physical/s3/impl/isResource.scala +++ b/datasource/src/main/scala/quasar/physical/s3/impl/isResource.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,50 +16,56 @@ package quasar.physical.s3.impl +import slamdata.Predef._ +import quasar.api.resource.ResourcePath +import quasar.connector.MonadResourceErr import quasar.contrib.pathy._ -import slamdata.Predef._ -import cats.effect.Effect +import cats.Monad import cats.syntax.applicative._ import cats.syntax.flatMap._ + import org.http4s.client.Client import org.http4s.headers.Range import org.http4s.{Method, Request, Status, Uri, Headers} + import pathy.Path // The simplest method to implement, check that HEAD doesn't // give a 404. object isResource { - def apply[F[_]: Effect](client: Client[F], uri: Uri, file: AFile, sign: Request[F] => F[Request[F]]): F[Boolean] = { + def apply[F[_]: Monad: MonadResourceErr]( + client: Client[F], uri: Uri, file: AFile) + : F[Boolean] = { // Print pathy.Path as POSIX path, without leading slash, // for S3's consumption. val objectPath = Path.posixCodec.printPath(file).drop(1) // Add the object's path to the bucket URI. 
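+    // appendPathS3Encoded applies the S3-specific percent-encoding defined
+    // in impl/package.scala, so keys containing special characters resolve
+    // (and sign) correctly.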
- val queryUri = appendPathUnencoded(uri, objectPath) + val queryUri = appendPathS3Encoded(uri, objectPath) // Request with HEAD, to get metadata. // attempt to get the first byte to verify this is not empty val request = Request[F]() .withUri(queryUri) .withMethod(Method.HEAD) - .withHeaders(Headers(Range(0, 1))) + .withHeaders(Headers.of(Range(0, 1))) - if (Path.identicalPath(Path.rootDir, file)) { + if (Path.identicalPath(Path.rootDir, file)) false.pure[F] - } else { + else // Don't use the metadata, just check the request status - sign(request) >>= (r => - client.status(r) >>= { - case Status.Ok => true.pure[F] - case Status.PartialContent => true.pure[F] - case Status.NotFound => false.pure[F] - case Status.RangeNotSatisfiable => false.pure[F] - case Status.Forbidden => Effect[F].raiseError(new Exception(s"Permission denied. Make sure you have access to the configured bucket")) - case s => Effect[F].raiseError(new Exception(s"Unexpected status returned during `isResource` call: $s")) - }) - } + client.status(request) >>= { + case Status.Ok => true.pure[F] + case Status.PartialContent => true.pure[F] + case Status.NotFound => false.pure[F] + case Status.RangeNotSatisfiable => false.pure[F] + case Status.Forbidden => + MonadResourceErr[F].raiseError(accessDeniedError(ResourcePath.leaf(file))) + case other => + MonadResourceErr[F].raiseError(unexpectedStatusError(ResourcePath.leaf(file), other)) + } } } diff --git a/datasource/src/main/scala/quasar/physical/s3/impl/package.scala b/datasource/src/main/scala/quasar/physical/s3/impl/package.scala index 3b693063..c667709f 100644 --- a/datasource/src/main/scala/quasar/physical/s3/impl/package.scala +++ b/datasource/src/main/scala/quasar/physical/s3/impl/package.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,17 @@ package quasar.physical.s3 import slamdata.Predef._ +import quasar.api.resource.ResourcePath +import quasar.connector.ResourceError + +import java.net.URLEncoder +import java.nio.charset.StandardCharsets import cats.instances.char._ import cats.instances.option._ import cats.syntax.eq._ -import org.http4s.Uri + +import org.http4s.{Status, Uri} package object impl { // this type comes up too many times to write out myself. 
@@ -30,13 +36,44 @@ package object impl {
   private type APath = pathy.Path[pathy.Path.Abs, scala.Any, pathy.Path.Sandboxed]
 
   // This should be used instead of the `/` method from http4s's Uri
-  // class since that method does URL encoding on the path, which
+  // class since that method does standard URL encoding on the path, which
   // breaks AWS request signing for S3
-  def appendPathUnencoded(uri: Uri, newSegment: Uri.Path): Uri = {
-    val newPath =
-      if (uri.path.isEmpty || uri.path.lastOption =!= Some('/')) s"${uri.path}/$newSegment"
-      else s"${uri.path}$newSegment"
+  def appendPathS3Encoded(uri: Uri, newSegment: Uri.Path): Uri = {
+    val sep =
+      if (uri.path.isEmpty || uri.path.lastOption =!= Some('/')) "/"
+      else ""
+    val newPath = s"${uri.path}$sep${s3Encode(newSegment, encodeSlash = false)}"
 
     uri.withPath(newPath)
   }
+
+  // S3 specific encoding, see
+  // https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
+  def s3Encode(s: String, encodeSlash: Boolean): String = {
+    val e = URLEncoder.encode(s, StandardCharsets.UTF_8.toString)
+      .replaceAll("\\+", "%20")
+      .replaceAll("\\*", "%2A")
+      .replaceAll("%7E", "~")
+    // URLEncoder already encodes / to %2F
+    if (encodeSlash) e // .. so here we already have the correct result
+    else e.replaceAll("%2F", "/") // .. and here we need to decode %2F back to /
+  }
+
+  def s3EncodeQueryParams(queryParams: Map[String, String]): String =
+    queryParams.toSeq
+      .sortBy(_._1)
+      .map({ case (k, v) => s3Encode(k, encodeSlash = true) + "=" + s3Encode(v, encodeSlash = true) })
+      .mkString("&")
+
+  def accessDeniedError(path: ResourcePath): ResourceError =
+    ResourceError.accessDenied(
+      path,
+      Some("Access denied, make sure you have access to the configured bucket."),
+      None)
+
+  def unexpectedStatusError(path: ResourcePath, status: Status): ResourceError =
+    ResourceError.connectionFailed(
+      path,
+      Some(s"Unexpected HTTP response status: $status"),
+      None)
 }
diff --git a/datasource/src/main/scala/quasar/physical/s3/impl/preflightCheck.scala b/datasource/src/main/scala/quasar/physical/s3/impl/preflightCheck.scala
new file mode 100644
index 00000000..51f98515
--- /dev/null
+++ b/datasource/src/main/scala/quasar/physical/s3/impl/preflightCheck.scala
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2020 Precog Data
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package quasar.physical.s3.impl
+
+import slamdata.Predef._
+
+import cats.effect.{Bracket, Sync}
+import cats.syntax.applicative._
+import cats.syntax.flatMap._
+import cats.syntax.option._
+import fs2.Stream
+import org.http4s.Uri
+import org.http4s.client.Client
+import org.http4s.headers.Location
+import org.http4s.Status.{Found, MovedPermanently, Ok, PermanentRedirect, SeeOther, TemporaryRedirect}
+import org.http4s.{Method, Request, Status}
+
+object preflightCheck {
+  def apply[F[_]: Sync](client: Client[F], bucket: Uri, maxRedirects: Int)
+      : F[Option[Uri]] =
+    redirectFor(client, bucket).flatMap {
+      case Some((TemporaryRedirect | Found | SeeOther | Ok, _)) =>
+        bucket.some.pure[F]
+      case redirect @ Some((MovedPermanently | PermanentRedirect, _)) =>
+        Stream.iterateEval[F, Option[(Status, Uri)]](redirect) {
+          case Some((_, u)) => redirectFor(client, u)
+          case _ => none.pure[F]
+        // maxRedirects plus one for the last successful request
+        }.take(maxRedirects.toLong + 1).filter {
+          case Some((Ok, _)) => true
+          case _ => false
+        }.unNone.map(_._2).compile.last
+      case _ => none.pure[F]
+    }
+
+  private def redirectFor[F[_]: Bracket[?[_], Throwable]](client: Client[F], u: Uri)
+      : F[Option[(Status, Uri)]] =
+    client.run(Request[F](uri = appendPathS3Encoded(u, ""), method = Method.HEAD)) use { resp =>
+      val back = resp.status match {
+        case status @ (MovedPermanently | PermanentRedirect) =>
+          resp.headers.get(Location).map(loc => (status, loc.uri))
+        case status @ (TemporaryRedirect | Found | SeeOther) =>
+          (status, u).some
+        case Ok =>
+          (Ok, u).some
+        case _ =>
+          none
+      }
+
+      back.pure[F]
+    }
+}
diff --git a/datasource/src/main/scala/quasar/physical/s3/package.scala b/datasource/src/main/scala/quasar/physical/s3/package.scala
index e400290f..8d32b5e4 100644
--- a/datasource/src/main/scala/quasar/physical/s3/package.scala
+++ b/datasource/src/main/scala/quasar/physical/s3/package.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014–2018 SlamData Inc.
+ * Copyright 2020 Precog Data
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -16,10 +16,9 @@ package quasar.physical.s3
 
-import scala.Predef._
+import slamdata.Predef._
+
 import quasar.api.datasource.DatasourceType
-import eu.timepit.refined.auto._
-import cats.Show
 
 sealed trait S3Error
 
@@ -30,19 +29,6 @@ object S3Error {
   final case object MalformedResponse extends S3Error
 }
 
-sealed trait S3JsonParsing
-
-object S3JsonParsing {
-  case object JsonArray extends S3JsonParsing
-  case object LineDelimited extends S3JsonParsing
-
-  implicit def showS3JsonParsing: Show[S3JsonParsing] =
-    Show.show {
-      case JsonArray => "array"
-      case LineDelimited => "lineDelimited"
-    }
-}
-
 package object s3 {
   val datasourceKind: DatasourceType = DatasourceType("s3", 1L)
 }
diff --git a/datasource/src/test/scala/quasar/physical/s3/AsyncHttpClientBuilderSpec.scala b/datasource/src/test/scala/quasar/physical/s3/AsyncHttpClientBuilderSpec.scala
new file mode 100644
index 00000000..2f8da6a3
--- /dev/null
+++ b/datasource/src/test/scala/quasar/physical/s3/AsyncHttpClientBuilderSpec.scala
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2020 Precog Data
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import java.net.{Proxy, InetSocketAddress} +import java.net.Proxy.{Type => ProxyType} + +import org.specs2.mutable.Specification + +object AsyncHttpClientBuilderSpec extends Specification { + "direct proxies sort last" >> { + val HttpProxy = new Proxy(ProxyType.HTTP, new InetSocketAddress("foo.com", 1337)) + val NoProxy = Proxy.NO_PROXY + val SocksProxy = new Proxy(ProxyType.SOCKS, new InetSocketAddress("bar.com", 1337)) + + AsyncHttpClientBuilder.sortProxies( + List(NoProxy, HttpProxy, SocksProxy)) must_== List(HttpProxy, SocksProxy, NoProxy) + } +} diff --git a/datasource/src/test/scala/quasar/physical/s3/GzipS3DatasourceSpec.scala b/datasource/src/test/scala/quasar/physical/s3/GzipS3DatasourceSpec.scala new file mode 100644 index 00000000..512857a1 --- /dev/null +++ b/datasource/src/test/scala/quasar/physical/s3/GzipS3DatasourceSpec.scala @@ -0,0 +1,64 @@ +/* + * Copyright 2020 Precog Data + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import quasar.api.resource.ResourcePath +import quasar.connector.{CompressionScheme, QueryResult, DataFormat} +import quasar.connector.datasource.DatasourceModule, DatasourceModule.DS + +import cats.effect.{IO, Resource} + +import org.http4s.Uri + +final class GzipS3DatasourceSpec extends S3DatasourceSpec { + + override val testBucket = Uri.uri("https://slamdata-public-gzip-test.s3.amazonaws.com") + + override def assertResultBytes( + ds: Resource[IO, DS[IO]], + path: ResourcePath, + expected: Array[Byte]) = + ds.flatMap(_.loadFull(iRead(path)).value) use { + case Some(QueryResult.Typed(DataFormat.Compressed(CompressionScheme.Gzip, _), data, _)) => + // not worth checking the exact data here since it's still just transferring the exact byte stream + // (as with non-gzipped configs) + IO(ok) + + case _ => + IO(ko("Unexpected QueryResult")) + } + + override val datasourceLD = + Resource.eval(credentials) flatMap { creds => + mkDatasource(S3Config(testBucket, DataFormat.gzipped(DataFormat.ldjson), creds)) + } + + override val datasource = + Resource.eval(credentials) flatMap { creds => + mkDatasource(S3Config(testBucket, DataFormat.gzipped(DataFormat.json), creds)) + } + + override val datasourceCSV = + Resource.eval(credentials) flatMap { creds => + mkDatasource(S3Config( + testBucket, + DataFormat.gzipped(DataFormat.SeparatedValues.Default), + creds)) + } +} diff --git a/datasource/src/test/scala/quasar/physical/s3/S3ConfigSpec.scala b/datasource/src/test/scala/quasar/physical/s3/S3ConfigSpec.scala index e5627a94..abfa3245 100644 --- a/datasource/src/test/scala/quasar/physical/s3/S3ConfigSpec.scala +++ b/datasource/src/test/scala/quasar/physical/s3/S3ConfigSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright 2014–2018 SlamData Inc. + * Copyright 2020 Precog Data * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,9 +17,13 @@ package quasar.physical.s3 import slamdata.Predef._ +import quasar.connector.DataFormat, DataFormat._ + +import org.http4s.Uri import org.specs2.mutable.Specification import argonaut.{Json, DecodeJson} + class S3ConfigSpec extends Specification { val decode = DecodeJson.of[S3Config].decodeJson(_) @@ -53,4 +57,71 @@ class S3ConfigSpec extends Specification { decode(conf).toEither must beRight((c: S3Config) => c.credentials must beNone) } + + "parsable type" >> { + "precise json" >> { + val conf = Json.obj( + "bucket" -> Json.jString("https://some.bucket.uri"), + "format" -> Json.obj( + "type" -> Json.jString("json"), + "precise" -> Json.jBool(true), + "variant" -> Json.jString("line-delimited"))) + decode(conf).toEither must beRight((c: S3Config) => c.format === DataFormat.precise(DataFormat.ldjson)) + } + } + + "sanitize removes sensitive information" >> { + "credentials presented" >> { + val inp = S3Config( + Uri.uri("www.foo.bar"), + DataFormat.precise(DataFormat.ldjson), + Some(S3Credentials(AccessKey("access"), SecretKey("secret"), Region("region")))) + val expected = + inp.copy(credentials = Some(S3Credentials(AccessKey(""), SecretKey(""), Region("")))) + + inp.sanitize must_=== expected + } + "credentials omitted" >> { + val inp = S3Config( + Uri.uri("www.foo.bar"), + DataFormat.precise(DataFormat.ldjson), + None) + inp.sanitize must_=== inp + } + } + + "reconfigure" >> { + "replaces non-sensitive information" >> { + val inp = S3Config( + Uri.uri("www.foo.bar"), + DataFormat.precise(DataFormat.ldjson), + Some(S3Credentials(AccessKey("access"), SecretKey("secret"), Region("region")))) + val patch = S3Config( + Uri.uri("www.bar.baz"), + DataFormat.precise(DataFormat.json), + None) + val expected = S3Config( + Uri.uri("www.bar.baz"), + DataFormat.precise(DataFormat.json), + Some(S3Credentials(AccessKey("access"), SecretKey("secret"), Region("region")))) + + inp.reconfigure(patch) must beRight(expected) + } + "returns sanitized patch at left if patch has sensitive information" >> { + val inp = S3Config( + Uri.uri("www.foo.bar"), + DataFormat.precise(DataFormat.ldjson), + Some(S3Credentials(AccessKey("access"), SecretKey("secret"), Region("region")))) + val patch = S3Config( + Uri.uri("www.bar.baz"), + DataFormat.precise(DataFormat.json), + Some(S3Credentials(AccessKey("a"), SecretKey("s"), Region("r")))) + val expected = S3Config( + Uri.uri("www.bar.baz"), + DataFormat.precise(DataFormat.json), + Some(S3Credentials(AccessKey(""), SecretKey(""), Region("")))) + + inp.reconfigure(patch) must beLeft(expected) + } + } } diff --git a/datasource/src/test/scala/quasar/physical/s3/S3DataSourceModuleSpec.scala b/datasource/src/test/scala/quasar/physical/s3/S3DataSourceModuleSpec.scala deleted file mode 100644 index 5a5a321b..00000000 --- a/datasource/src/test/scala/quasar/physical/s3/S3DataSourceModuleSpec.scala +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2014–2018 SlamData Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package quasar.physical.s3 - -import slamdata.Predef._ - -import quasar.api.datasource.DatasourceError.AccessDenied -import quasar.connector.ResourceError -import quasar.contrib.scalaz.MonadError_ - -import scala.concurrent.ExecutionContext.Implicits.global - -import argonaut.Json -import cats.effect.IO -import org.specs2.mutable.Specification -import shims._ - -class S3DataSourceModuleSpec extends Specification { - import S3DataSourceModuleSpec._ - - "rejects invalid credentials" >> { - // slamdata-private-test is a bucket that requires credentials to access - val conf = Json.obj( - "bucket" -> Json.jString("https://s3.amazonaws.com/slamdata-private-test"), - "jsonParsing" -> Json.jString("array")) - - val ds = S3DataSourceModule.lightweightDatasource[IO](conf).unsafeRunSync.toEither - - ds must beLike { - case Left(AccessDenied(_, _, _)) => ok - } - } - - "rejects a non-bucket URI" >> { - val conf = Json.obj( - "bucket" -> Json.jString("https://example.com"), - "jsonParsing" -> Json.jString("array")) - - val ds = S3DataSourceModule.lightweightDatasource[IO](conf).unsafeRunSync.toEither - - ds must beLike { - case Left(AccessDenied(_, _, _)) => ok - } - } - - "removes AccessKey, SecretKey and Region from credentials" >> { - val conf = Json.obj( - "bucket" -> Json.jString("https://some.bucket.uri"), - "jsonParsing" -> Json.jString("array"), - "credentials" -> Json.obj( - "accessKey" -> Json.jString("some access key"), - "secretKey" -> Json.jString("super secret key"), - "region" -> Json.jString("us-east-1"))) - - val redactedConf = Json.obj( - "bucket" -> Json.jString("https://some.bucket.uri"), - "jsonParsing" -> Json.jString("array"), - "credentials" -> Json.obj( - "accessKey" -> Json.jString(""), - "secretKey" -> Json.jString(""), - "region" -> Json.jString(""))) - - S3DataSourceModule.sanitizeConfig(conf) must_== redactedConf - } - - "does nothing when there are no credentials to redact" >> { - val conf = Json.obj( - "bucket" -> Json.jString("https://some.bucket.uri"), - "jsonParsing" -> Json.jString("array")) - - S3DataSourceModule.sanitizeConfig(conf) must_== conf - } -} - -object S3DataSourceModuleSpec { - implicit val ioMonadResourceErr: MonadError_[IO, ResourceError] = - MonadError_.facet[IO](ResourceError.throwableP) -} diff --git a/datasource/src/test/scala/quasar/physical/s3/S3DataSourceSpec.scala b/datasource/src/test/scala/quasar/physical/s3/S3DataSourceSpec.scala deleted file mode 100644 index da73e65d..00000000 --- a/datasource/src/test/scala/quasar/physical/s3/S3DataSourceSpec.scala +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2014–2018 SlamData Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package quasar.physical.s3 - -import slamdata.Predef._ - -import quasar.api.resource.{ResourceName, ResourcePath, ResourcePathType} -import quasar.common.data.Data -import quasar.connector.{Datasource, DatasourceSpec, MonadResourceErr, ResourceError} -import quasar.connector.ResourceError -import quasar.contrib.scalaz.MonadError_ - -import cats.data.{EitherT, OptionT} -import cats.effect.{Effect, IO} -import cats.syntax.applicative._ -import cats.syntax.functor._ -import fs2.Stream -import org.http4s.Uri -import org.http4s.client.blaze.Http1Client -import scalaz.{Id, ~>}, Id.Id -import shims._ - -import S3DataSourceSpec._ - -class S3DataSourceSpec extends DatasourceSpec[IO, Stream[IO, ?]] { - val testBucket = Uri.uri("https://s3.amazonaws.com/slamdata-public-test") - val nonExistentPath = - ResourcePath.root() / ResourceName("does") / ResourceName("not") / ResourceName("exist") - - "pathIsResource" >> { - "the root of a bucket with a trailing slash is not a resource" >>* { - val root = ResourcePath.root() / ResourceName("") - datasource.pathIsResource(root).map(_ must beFalse) - } - - "the root of a bucket is not a resource" >>* { - val root = ResourcePath.root() - datasource.pathIsResource(root).map(_ must beFalse) - } - - "a prefix without contents is not a resource" >>* { - val path = ResourcePath.root() / ResourceName("prefix3") / ResourceName("subprefix5") - datasource.pathIsResource(path).map(_ must beFalse) - } - - "an actual file is a resource" >>* { - val res = ResourcePath.root() / ResourceName("testData") / ResourceName("array.json") - - datasource.pathIsResource(res) map (_ must beTrue) - } - } - - "prefixedChildPaths" >> { - - "list nested children" >>* { - assertPrefixedChildPaths( - ResourcePath.root() / ResourceName("dir1") / ResourceName("dir2") / ResourceName("dir3"), - List(ResourceName("flattenable.data") -> ResourcePathType.leafResource)) - } - - "list children at the root of the bucket" >>* { - assertPrefixedChildPaths( - ResourcePath.root(), - List( - ResourceName("dir1") -> ResourcePathType.prefix, - ResourceName("extraSmallZips.data") -> ResourcePathType.leafResource, - ResourceName("prefix3") -> ResourcePathType.prefix, - ResourceName("testData") -> ResourcePathType.prefix)) - } - - "list a file with special characters in it" >>* { - assertPrefixedChildPaths( - ResourcePath.root() / ResourceName("dir1"), - List( - ResourceName("dir2") -> ResourcePathType.prefix, - ResourceName("fóóbar.ldjson") -> ResourcePathType.leafResource)) - } - } - - "evaluate" >> { - "read line-delimited JSON" >>* { - assertEvaluate( - datasourceLD, - ResourcePath.root() / ResourceName("testData") / ResourceName("lines.json"), - data_12_34) - } - - "read array JSON" >>* { - assertEvaluate( - datasource, - ResourcePath.root() / ResourceName("testData") / ResourceName("array.json"), - data_12_34) - } - - "reading a non-existent file raises ResourceError.PathNotFound" >> { - val creds = EitherT.right[Throwable](credentials) - val ds = creds.flatMap(c => mkDatasource[G](S3JsonParsing.JsonArray, testBucket, c)) - - val path = ResourcePath.root() / ResourceName("does-not-exist") - val read: Stream[G, Data] = Stream.force(ds.flatMap(_.evaluator[Data].evaluate(path))) - - read.compile.toList.value.unsafeRunSync must beLeft.like { - case ResourceError.throwableP(ResourceError.PathNotFound(_)) => ok - } - } - } - - def assertEvaluate(ds: Datasource[IO, Stream[IO,?], ResourcePath], path: ResourcePath, expected: List[Data]) = - ds.evaluator[Data].evaluate(path).flatMap { res => - 
gatherMultiple(res).map { _ must_== expected } - } - - def assertPrefixedChildPaths(path: ResourcePath, expected: List[(ResourceName, ResourcePathType)]) = - OptionT(datasource.prefixedChildPaths(path)) - .getOrElseF(IO.raiseError(new Exception(s"Failed to list resources under $path"))) - .flatMap(gatherMultiple(_)).map { _ must_== expected } - - def gatherMultiple[A](g: Stream[IO, A]) = g.compile.toList - - val data_12_34 = List(Data.Arr(List(Data.Int(1), Data.Int(2))), Data.Arr(List(Data.Int(3), Data.Int(4)))) - - def credentials: IO[Option[S3Credentials]] = None.pure[IO] - - val run = λ[IO ~> Id](_.unsafeRunSync) - - def mkDatasource[F[_]: Effect: MonadResourceErr]( - parsing: S3JsonParsing, - bucket: Uri, - creds: Option[S3Credentials]) - : F[Datasource[F, Stream[F, ?], ResourcePath]] = - Http1Client[F]().map(client => - new S3DataSource[F](client, S3Config(bucket, parsing, creds))) - - val datasourceLD = run(mkDatasource[IO](S3JsonParsing.LineDelimited, testBucket, None)) - val datasource = run(mkDatasource[IO](S3JsonParsing.JsonArray, testBucket, None)) -} - -object S3DataSourceSpec { - type G[A] = EitherT[IO, Throwable, A] - - implicit val ioMonadResourceErr: MonadError_[IO, ResourceError] = - MonadError_.facet[IO](ResourceError.throwableP) - - implicit val eitherTMonadResourceErr: MonadError_[G, ResourceError] = - MonadError_.facet[G](ResourceError.throwableP) -} diff --git a/datasource/src/test/scala/quasar/physical/s3/S3DatasourceModuleSpec.scala b/datasource/src/test/scala/quasar/physical/s3/S3DatasourceModuleSpec.scala new file mode 100644 index 00000000..cfa4da0a --- /dev/null +++ b/datasource/src/test/scala/quasar/physical/s3/S3DatasourceModuleSpec.scala @@ -0,0 +1,234 @@ +/* + * Copyright 2020 Precog Data + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import quasar.{RateLimiting, RateLimiter} +import quasar.api.datasource.DatasourceError._ +import quasar.connector.{ByteStore, ResourceError} +import quasar.connector.datasource.Reconfiguration +import quasar.contrib.scalaz.MonadError_ + +import scala.concurrent.ExecutionContext + +import argonaut.{Argonaut, Json}, Argonaut._ +import cats.effect.{ContextShift, IO, Resource, Timer} +import cats.kernel.instances.uuid._ +import org.specs2.mutable.Specification +import java.util.UUID +import scalaz.NonEmptyList +import shims._ + +class S3DatasourceModuleSpec extends Specification { + import S3DatasourceModuleSpec._ + + implicit val cs: ContextShift[IO] = IO.contextShift(ExecutionContext.global) + implicit val timer: Timer[IO] = IO.timer(ExecutionContext.global) + implicit val ec: ExecutionContext = ExecutionContext.global + + val rateLimiting: Resource[IO, RateLimiting[IO, UUID]] = + RateLimiter[IO, UUID](IO.delay(UUID.randomUUID())) + + "rejects invalid credentials" >> { + // slamdata-private-test is a bucket that requires credentials to access + val conf = Json.obj( + "bucket" -> Json.jString("https://slamdata-private-test.s3.amazonaws.com"), + "jsonParsing" -> Json.jString("array")) + + rateLimiting.flatMap((rl: RateLimiting[IO, UUID]) => + S3DatasourceModule.datasource[IO, UUID](conf, rl, ByteStore.void[IO], _ => IO(None))) + .use(ds => IO(ds must beLike { + case Left(AccessDenied(_, _, _)) => ok + })) + .unsafeRunSync() + } + + "rejects a non-bucket URI" >> { + val conf = Json.obj( + "bucket" -> Json.jString("https://google.com"), + "jsonParsing" -> Json.jString("array")) + + rateLimiting.flatMap((rl: RateLimiting[IO, UUID]) => + S3DatasourceModule.datasource[IO, UUID](conf, rl, ByteStore.void[IO], _ => IO(None))) + .use(ds => IO(ds must beLike { + case Left(AccessDenied(_, _, _)) => ok + })) + .unsafeRunSync() + } + + "migration" in { + "migrate config as itself" >> { + val config = Json( + "bucket" := Json.jString("www.quux.com"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("line-delimited"), + "precise" := Json.jBool(false)), + "credentials" := Json( + "accessKey" := Json.jString("aa"), + "secretKey" := Json.jString("ss"), + "region" := Json.jString("rr"))) + + S3DatasourceModule.migrateConfig[IO](1, 1, config).unsafeRunSync() must beRight(config) + } + + "fail to migrate malformed config" >> { + val malformed = "malformed".asJson + + val error = MalformedConfiguration( + S3DatasourceModule.kind, + malformed, + "Configuration to migrate is malformed.") + + S3DatasourceModule.migrateConfig[IO](1, 1, malformed).unsafeRunSync() must beLeft(error) + } + } + + "sanitizeConfig" in { + "removes AccessKey, SecretKey and Region from credentials" >> { + val conf = Json.obj( + "bucket" -> Json.jString("https://some.bucket.uri"), + "jsonParsing" -> Json.jString("array"), + "credentials" -> Json.obj( + "accessKey" -> Json.jString("some access key"), + "secretKey" -> Json.jString("super secret key"), + "region" -> Json.jString("us-east-1"))) + + val redactedConf = Json.obj( + "bucket" -> Json.jString("https://some.bucket.uri"), + "format" -> Json.obj( + "type" -> Json.jString("json"), + "variant" -> Json.jString("array-wrapped"), + "precise" -> Json.jBool(false)), + "credentials" -> Json.obj( + "accessKey" -> Json.jString(""), + "secretKey" -> Json.jString(""), + "region" -> Json.jString(""))) + + S3DatasourceModule.sanitizeConfig(conf) must_== redactedConf + } + + "only migrate when there are 
no credentials to redact" >> { + val conf = Json.obj( + "bucket" -> Json.jString("https://some.bucket.uri"), + "jsonParsing" -> Json.jString("array")) + + val migrated = Json.obj( + "bucket" -> Json.jString("https://some.bucket.uri"), + "credentials" -> Json.jNull, + "format" -> Json.obj( + "type" -> Json.jString("json"), + "variant" -> Json.jString("array-wrapped"), + "precise" -> Json.jBool(false))) + + S3DatasourceModule.sanitizeConfig(conf) must_== migrated + } + } + + "reconfiguration" >> { + val patchJson = Json( + "bucket" := Json.jString("www.foo.bar"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("array-wrapped"), + "precise" := Json.jBool(false))) + val sourceJson = Json( + "bucket" := Json.jString("www.bar.baz"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("line-delimited"), + "precise" := Json.jBool(false)), + "credentials" := Json( + "accessKey" := Json.jString("a"), + "secretKey" := Json.jString("s"), + "region" := Json.jString("r"))) + + "returns malformed error if patch or source can't be decoded" >> { + val incorrect = Json() + + "both" >> { + S3DatasourceModule.reconfigure(incorrect, incorrect) must beLeft( + MalformedConfiguration( + S3DatasourceModule.kind, + incorrect, + "Source configuration in reconfiguration is malformed.")) + } + "source" >> { + S3DatasourceModule.reconfigure(incorrect, patchJson) must beLeft( + MalformedConfiguration( + S3DatasourceModule.kind, + incorrect, + "Source configuration in reconfiguration is malformed.")) + } + "patch" >> { + S3DatasourceModule.reconfigure(sourceJson, incorrect) must beLeft( + MalformedConfiguration( + S3DatasourceModule.kind, + incorrect, + "Patch configuration in reconfiguration is malformed.")) + } + } + "reconfigures non-sensitive fields" >> { + val expected = Json( + "bucket" := Json.jString("www.foo.bar"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("array-wrapped"), + "precise" := Json.jBool(false)), + "credentials" := Json( + "accessKey" := Json.jString("a"), + "secretKey" := Json.jString("s"), + "region" := Json.jString("r"))) + S3DatasourceModule.reconfigure(sourceJson, patchJson) must beRight((Reconfiguration.Reset, expected)) + } + + "returns invalid configuration error if patch has sensitive information" >> { + val sensitivePatch = Json( + "bucket" := Json.jString("www.quux.com"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("line-delimited"), + "precise" := Json.jBool(false)), + "credentials" := Json( + "accessKey" := Json.jString("aa"), + "secretKey" := Json.jString("ss"), + "region" := Json.jString("rr"))) + val expected = Json( + "bucket" := Json.jString("www.quux.com"), + "format" := Json.obj( + "type" := Json.jString("json"), + "variant" := Json.jString("line-delimited"), + "precise" := Json.jBool(false)), + "credentials" := Json( + "accessKey" := Json.jString(""), + "secretKey" := Json.jString(""), + "region" := Json.jString(""))) + + S3DatasourceModule.reconfigure(sourceJson, sensitivePatch) must beLeft( + InvalidConfiguration( + S3DatasourceModule.kind, + expected, + NonEmptyList("Patch configuration contains sensitive information."))) + } + } +} + +object S3DatasourceModuleSpec { + implicit val ioMonadResourceErr: MonadError_[IO, ResourceError] = + MonadError_.facet[IO](ResourceError.throwableP) +} diff --git a/datasource/src/test/scala/quasar/physical/s3/S3DatasourceSpec.scala 
b/datasource/src/test/scala/quasar/physical/s3/S3DatasourceSpec.scala new file mode 100644 index 00000000..23af1bbd --- /dev/null +++ b/datasource/src/test/scala/quasar/physical/s3/S3DatasourceSpec.scala @@ -0,0 +1,238 @@ +/* + * Copyright 2020 Precog Data + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package quasar.physical.s3 + +import slamdata.Predef._ + +import quasar.ScalarStages +import quasar.api.resource.{ResourceName, ResourcePath, ResourcePathType} +import quasar.common.data.Data +import quasar.connector._ +import quasar.connector.datasource.{DatasourceSpec, DatasourceModule} +import quasar.contrib.scalaz.MonadError_ +import quasar.qscript.InterpretedRead + +import java.nio.charset.Charset + +import cats.data.OptionT +import cats.effect.{IO, Resource} +import cats.syntax.applicative._ + +import fs2.Stream + +import org.http4s.Uri + +import shims.applicativeToScalaz + +import S3DatasourceSpec._ + +class S3DatasourceSpec extends DatasourceSpec[IO, Stream[IO, ?], ResourcePathType.Physical] { + + def iRead[A](path: A): InterpretedRead[A] = InterpretedRead(path, ScalarStages.Id) + + val testBucket = Uri.uri("https://slamdata-public-test.s3.amazonaws.com") + val nonExistentPath = + ResourcePath.root() / ResourceName("does") / ResourceName("not") / ResourceName("exist") + + val spanishResourceName1 = ResourceName("El veloz murciélago hindú") + val spanishResourcePrefix = ResourcePath.root() / ResourceName("testData") / spanishResourceName1 / ResourceName("comía feliz cardillo y kiwi") / ResourceName("La cigüeña tocaba el saxofón") + val spanishResourceLeaf = ResourceName("detrás del palenque de paja") + val spanishResource = spanishResourcePrefix / spanishResourceLeaf + + "pathIsResource" >> { + "the root of a bucket is not a resource" >>* { + val root = ResourcePath.root() + datasource.flatMap(_.pathIsResource(root)).use(b => IO.pure(b must beFalse)) + } + + "a prefix without contents is not a resource" >>* { + val path = ResourcePath.root() / ResourceName("prefix3") / ResourceName("subprefix5") + datasource.flatMap(_.pathIsResource(path)).use(b => IO.pure(b must beFalse)) + } + + // this also tests request signing for secured buckets + "a non-existing file with special chars is not a resource" >>* { + val res = ResourcePath.root() / ResourceName("testData") / ResourceName("""-_.!~*'() /"\#$%^&<>,?[]+=:;`""") + datasource.flatMap(_.pathIsResource(res)).use(b => IO.pure(b must beFalse)) + } + + "an actual file is a resource" >>* { + val res = ResourcePath.root() / ResourceName("testData") / ResourceName("array.json") + datasource.flatMap(_.pathIsResource(res)).use(b => IO.pure(b must beTrue)) + } + + "an actual file with special chars in path is a resource" >>* { + val res = ResourcePath.root() / ResourceName("testData") / ResourceName("á") / ResourceName("βç.json") + datasource.flatMap(_.pathIsResource(res)).use(b => IO.pure(b must beTrue)) + } + + "an actual file with special chars in deeper path is a resource" >>* { + 
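// spanishResource (defined above) nests several segments that all require
+      // S3-style percent-encoding
+      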
+      datasource.flatMap(_.pathIsResource(spanishResource)).use(b => IO.pure(b must beTrue))
+    }
+  }
+
+  "prefixedChildPaths" >> {
+
+    "list nested children" >>* {
+      assertPrefixedChildPaths(
+        ResourcePath.root() / ResourceName("dir1") / ResourceName("dir2") / ResourceName("dir3"),
+        List(ResourceName("flattenable.data") -> ResourcePathType.leafResource))
+    }
+
+    "list children at the root of the bucket" >>* {
+      assertPrefixedChildPaths(
+        ResourcePath.root(),
+        List(
+          ResourceName("extraSmallZips.data") -> ResourcePathType.leafResource,
+          ResourceName("dir1") -> ResourcePathType.prefix,
+          ResourceName("prefix3") -> ResourcePathType.prefix,
+          ResourceName("testData") -> ResourcePathType.prefix))
+    }
+
+    "list children with special chars" >>* {
+      assertPrefixedChildPaths(
+        ResourcePath.root() / ResourceName("dir1"),
+        List(
+          ResourceName("dir2") -> ResourcePathType.prefix,
+          ResourceName("fóóbar.ldjson") -> ResourcePathType.leafResource))
+    }
+
+    "list children with more special chars" >>* {
+      assertPrefixedChildPaths(
+        ResourcePath.root() / ResourceName("testData"),
+        List(
+          spanishResourceName1 -> ResourcePathType.prefix,
+          ResourceName("a b") -> ResourcePathType.prefix,
+          ResourceName("array.json") -> ResourcePathType.leafResource,
+          ResourceName("lines.json") -> ResourcePathType.leafResource,
+          ResourceName("test.csv") -> ResourcePathType.leafResource,
+          ResourceName("á") -> ResourcePathType.prefix))
+    }
+
+    "list children when space in path" >>* {
+      assertPrefixedChildPaths(
+        ResourcePath.root() / ResourceName("testData") / ResourceName("a b"),
+        List(
+          ResourceName("a b.json") -> ResourcePathType.leafResource))
+    }
+
+    "list children with special chars when special chars in path" >>* {
+      assertPrefixedChildPaths(
+        spanishResourcePrefix,
+        List(spanishResourceLeaf -> ResourcePathType.leafResource))
+    }
+  }
+
+  "evaluate" >> {
+    "read line-delimited JSON" in skipped { // ch11385
+      assertResultBytes(
+        datasourceLD,
+        ResourcePath.root() / ResourceName("testData") / ResourceName("lines.json"),
+        "[1, 2]\n[3, 4]\n".getBytes(Charset.forName("UTF-8")))
+
+      ko
+    }
+
+    "read array JSON" >>* {
+      assertResultBytes(
+        datasource,
+        ResourcePath.root() / ResourceName("testData") / ResourceName("array.json"),
+        "[[1, 2], [3, 4]]\n".getBytes(Charset.forName("UTF-8")))
+    }
+
+    "read CSV" >>* {
+      val expected = "foo,bar\r\n1,2"
+      assertResultBytes(
+        datasourceCSV,
+        ResourcePath.root() / ResourceName("testData") / ResourceName("test.csv"),
+        expected.getBytes(Charset.forName("UTF-8")))
+    }
+
+    "read array JSON of resource with special chars in path" >>* {
+      assertResultBytes(
+        datasource,
+        ResourcePath.root() / ResourceName("testData") / ResourceName("á") / ResourceName("βç.json"),
+        "[[1, 2], [3, 4]]\n".getBytes(Charset.forName("UTF-8")))
+    }
+
+    "read line-delimited JSON with special chars of resource with special chars in path" >>* {
+      val esStr = "\"El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.\"\n"
+
+      assertResultBytes(
+        datasourceLD,
+        spanishResource,
+        esStr.getBytes(Charset.forName("UTF-8")))
+    }
+
+    "reading a non-existent file raises ResourceError.PathNotFound" >>* {
+      val creds = Resource.eval(credentials)
+      val ds = creds.flatMap(c => mkDatasource(S3Config(testBucket, DataFormat.json, c)))
+
+      val path = ResourcePath.root() / ResourceName("does-not-exist")
+      val read = ds.flatMap(_.loadFull(iRead(path)).value)
+
+      MonadResourceErr[IO].attempt(read.use(_ => IO.unit)).map(_.toEither must beLeft.like {
+        case ResourceError.PathNotFound(_) => ok
+      })
+    }
+  }
+
+  def assertResultBytes(
+      ds: Resource[IO, DatasourceModule.DS[IO]],
+      path: ResourcePath,
+      expected: Array[Byte]) =
+    ds.flatMap(_.loadFull(iRead(path)).value) use {
+      case Some(QueryResult.Typed(_, data, ScalarStages.Id)) =>
+        data.data.compile.to(Array).map(_ must_=== expected)
+
+      case _ =>
+        IO(ko("Unexpected QueryResult"))
+    }
+
+  def assertPrefixedChildPaths(path: ResourcePath, expected: List[(ResourceName, ResourcePathType)]) =
+    OptionT(datasource.flatMap(_.prefixedChildPaths(path)))
+      .getOrElseF(Resource.eval(IO.raiseError(new Exception(s"Failed to list resources under $path"))))
+      .use(gatherMultiple(_))
+      .map(result => {
+        // assert the same elements, with no duplicates
+        result.length must_== expected.length
+        result.toSet must_== expected.toSet
+      })
+
+  def gatherMultiple[A](g: Stream[IO, A]) = g.compile.toList
+
+  val data_12_34 = List(Data.Arr(List(Data.Int(1), Data.Int(2))), Data.Arr(List(Data.Int(3), Data.Int(4))))
+
+  def credentials: IO[Option[S3Credentials]] = None.pure[IO]
+
+  def mkDatasource(config: S3Config)
+      : Resource[IO, DatasourceModule.DS[IO]] = {
+
+    AsyncHttpClientBuilder[IO]
+      .map(AwsV4Signing(config))
+      .map(S3Datasource(_, config))
+  }
+
+  val datasource = mkDatasource(S3Config(testBucket, DataFormat.json, None))
+  val datasourceLD = mkDatasource(S3Config(testBucket, DataFormat.ldjson, None))
+  val datasourceCSV = mkDatasource(S3Config(testBucket, DataFormat.SeparatedValues.Default, None))
+}
+
+object S3DatasourceSpec {
+  implicit val ioMonadResourceErr: MonadError_[IO, ResourceError] =
+    MonadError_.facet[IO](ResourceError.throwableP)
+}
diff --git a/datasource/src/test/scala/quasar/physical/s3/SecureS3DataSourceSpec.scala b/datasource/src/test/scala/quasar/physical/s3/SecureS3DataSourceSpec.scala
deleted file mode 100644
index d6e91ad8..00000000
--- a/datasource/src/test/scala/quasar/physical/s3/SecureS3DataSourceSpec.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2014–2018 SlamData Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package quasar.physical.s3
-
-import slamdata.Predef._
-
-import quasar.connector.ResourceError
-import quasar.contrib.scalaz.MonadError_
-
-import scala.io.{Source, Codec}
-
-import java.io.File
-
-import argonaut.{Parse, DecodeJson}
-import cats.effect.{IO, Sync}
-import cats.syntax.flatMap._
-import cats.syntax.applicative._
-import cats.syntax.option._
-import org.http4s.Uri
-import shims._
-
-import SecureS3DataSourceSpec._
-
-final class SecureS3DataSourceSpec extends S3DataSourceSpec {
-  override val testBucket = Uri.uri("https://s3.amazonaws.com/slamdata-private-test")
-
-  // FIXME: close the file once we update to cats-effect 1.0.0 and
-  // Bracket is available
-  override val credentials: IO[Option[S3Credentials]] = {
-    val file = Sync[IO].catchNonFatal(new File("testCredentials.json"))
-    val msg = "Failed to read testCredentials.json"
-
-    val src = (file >>= (f => IO(Source.fromFile(f)(Codec.UTF8)))).map(_.getLines.mkString)
-
-    val jsonConfig = src >>= (p =>
-      Parse.parse(p).toOption.map(_.pure[IO]).getOrElse(IO.raiseError(new Exception(msg))))
-
-    jsonConfig
-      .map(DecodeJson.of[S3Credentials].decodeJson(_))
-      .map(_.toOption) >>= (_.fold[IO[Option[S3Credentials]]](IO.raiseError(new Exception(msg)))(c => c.some.pure[IO]))
-  }
-
-  override val datasourceLD =
-    run(credentials >>= (creds => mkDatasource[IO](S3JsonParsing.LineDelimited, testBucket, creds)))
-  override val datasource =
-    run(credentials >>= (creds => mkDatasource[IO](S3JsonParsing.JsonArray, testBucket, creds)))
-}
-
-object SecureS3DataSourceSpec {
-  implicit val ioMonadResourceErr: MonadError_[IO, ResourceError] =
-    MonadError_.facet[IO](ResourceError.throwableP)
-}
diff --git a/datasource/src/test/scala/quasar/physical/s3/SecureS3DatasourceSpec.scala b/datasource/src/test/scala/quasar/physical/s3/SecureS3DatasourceSpec.scala
new file mode 100644
index 00000000..1d422db4
--- /dev/null
+++ b/datasource/src/test/scala/quasar/physical/s3/SecureS3DatasourceSpec.scala
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2020 Precog Data
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package quasar.physical.s3
+
+import slamdata.Predef._
+
+import quasar.connector.DataFormat
+
+import scala.io.{Source, Codec}
+
+import java.io.File
+
+import argonaut.{Parse, DecodeJson}
+
+import cats.effect.{IO, Resource}
+import cats.syntax.flatMap._
+import cats.syntax.applicative._
+import cats.syntax.option._
+
+import org.http4s.Uri
+
+final class SecureS3DatasourceSpec extends S3DatasourceSpec {
+  override val testBucket = Uri.uri("https://slamdata-private-test.s3.amazonaws.com")
+
+  override val credentials: IO[Option[S3Credentials]] = {
+    val read = IO {
+      val file = new File(credsFile)
+      val src = Source.fromFile(file)(Codec.UTF8)
+
+      (src.getLines.mkString, src)
+    }
+
+    read.bracket({
+      case (p, _) => {
+        val msg = "Failed to read testCredentials.json"
+        val jsonConfig =
+          Parse.parse(p).toOption.map(_.pure[IO]).getOrElse(IO.raiseError(new Exception(msg)))
+
+        jsonConfig
+          .map(DecodeJson.of[S3Credentials].decodeJson(_))
+          .map(_.toOption) >>= (_.fold[IO[Option[S3Credentials]]](IO.raiseError(new Exception(msg)))(c => c.some.pure[IO]))
+      }
+    })({
+      case (_, src) => IO(src.close)
+    })
+  }
+
+  private val credsFile = "testCredentials.json"
+
+  override val datasourceLD =
+    Resource.eval(credentials) flatMap { creds =>
+      mkDatasource(S3Config(testBucket, DataFormat.ldjson, creds))
+    }
+
+  override val datasource =
+    Resource.eval(credentials) flatMap { creds =>
+      mkDatasource(S3Config(testBucket, DataFormat.json, creds))
+    }
+
+  override val datasourceCSV =
+    Resource.eval(credentials) flatMap { creds =>
+      mkDatasource(S3Config(testBucket, DataFormat.SeparatedValues.Default, creds))
+    }
+}
diff --git a/datasource/src/test/scala/quasar/physical/s3/impl/ChildrenSpec.scala b/datasource/src/test/scala/quasar/physical/s3/impl/ChildrenSpec.scala
index b263d42c..ca8280f2 100644
--- a/datasource/src/test/scala/quasar/physical/s3/impl/ChildrenSpec.scala
+++ b/datasource/src/test/scala/quasar/physical/s3/impl/ChildrenSpec.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014–2018 SlamData Inc.
+ * Copyright 2020 Precog Data
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,31 +18,35 @@ package quasar.physical.s3
 
 import slamdata.Predef._
 
+import scala.concurrent.ExecutionContext
+
 import cats.effect.IO
 import cats.data.OptionT
-import cats.syntax.applicative._
-import org.http4s.{Uri, Request}
-import org.http4s.client.blaze.Http1Client
+import org.http4s.Uri
 import org.specs2.mutable.Specification
 import pathy.Path
+import scalaz.{-\/, \/-}
 
 final class ChildrenSpec extends Specification {
   "lists all resources at the root of the bucket, one per request" >> {
-    val client = Http1Client[IO]()
+    implicit val ec = ExecutionContext.global
+    implicit val cs = IO.contextShift(ec)
 
     // Force S3 to return a single element per page in ListObjects,
     // to ensure pagination works correctly
-    val bucket = Uri.uri("https://s3.amazonaws.com/slamdata-public-test/").withQueryParam("max-keys", "1")
+    val bucket = Uri.uri("https://slamdata-public-test.s3.amazonaws.com").withQueryParam("max-keys", "1")
+
     val dir = Path.rootDir
-    val sign: Request[IO] => IO[Request[IO]] = _.pure[IO]
+    val client = AsyncHttpClientBuilder[IO]
 
-    OptionT(client.flatMap(impl.children(_, bucket, dir, sign)))
+    OptionT(client.use(impl.children(_, bucket, dir)))
       .getOrElseF(IO.raiseError(new Exception("Could not list children under the root")))
       .flatMap(_.compile.toList).map { children =>
         children.length must_== 4
-        children(0).toEither must_== Left(Path.DirName("dir1"))
-        children(1).toEither must_== Right(Path.FileName("extraSmallZips.data"))
-        children(2).toEither must_== Left(Path.DirName("prefix3"))
-        children(3).toEither must_== Left(Path.DirName("testData"))
+        children.toSet must_==
+          Set(\/-(Path.FileName("extraSmallZips.data")),
+            -\/(Path.DirName("dir1")),
+            -\/(Path.DirName("prefix3")),
+            -\/(Path.DirName("testData")))
       }.unsafeRunSync
   }
 }
diff --git a/datasource/src/test/scala/quasar/physical/s3/impl/PreflightCheckSpec.scala b/datasource/src/test/scala/quasar/physical/s3/impl/PreflightCheckSpec.scala
new file mode 100644
index 00000000..3b93f2a5
--- /dev/null
+++ b/datasource/src/test/scala/quasar/physical/s3/impl/PreflightCheckSpec.scala
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2020 Precog Data
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package quasar.physical.s3
+
+import slamdata.Predef.None
+
+import quasar.connector.DataFormat
+
+import scala.concurrent.ExecutionContext
+
+import cats.effect.IO
+
+import org.http4s.HttpRoutes
+import org.http4s.Method
+import org.http4s.Uri
+import org.http4s.client.Client
+import org.http4s.dsl.io._
+import org.http4s.headers.Location
+import org.http4s.syntax.kleisli._
+
+import org.specs2.mutable.Specification
+
+final class PreflightCheckSpec extends Specification {
+  implicit val cs = IO.contextShift(ExecutionContext.global)
+
+  val maxRedirects = 3
+
+  val app = HttpRoutes.of[IO] {
+    case Method.HEAD -> Root / "bucket0" / "" =>
+      SeeOther(Location(Uri.uri("http://localhost/bucket1/")))
+    case Method.HEAD -> Root / "bucket1" / "" =>
+      MovedPermanently(Location(Uri.uri("http://localhost/bucket2/")))
+    case Method.HEAD -> Root / "bucket2" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/bucket3/")))
+    case Method.HEAD -> Root / "bucket3" / "" =>
+      Ok()
+
+    case Method.HEAD -> Root / "loop0" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/loop1/")))
+    case Method.HEAD -> Root / "loop1" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/loop0/")))
+
+    case Method.HEAD -> Root / "first" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/second/")))
+    case Method.HEAD -> Root / "second" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/third/")))
+    case Method.HEAD -> Root / "third" / "" =>
+      PermanentRedirect(Location(Uri.uri("http://localhost/fourth/")))
+    case Method.HEAD -> Root / "fourth" / "" =>
+      Ok()
+  }.orNotFound
+
+  val client = Client.fromHttpApp(app)
+  val config = S3Config(Uri.uri("http://localhost/bucket1"), DataFormat.ldjson, None)
+
+  "updates bucket URI for permanent redirects" >> {
+    val uri = Uri.uri("http://localhost/bucket2/")
+    val redirectedTo = Uri.uri("http://localhost/bucket3/")
+
+    impl.preflightCheck(client, uri, maxRedirects).unsafeRunSync must beSome(redirectedTo)
+  }
+
+  "bucket URI is not altered for non-permanent redirects" >> {
+    val uri = Uri.uri("http://localhost/bucket0/")
+
+    impl.preflightCheck(client, uri, maxRedirects).unsafeRunSync must beSome(uri)
+  }
+
+  "bucket URI is not altered for non-redirects" >> {
+    val uri = Uri.uri("http://localhost/bucket3/")
+
+    impl.preflightCheck(client, uri, maxRedirects).unsafeRunSync must beSome(uri)
+  }
+
+  "follows three permanent redirects" >> {
+    val uri = Uri.uri("http://localhost/first/")
+    val finalUri = Uri.uri("http://localhost/fourth/")
+
+    impl.preflightCheck(client, uri, maxRedirects).unsafeRunSync must beSome(finalUri)
+  }
+
+  "fails with more than three redirects" >> {
+    val uri = Uri.uri("http://localhost/loop0/")
+
+    impl.preflightCheck(client, uri, maxRedirects).unsafeRunSync must beNone
+  }
+}
diff --git "a/it/src/test/resources/El veloz murci\303\251lago hind\303\272/com\303\255a feliz cardillo y kiwi/La cig\303\274e\303\261a tocaba el saxof\303\263n/detr\303\241s del palenque de paja" "b/it/src/test/resources/El veloz murci\303\251lago hind\303\272/com\303\255a feliz cardillo y kiwi/La cig\303\274e\303\261a tocaba el saxof\303\263n/detr\303\241s del palenque de paja"
new file mode 100644
index 00000000..49f1b431
--- /dev/null
+++ "b/it/src/test/resources/El veloz murci\303\251lago hind\303\272/com\303\255a feliz cardillo y kiwi/La cig\303\274e\303\261a tocaba el saxof\303\263n/detr\303\241s del palenque de paja"
@@ -0,0 +1 @@
+"El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja."
diff --git a/it/src/test/resources/a b/a b.json b/it/src/test/resources/a b/a b.json
new file mode 100644
index 00000000..c4093860
--- /dev/null
+++ b/it/src/test/resources/a b/a b.json
@@ -0,0 +1 @@
+[[1, 2], [3, 4]]
diff --git "a/it/src/test/resources/\303\241/\316\262\303\247.json" "b/it/src/test/resources/\303\241/\316\262\303\247.json"
new file mode 100644
index 00000000..c4093860
--- /dev/null
+++ "b/it/src/test/resources/\303\241/\316\262\303\247.json"
@@ -0,0 +1 @@
+[[1, 2], [3, 4]]
diff --git a/project/AssembleDatasource.scala b/project/AssembleDatasource.scala
deleted file mode 100644
index 7638f2f4..00000000
--- a/project/AssembleDatasource.scala
+++ /dev/null
@@ -1,178 +0,0 @@
-package quasar.s3.project
-
-import coursier._
-import java.nio.file.{Path, Files}
-import java.nio.file.StandardCopyOption.REPLACE_EXISTING
-import java.nio.file.StandardOpenOption.{CREATE_NEW, TRUNCATE_EXISTING}
-import scala.concurrent.ExecutionContext.Implicits.global
-import scala.collection.JavaConverters._
-import scala.sys.process._
-import java.util.stream.Collectors
-import java.lang.Runtime
-
-import io.circe.Json
-import sbt.{Cache => _, MavenRepository => _, Tags => _, Task => _, _}
-import sbt.Keys._
-import scalaz._, Scalaz._
-import Tags.Parallel
-import scalaz.concurrent.Task
-import Task._
-
-object AssembleDatasource {
-  val assembleDatasource = TaskKey[Unit]("assembleDatasource")
-
-  // SBT needs `ModuleId` to declare dependencies with
-  // `libraryDependencies`, and coursier wants `Dependency`.
-  // Converting is straightforward but requires knowing the
-  // `scalaVersion`; I've hard-coded it to 2.12 here.
-  def moduleIdToDependency(moduleId: ModuleID): Dependency =
-    Dependency(Module(moduleId.organization, moduleId.name + "_2.12"), moduleId.revision)
-
-  val setAssemblyKey =
-    assembleDatasource in Compile := {
-      // the location of the datasource jar itself. we make sure
-      // it's been built by calling `package` here.
-      val packagedJarFile = (sbt.Keys.`package` in Compile).value
-
-      // where all of the artifacts for the project are.
-      // notably the datasource jar, and later on the assembled
-      // datasource tarball.
-      val buildOutputFolder = (crossTarget in Compile).value
-
-      // Grab the version to make it part of the tarball filename.
-      val thisVersion = (version in ThisBuild).value
-
-      // we assemble every component of the final tarball
-      // in this folder, to be exploded over the user's
-      // `plugins` folder.
-      val datasourcePluginsFolder = new File(buildOutputFolder, "plugins")
-
-      // a folder for just the datasource jar itself, shared by
-      // other datasources in the same `plugins` folder.
-      val datasourceJarFolder = new File(datasourcePluginsFolder, "datasource")
-
-      // the jar file's own path.
-      val datasourceJarFile = new File(datasourceJarFolder, packagedJarFile.name)
-
-      // the datasource jar's path relative to datasourcePluginsFolder;
-      // included in the generated .plugin file to let quasar
-      // know where to load it from.
-      val relativeDatasourceJarPath =
-        datasourcePluginsFolder.toPath.relativize(datasourceJarFile.toPath).toString
-
-      // the path to the generated .plugin file.
-      val pluginFilePath = new File(datasourcePluginsFolder, "s3.plugin").toPath
-
-      // start coursier on resolving all of the datasource's
-      // dependencies, *except* for quasar. quasar and its
-      // dependencies are already present in the user's
-      // `plugins` folder.
-      val resolution =
-        Resolution(Dependencies.datasourceCore.map(moduleIdToDependency).toSet)
-
-      // we're using datasourcePluginsFolder as a coursier cache while fetching
-      // our dependencies, because that's the format of a `plugins` folder.
-      val cache = Cache.fetch(
-        datasourcePluginsFolder,
-        CachePolicy.Update)
-
-      val quasarVersion = IO.read(file("./quasar-version")).trim
-
-      // I don't want to add kind-projector to the compiler
-      // plugins, so I'm hard-coding this type alias.
-      // later on we're going to make a `List[FileError \/ A]`
-      // into a `Validation[NonEmptyList[FileError], List[A]]`
-      type OrFileErrors[A] = FileError ValidationNel A
-
-      (for {
-        // make the output plugins folder and the folder inside
-        // which houses the datasource jar.
-        _ <- Task.delay {
-          datasourcePluginsFolder.mkdir()
-          datasourceJarFolder.mkdir()
-        }
-
-        _ <- Task.delay(println("Fetching artifacts with coursier..."))
-
-        // coursier prefers that we fetch metadata before fetching
-        // artifacts. we do that in parallel with copying the datasource
-        // jar to its new place, because they don't depend on one
-        // another.
-        fetchedJarFiles <-
-          Parallel.unwrap(
-            Applicative[ParallelTask].apply2(
-              Parallel(
-                Task(Files.copy(packagedJarFile.toPath(), datasourceJarFile.toPath(), REPLACE_EXISTING))
-              ),
-              Parallel(
-                for {
-                  metadata <- resolution.process.run(
-                    // we don't use ~/.ivy2/local here because
-                    // I've heard that coursier doesn't copy
-                    // from local caches into other local caches.
-                    Fetch.from(Seq(MavenRepository("https://repo1.maven.org/maven2")), cache)
-                  )
-                  // fetch artifacts in parallel into cache
-                  artifactsPar = metadata.artifacts.toList
-                    .traverse[ParallelTask, FileError \/ File] { f =>
-                      Parallel(Cache.file(f, datasourcePluginsFolder, CachePolicy.Update).run)
-                    }
-
-                  // some contortions to make sure *all* errors
-                  // are reported when any fail to download.
-                  artifacts <- Parallel.unwrap(artifactsPar)
-                    .flatMap(_.traverse[OrFileErrors, File](_.validationNel).fold(
-                      es => Task.fail(new Exception(s"Failed to fetch files: ${es.foldMap(e => e.toString + "\n\n")}")),
-                      Task.now(_)
-                    ))
-
-                  // filter out coursier metadata, we only want the jars
-                  // for the `classpath` field of the .plugin file
-                  jarFiles <- artifacts.filter(_.name.endsWith(".jar")).pure[Task]
-                } yield jarFiles
-              )
-            )((_, jars) => jars)
-          )
-
-        _ <- Task.delay(println("Artifacts fetched. Preparing to write .plugin file..."))
-
-        // the .plugin file requires all dependency jar paths
-        // to be relative to the plugins folder
-        classPath = fetchedJarFiles.map(p => datasourcePluginsFolder.toPath.relativize(p.toPath)) ++ List(relativeDatasourceJarPath)
-
-        cpJson = Json.arr(classPath.map(_.toString).map(Json.fromString(_)) :_*)
-        mainJar = Json.fromString(relativeDatasourceJarPath)
-
-        // include the datasource jar and classpath into the .plugin file
-        outJson = Json.obj("mainJar" -> mainJar, "classPath" -> cpJson).spaces2
-
-        // delete an old .plugin file, write the new one
-        _ <- Task.delay {
-          Files.deleteIfExists(pluginFilePath)
-          Files.write(pluginFilePath, outJson.getBytes, CREATE_NEW)
-        }
-
-        _ <- Task.delay(println(".plugin file written. Zipping up tarball..."))
-
-        // equivalent to `ls $datasourcePluginsFolder`, the files and
-        // folders we need to zip up to make a valid
-        // `plugins` folder
-        files = datasourcePluginsFolder.listFiles.map(p => datasourcePluginsFolder.toPath.relativize(p.toPath)).mkString(" ")
-
-        // the `plugins` tarball's location
-        tarPath = new File(buildOutputFolder, s"quasar-s3-$thisVersion-q$quasarVersion-explode.tar.gz")
-
-        // the command we run to finish up: zip up (-c) all of
-        // the files in our plugins folder ($files), with the
-        // plugins folder as "root" of the tarball (-C) and
-        // put the tarball into the artifacts folder.
-        cmd = s"tar -czvf $tarPath -C $datasourcePluginsFolder/ $files"
-
-        // do it.
-        _ <- Task.delay(Runtime.getRuntime().exec(cmd))
-
-        _ <- Task.delay(println(s"Tarball written to ${tarPath}."))
-      } yield ()).unsafePerformSync
-
-    }
-}
diff --git a/project/CachedCi.scala b/project/CachedCi.scala
deleted file mode 100644
index 2b77f8e5..00000000
--- a/project/CachedCi.scala
+++ /dev/null
@@ -1,3 +0,0 @@
-import org.romanowski.hoarder.actions.ci.TravisPRValidation
-
-object CachedCi extends TravisPRValidation.PluginBase
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
deleted file mode 100644
index a6be1af3..00000000
--- a/project/Dependencies.scala
+++ /dev/null
@@ -1,60 +0,0 @@
-package quasar.s3.project
-
-import scala.Boolean
-import scala.collection.Seq
-
-import sbt._
-
-object Dependencies {
-  private val http4sVersion = "0.18.13"
-
-  // hooray, scala-xml. we use it for parsing XML out of
-  // the S3 API's responses.
-  private val scalaXmlVersion = "1.1.0"
-
-  // used for parsing JSON out of the stream of S3 data
-  // in an object.
-  // we need to be compatible with Quasar's version of both
-  // fs2 and jawn, so we use the older circe-jawn version.
-  private val argonautVersion = "6.2.2"
-  private val catsEffectVersion = "0.10.1"
-  private val circeFs2Version = "0.9.0"
-  private val circeJawnVersion = "0.9.3"
-  private val fs2Version = "0.10.5"
-  private val quasarVersion = IO.read(file("./quasar-version")).trim
-  private val qdataVersion = IO.read(file("./qdata-version")).trim
-  private val jawnFs2Version = "0.12.2"
-  private val shimsVersion = "1.2.1"
-  private val specsVersion = "4.1.2"
-
-  // http4s-blaze-client's version has to be in sync with
-  // quasar's http4s version. The same goes for any
-  // dependencies, transitive or otherwise.
-  def datasourceCore = Seq(
-    "org.http4s" %% "jawn-fs2" % jawnFs2Version,
-    "com.slamdata" %% "qdata-json" % qdataVersion,
-    "org.http4s" %% "http4s-scala-xml" % http4sVersion,
-    "org.http4s" %% "http4s-blaze-client" % http4sVersion,
-    "org.scala-lang.modules" %% "scala-xml" % scalaXmlVersion,
-    "io.circe" %% "circe-jawn" % circeJawnVersion,
-    "com.codecommit" %% "shims" % shimsVersion,
-    "org.typelevel" %% "cats-effect" % catsEffectVersion,
-    "org.specs2" %% "specs2-core" % specsVersion % Test,
-    "org.specs2" %% "specs2-scalaz" % specsVersion % Test,
-    "org.specs2" %% "specs2-scalacheck" % specsVersion % Test,
-    "io.argonaut" %% "argonaut" % argonautVersion,
-    "io.circe" %% "circe-fs2" % circeFs2Version
-  )
-
-  // we need to separate quasar out from the datasource dependencies,
-  // to keep from packaging it and its dependencies. TODO: we should
-  // do this in the assembly routine.
-  def datasource = datasourceCore ++ Seq(
-    "com.slamdata" %% "quasar-api-internal" % quasarVersion,
-    "com.slamdata" %% "quasar-api-internal" % quasarVersion % Test classifier "tests",
-    "com.slamdata" %% "quasar-foundation-internal" % quasarVersion,
-    "com.slamdata" %% "quasar-foundation-internal" % quasarVersion % Test classifier "tests",
-    "com.slamdata" %% "quasar-connector-internal" % quasarVersion,
-    "com.slamdata" %% "quasar-connector-internal" % quasarVersion % Test classifier "tests",
-  )
-}
diff --git a/project/Github.scala b/project/Github.scala
deleted file mode 100644
index af4f2504..00000000
--- a/project/Github.scala
+++ /dev/null
@@ -1,143 +0,0 @@
-package github
-
-import java.lang.{RuntimeException, String, System}
-import scala.{Boolean, Option, Predef}
-import scala.collection.{JavaConverters, Seq}, JavaConverters._
-import scala.util.{Failure, Success, Try}
-
-import org.kohsuke.github._
-import sbt._, Keys._
-
-object GithubPlugin extends AutoPlugin {
-  object GithubKeys {
-    lazy val repoSlug = settingKey[String]("The repo slug, e.g. 'slamdata/quasar'")
-    lazy val tag = settingKey[String]("The name of the tag, e.g. v1.2.3")
-    lazy val releaseName = taskKey[String]("The name of the release")
-    lazy val commitish = settingKey[String]("The commitish value from which the tag is created")
-    lazy val draft = settingKey[Boolean]("The draft / final flag")
-    lazy val prerelease = settingKey[Boolean]("The prerelease / release flag")
-    lazy val assets = taskKey[Seq[File]]("The binary assets to upload")
-    lazy val githubAuth = taskKey[GitHub]("Creates a Github based on GITHUB_TOKEN OAuth variable")
-    lazy val githubRelease = taskKey[GHRelease]("Publishes a new Github release")
-
-    lazy val versionFile = settingKey[String]("The JSON version file, e.g. 'version.json")
-    lazy val versionRepo = settingKey[String]("The repo slug for the JSON version file")
-    lazy val githubUpdateVer = taskKey[String]("Updates the JSON version file in the version repo")
-  }
-
-  import GithubKeys._
-
-  private object Travis {
-    lazy val BuildNumber = Option(System.getenv("TRAVIS_BUILD_NUMBER"))
-    lazy val RepoSlug = Option(System.getenv("TRAVIS_REPO_SLUG"))
-    lazy val Commit = Option(System.getenv("TRAVIS_COMMIT"))
-  }
-
-  lazy val githubSettings: Seq[Setting[_]] = Seq(
-    repoSlug := Travis.RepoSlug.fold(organization.value + "/" + normalizedName.value)(Predef.identity),
-    tag := "v" + version.value +
-      (if (prerelease.value) Travis.BuildNumber.fold("")("-" + _) else ""),
-    releaseName := name.value +
-      (" " + tag.value) +
-      (if (draft.value) " (draft)" else ""),
-    commitish := Travis.Commit.getOrElse(""),
-    draft := false,
-    prerelease := version.value.matches(""".*SNAPSHOT.*"""),
-    assets := Seq((packageBin in Compile).value),
-
-    githubAuth := {
-      val log = streams.value.log
-
-      val token = Option(System.getenv("GITHUB_TOKEN")).getOrElse(scala.sys.error("You must define GITHUB_TOKEN"))
-
-      val github = GitHub.connectUsingOAuth(token)
-
-      log.info("Connected using GITHUB_TOKEN")
-
-      github
-    },
-
-    githubRelease := {
-      val log = streams.value.log
-
-      val github = githubAuth.value
-
-      val release = Try {
-        val repo = github.getRepository(repoSlug.value)
-
-        val body =
-          repo.listTags.asScala.find(_.getName == tag.value).map { tagUnpopulated =>
-            repo.getCommit(tagUnpopulated.getCommit.getSHA1).getCommitShortInfo.getMessage
-          }.getOrElse(scala.sys.error("Tag not found"))
-
-        log.info("repoSlug = " + repoSlug.value)
-        log.info("tag = " + tag.value)
-        log.info("releaseName = " + releaseName.value)
-        log.info("draft = " + draft.value)
-        log.info("body = " + body)
-        log.info("prerelease = " + prerelease.value)
-        log.info("commitish = " + commitish.value)
-
-        val existingRelease =
-          repo.listReleases.asScala.find(_.getName == releaseName.value)
-
-        existingRelease.getOrElse {
-          val releaseBuilder = repo
-            .createRelease(tag.value)
-            .name(releaseName.value)
-            .draft(draft.value)
-            .body(body)
-            .prerelease(prerelease.value)
-
-          (commitish.value match {
-            case "" => releaseBuilder
-            case v => releaseBuilder.commitish(v)
-          }).create
-        }
-      } match {
-        case Success(v) => v
-        case Failure(e) =>
-          throw new RuntimeException("Could not access or create the Github release", e)
-      }
-
-      log.info("Created Github release: " + release)
-
-      assets.value foreach { asset =>
-        val relativePath = asset.relativeTo(baseDirectory.value).getOrElse(asset)
-        val mimeType = Option(java.nio.file.Files.probeContentType(asset.toPath())).getOrElse("application/java-archive")
-
-        log.info("Uploading " + relativePath + " (" + mimeType + ") to release")
-
-        release.uploadAsset(asset, mimeType)
-      }
-
-      release
-    },
-
-    versionFile := "version.json",
-
-    versionRepo := { repoSlug.value },
-
-    githubUpdateVer := {
-      val log = streams.value.log
-
-      val ver = version.value
-      val file = versionFile.value
-      val repo = versionRepo.value
-
-      log.info("version = " + ver)
-      log.info("version file = " + file)
-      log.info("version repo = " + repo)
-
-      val github = githubAuth.value
-
-      val content = github.getRepository(repo).getFileContent(file)
-
-      val json = """{"version": """" + ver + """"}"""
-
-      content.update(json, "Releasing " + ver)
-
-      json
-    }
-  )
-}
diff --git a/project/build.properties b/project/build.properties
index 8b697bbb..a919a9b5 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version=1.1.0
+sbt.version=1.3.8
diff --git a/project/build.sbt b/project/build.sbt
deleted file mode 100644
index d257b23f..00000000
--- a/project/build.sbt
+++ /dev/null
@@ -1,23 +0,0 @@
-disablePlugins(TravisCiPlugin)
-
-libraryDependencies += "org.kohsuke" % "github-api" % "1.59" exclude("org.jenkins-ci", "annotation-indexer")
-
-// used to fetch dependencies to form a coursier cache,
-// for packaging the datasource.
-// should remain the newest coursier version.
-libraryDependencies ++= Seq(
-  "io.get-coursier" %% "coursier" % "1.0.1",
-  "io.get-coursier" %% "coursier-cache" % "1.0.1",
-  "io.circe" %% "circe-core" % "0.9.3"
-)
-
-scalacOptions --= Seq(
-  "-Ywarn-unused:imports",
-  "-Yinduction-heuristics",
-  "-Ykind-polymorphism",
-  "-Xstrict-patmat-analysis")
-
-// sbt/sbt#2572
-scalacOptions in (Compile, console) --= Seq(
-  "-Yno-imports",
-  "-Ywarn-unused:imports")
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 5be04d4c..dff7ec97 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,10 +1,10 @@
-resolvers += Resolver.sonatypeRepo("releases")
-resolvers += Resolver.bintrayRepo("slamdata-inc", "maven-public")
+credentials += Credentials(
+  "GitHub Package Registry",
+  "maven.pkg.github.com",
+  sys.env.get("GITHUB_ACTOR").getOrElse(sys.error("Please define GITHUB_ACTOR")),
+  sys.env.get("GITHUB_TOKEN").getOrElse(sys.error("Please define GITHUB_TOKEN")))
 
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")
-addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0")
-addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.0-RC12")
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1")
-addSbtPlugin("com.slamdata" % "sbt-slamdata" % "1.3.0")
-addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.27")
-addSbtPlugin("com.github.romanowski" % "hoarder" % "1.0.2-RC2")
+resolvers += "GitHub Package Registry" at "https://maven.pkg.github.com/precog/_"
+
+addSbtPlugin("com.precog" % "sbt-precog" % "3.0.1")
+addSbtPlugin("com.precog" % "sbt-quasar-plugin" % "1.0.0")
diff --git a/project/project/plugins.sbt b/project/project/plugins.sbt
deleted file mode 100644
index 12c76253..00000000
--- a/project/project/plugins.sbt
+++ /dev/null
@@ -1,6 +0,0 @@
-resolvers += Resolver.sonatypeRepo("releases")
-resolvers += Resolver.bintrayRepo("slamdata-inc", "maven-public")
-resolvers += Resolver.bintrayIvyRepo("djspiewak", "ivy")
-
-addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.0-RC12")
-addSbtPlugin("com.slamdata" % "sbt-slamdata" % "1.3.0")
diff --git a/qdata-version b/qdata-version
index 7ec1d6db..02161ca8 100644
--- a/qdata-version
+++ b/qdata-version
@@ -1 +1 @@
-2.1.0
+13.0.0
diff --git a/quasar-version b/quasar-version
deleted file mode 100644
index 841ef591..00000000
--- a/quasar-version
+++ /dev/null
@@ -1 +0,0 @@
-69.0.1
diff --git a/sbt b/sbt
index cc845372..d97f8e85 100755
--- a/sbt
+++ b/sbt
@@ -2,33 +2,61 @@
 #
 # A more capable sbt runner, coincidentally also called sbt.
 # Author: Paul Phillips
+# https://github.com/paulp/sbt-extras
+#
+# Generated from http://www.opensource.org/licenses/bsd-license.php
+# Copyright (c) 2011, Paul Phillips. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set -o pipefail -declare -r sbt_release_version="0.13.16" -declare -r sbt_unreleased_version="0.13.16" +declare -r sbt_release_version="1.5.0" +declare -r sbt_unreleased_version="1.5.0" -declare -r latest_213="2.13.0-M2" -declare -r latest_212="2.12.3" -declare -r latest_211="2.11.11" -declare -r latest_210="2.10.6" +declare -r latest_213="2.13.5" +declare -r latest_212="2.12.13" +declare -r latest_211="2.11.12" +declare -r latest_210="2.10.7" declare -r latest_29="2.9.3" declare -r latest_28="2.8.2" declare -r buildProps="project/build.properties" -declare -r sbt_launch_ivy_release_repo="http://repo.typesafe.com/typesafe/ivy-releases" +declare -r sbt_launch_ivy_release_repo="https://repo.typesafe.com/typesafe/ivy-releases" declare -r sbt_launch_ivy_snapshot_repo="https://repo.scala-sbt.org/scalasbt/ivy-snapshots" -declare -r sbt_launch_mvn_release_repo="http://repo.scala-sbt.org/scalasbt/maven-releases" -declare -r sbt_launch_mvn_snapshot_repo="http://repo.scala-sbt.org/scalasbt/maven-snapshots" +declare -r sbt_launch_mvn_release_repo="https://repo1.maven.org/maven2" +declare -r sbt_launch_mvn_snapshot_repo="https://repo.scala-sbt.org/scalasbt/maven-snapshots" -declare -r default_jvm_opts_common="-Xms512m -Xmx1536m -Xss2m" -declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r default_jvm_opts_common="-Xms512m -Xss2m -XX:MaxInlineLevel=18" +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy -Dsbt.coursier.home=project/.coursier" declare sbt_jar sbt_dir sbt_create sbt_version sbt_script sbt_new declare sbt_explicit_version declare verbose noshare batch trace_level -declare debugUs declare java_cmd="java" declare sbt_launch_dir="$HOME/.sbt/launchers" @@ -40,13 +68,17 @@ declare -a java_args scalac_args sbt_commands residual_args # args to jvm/sbt via files or environment variables declare -a extra_jvm_opts extra_sbt_opts -echoerr () { echo >&2 "$@"; } -vlog () { [[ -n "$verbose" ]] && echoerr "$@"; } -die () { echo "Aborting: $@" ; exit 1; } +echoerr() { echo >&2 "$@"; } +vlog() { [[ -n "$verbose" ]] && echoerr "$@"; } +die() { + echo "Aborting: $*" + exit 1 +} -setTrapExit () { +setTrapExit() { # save stty and trap exit, to ensure echo is re-enabled if we are 
interrupted. - export SBT_STTY="$(stty -g 2>/dev/null)" + SBT_STTY="$(stty -g 2>/dev/null)" + export SBT_STTY # restore stty settings (echo in particular) onSbtRunnerExit() { @@ -62,11 +94,14 @@ setTrapExit () { # this seems to cover the bases on OSX, and someone will # have to tell me about the others. -get_script_path () { +get_script_path() { local path="$1" - [[ -L "$path" ]] || { echo "$path" ; return; } + [[ -L "$path" ]] || { + echo "$path" + return + } - local target="$(readlink "$path")" + local -r target="$(readlink "$path")" if [[ "${target:0:1}" == "/" ]]; then echo "$target" else @@ -74,10 +109,12 @@ get_script_path () { fi } -declare -r script_path="$(get_script_path "$BASH_SOURCE")" -declare -r script_name="${script_path##*/}" +script_path="$(get_script_path "${BASH_SOURCE[0]}")" +declare -r script_path +script_name="${script_path##*/}" +declare -r script_name -init_default_option_file () { +init_default_option_file() { local overriding_var="${!1}" local default_file="$2" if [[ ! -r "$default_file" && "$overriding_var" =~ ^@(.*)$ ]]; then @@ -89,82 +126,82 @@ init_default_option_file () { echo "$default_file" } -declare sbt_opts_file="$(init_default_option_file SBT_OPTS .sbtopts)" -declare jvm_opts_file="$(init_default_option_file JVM_OPTS .jvmopts)" +sbt_opts_file="$(init_default_option_file SBT_OPTS .sbtopts)" +sbtx_opts_file="$(init_default_option_file SBTX_OPTS .sbtxopts)" +jvm_opts_file="$(init_default_option_file JVM_OPTS .jvmopts)" -build_props_sbt () { - [[ -r "$buildProps" ]] && \ +build_props_sbt() { + [[ -r "$buildProps" ]] && grep '^sbt\.version' "$buildProps" | tr '=\r' ' ' | awk '{ print $2; }' } -update_build_props_sbt () { - local ver="$1" - local old="$(build_props_sbt)" - - [[ -r "$buildProps" ]] && [[ "$ver" != "$old" ]] && { - perl -pi -e "s/^sbt\.version\b.*\$/sbt.version=${ver}/" "$buildProps" - grep -q '^sbt.version[ =]' "$buildProps" || printf "\nsbt.version=%s\n" "$ver" >> "$buildProps" - - vlog "!!!" - vlog "!!! Updated file $buildProps setting sbt.version to: $ver" - vlog "!!! Previous value was: $old" - vlog "!!!" 
- } -} - -set_sbt_version () { +set_sbt_version() { sbt_version="${sbt_explicit_version:-$(build_props_sbt)}" [[ -n "$sbt_version" ]] || sbt_version=$sbt_release_version export sbt_version } -url_base () { +url_base() { local version="$1" case "$version" in - 0.7.*) echo "http://simple-build-tool.googlecode.com" ;; - 0.10.* ) echo "$sbt_launch_ivy_release_repo" ;; + 0.7.*) echo "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/simple-build-tool" ;; + 0.10.*) echo "$sbt_launch_ivy_release_repo" ;; 0.11.[12]) echo "$sbt_launch_ivy_release_repo" ;; 0.*-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9]) # ie "*-yyyymmdd-hhMMss" - echo "$sbt_launch_ivy_snapshot_repo" ;; - 0.*) echo "$sbt_launch_ivy_release_repo" ;; - *-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9]) # ie "*-yyyymmdd-hhMMss" - echo "$sbt_launch_mvn_snapshot_repo" ;; - *) echo "$sbt_launch_mvn_release_repo" ;; + echo "$sbt_launch_ivy_snapshot_repo" ;; + 0.*) echo "$sbt_launch_ivy_release_repo" ;; + *-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9]) # ie "*-yyyymmddThhMMss" + echo "$sbt_launch_mvn_snapshot_repo" ;; + *) echo "$sbt_launch_mvn_release_repo" ;; esac } -make_url () { +make_url() { local version="$1" local base="${sbt_launch_repo:-$(url_base "$version")}" case "$version" in - 0.7.*) echo "$base/files/sbt-launch-0.7.7.jar" ;; - 0.10.* ) echo "$base/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar" ;; + 0.7.*) echo "$base/sbt-launch-0.7.7.jar" ;; + 0.10.*) echo "$base/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar" ;; 0.11.[12]) echo "$base/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar" ;; - 0.*) echo "$base/org.scala-sbt/sbt-launch/$version/sbt-launch.jar" ;; - *) echo "$base/org/scala-sbt/sbt-launch/$version/sbt-launch.jar" ;; + 0.*) echo "$base/org.scala-sbt/sbt-launch/$version/sbt-launch.jar" ;; + *) echo "$base/org/scala-sbt/sbt-launch/$version/sbt-launch-${version}.jar" ;; esac } -addJava () { vlog "[addJava] arg = '$1'" ; java_args+=("$1"); } -addSbt () { vlog "[addSbt] arg = '$1'" ; sbt_commands+=("$1"); } -addScalac () { vlog "[addScalac] arg = '$1'" ; scalac_args+=("$1"); } -addResidual () { vlog "[residual] arg = '$1'" ; residual_args+=("$1"); } +addJava() { + vlog "[addJava] arg = '$1'" + java_args+=("$1") +} +addSbt() { + vlog "[addSbt] arg = '$1'" + sbt_commands+=("$1") +} +addScalac() { + vlog "[addScalac] arg = '$1'" + scalac_args+=("$1") +} +addResidual() { + vlog "[residual] arg = '$1'" + residual_args+=("$1") +} + +addResolver() { addSbt "set resolvers += $1"; } -addResolver () { addSbt "set resolvers += $1"; } -addDebugger () { addJava "-Xdebug" ; addJava "-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1"; } -setThisBuild () { - vlog "[addBuild] args = '$@'" +addDebugger() { addJava "-Xdebug" && addJava "-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1"; } + +setThisBuild() { + vlog "[addBuild] args = '$*'" local key="$1" && shift - addSbt "set $key in ThisBuild := $@" + addSbt "set $key in ThisBuild := $*" } -setScalaVersion () { +setScalaVersion() { [[ "$1" == *"-SNAPSHOT" ]] && addResolver 'Resolver.sonatypeRepo("snapshots")' addSbt "++ $1" } -setJavaHome () { +setJavaHome() { java_cmd="$1/bin/java" setThisBuild javaHome "_root_.scala.Some(file(\"$1\"))" export JAVA_HOME="$1" @@ -172,13 +209,25 @@ setJavaHome () { export PATH="$JAVA_HOME/bin:$PATH" } -getJavaVersion() { "$1" -version 2>&1 | grep -E -e '(java|openjdk) version' | awk '{ print $3 }' | tr -d 
\"; } +getJavaVersion() { + local -r str=$("$1" -version 2>&1 | grep -E -e '(java|openjdk) version' | awk '{ print $3 }' | tr -d '"') + + # java -version on java8 says 1.8.x + # but on 9 and 10 it's 9.x.y and 10.x.y. + if [[ "$str" =~ ^1\.([0-9]+)(\..*)?$ ]]; then + echo "${BASH_REMATCH[1]}" + elif [[ "$str" =~ ^([0-9]+)(\..*)?$ ]]; then + echo "${BASH_REMATCH[1]}" + elif [[ -n "$str" ]]; then + echoerr "Can't parse java version from: $str" + fi +} checkJava() { # Warn if there is a Java version mismatch between PATH and JAVA_HOME/JDK_HOME - [[ -n "$JAVA_HOME" && -e "$JAVA_HOME/bin/java" ]] && java="$JAVA_HOME/bin/java" - [[ -n "$JDK_HOME" && -e "$JDK_HOME/lib/tools.jar" ]] && java="$JDK_HOME/bin/java" + [[ -n "$JAVA_HOME" && -e "$JAVA_HOME/bin/java" ]] && java="$JAVA_HOME/bin/java" + [[ -n "$JDK_HOME" && -e "$JDK_HOME/lib/tools.jar" ]] && java="$JDK_HOME/bin/java" if [[ -n "$java" ]]; then pathJavaVersion=$(getJavaVersion java) @@ -192,31 +241,32 @@ checkJava() { fi } -java_version () { - local version=$(getJavaVersion "$java_cmd") +java_version() { + local -r version=$(getJavaVersion "$java_cmd") vlog "Detected Java version: $version" - echo "${version:2:1}" + echo "$version" } +is_apple_silicon() { [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; } + # MaxPermSize critical on pre-8 JVMs but incurs noisy warning on 8+ -default_jvm_opts () { - local v="$(java_version)" - if [[ $v -ge 8 ]]; then +default_jvm_opts() { + local -r v="$(java_version)" + if [[ $v -ge 10 ]]; then + if is_apple_silicon; then + # As of Dec 2020, JVM for Apple Silicon (M1) doesn't support JVMCI + echo "$default_jvm_opts_common" + else + echo "$default_jvm_opts_common -XX:+UnlockExperimentalVMOptions -XX:+UseJVMCICompiler" + fi + elif [[ $v -ge 8 ]]; then echo "$default_jvm_opts_common" else echo "-XX:MaxPermSize=384m $default_jvm_opts_common" fi } -build_props_scala () { - if [[ -r "$buildProps" ]]; then - versionLine="$(grep '^build.scala.versions' "$buildProps")" - versionString="${versionLine##build.scala.versions=}" - echo "${versionString%% .*}" - fi -} - -execRunner () { +execRunner() { # print the arguments one to a line, quoting any containing spaces vlog "# Executing command line:" && { for arg; do @@ -234,40 +284,36 @@ execRunner () { setTrapExit if [[ -n "$batch" ]]; then - "$@" < /dev/null + "$@" /dev/null; then + if command -v curl >/dev/null 2>&1; then curl --fail --silent --location "$url" --output "$jar" - elif which wget >/dev/null; then + elif command -v wget >/dev/null 2>&1; then wget -q -O "$jar" "$url" fi } && [[ -r "$jar" ]] } -acquire_sbt_jar () { +acquire_sbt_jar() { { sbt_jar="$(jar_file "$sbt_version")" [[ -r "$sbt_jar" ]] @@ -276,11 +322,66 @@ acquire_sbt_jar () { [[ -r "$sbt_jar" ]] } || { sbt_jar="$(jar_file "$sbt_version")" - download_url "$(make_url "$sbt_version")" "$sbt_jar" + jar_url="$(make_url "$sbt_version")" + + echoerr "Downloading sbt launcher for ${sbt_version}:" + echoerr " From ${jar_url}" + echoerr " To ${sbt_jar}" + + download_url "${jar_url}" "${sbt_jar}" + + case "${sbt_version}" in + 0.*) + vlog "SBT versions < 1.0 do not have published MD5 checksums, skipping check" + echo "" + ;; + *) verify_sbt_jar "${sbt_jar}" ;; + esac } } -usage () { +verify_sbt_jar() { + local jar="${1}" + local md5="${jar}.md5" + md5url="$(make_url "${sbt_version}").md5" + + echoerr "Downloading sbt launcher ${sbt_version} md5 hash:" + echoerr " From ${md5url}" + echoerr " To ${md5}" + + download_url "${md5url}" "${md5}" >/dev/null 2>&1 + + if command -v md5sum >/dev/null 2>&1; 
then + if echo "$(cat "${md5}") ${jar}" | md5sum -c -; then + rm -rf "${md5}" + return 0 + else + echoerr "Checksum does not match" + return 1 + fi + elif command -v md5 >/dev/null 2>&1; then + if [ "$(md5 -q "${jar}")" == "$(cat "${md5}")" ]; then + rm -rf "${md5}" + return 0 + else + echoerr "Checksum does not match" + return 1 + fi + elif command -v openssl >/dev/null 2>&1; then + if [ "$(openssl md5 -r "${jar}" | awk '{print $1}')" == "$(cat "${md5}")" ]; then + rm -rf "${md5}" + return 0 + else + echoerr "Checksum does not match" + return 1 + fi + else + echoerr "Could not find an MD5 command" + return 1 + fi +} + +usage() { set_sbt_version cat < Run the specified file as a scala script # sbt version (default: sbt.version from $buildProps if present, otherwise $sbt_release_version) - -sbt-force-latest force the use of the latest release of sbt: $sbt_release_version - -sbt-version use the specified version of sbt (default: $sbt_release_version) - -sbt-dev use the latest pre-release version of sbt: $sbt_unreleased_version - -sbt-jar use the specified jar as the sbt launcher - -sbt-launch-dir directory to hold sbt launchers (default: $sbt_launch_dir) - -sbt-launch-repo repo url for downloading sbt launcher jar (default: $(url_base "$sbt_version")) + -sbt-version use the specified version of sbt (default: $sbt_release_version) + -sbt-force-latest force the use of the latest release of sbt: $sbt_release_version + -sbt-dev use the latest pre-release version of sbt: $sbt_unreleased_version + -sbt-jar use the specified jar as the sbt launcher + -sbt-launch-dir directory to hold sbt launchers (default: $sbt_launch_dir) + -sbt-launch-repo repo url for downloading sbt launcher jar (default: $(url_base "$sbt_version")) # scala version (default: as chosen by sbt) - -28 use $latest_28 - -29 use $latest_29 - -210 use $latest_210 - -211 use $latest_211 - -212 use $latest_212 - -213 use $latest_213 - -scala-home use the scala build at the specified directory - -scala-version use the specified version of scala - -binary-version use the specified scala version when searching for dependencies + -28 use $latest_28 + -29 use $latest_29 + -210 use $latest_210 + -211 use $latest_211 + -212 use $latest_212 + -213 use $latest_213 + -scala-home use the scala build at the specified directory + -scala-version use the specified version of scala + -binary-version use the specified scala version when searching for dependencies # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) - -java-home alternate JAVA_HOME + -java-home alternate JAVA_HOME # passing options to the jvm - note it does NOT use JAVA_OPTS due to pollution # The default set is used if JVM_OPTS is unset and no -jvm-opts file is found - $(default_jvm_opts) - JVM_OPTS environment variable holding either the jvm args directly, or - the reference to a file containing jvm args if given path is prepended by '@' (e.g. '@/etc/jvmopts') - Note: "@"-file is overridden by local '.jvmopts' or '-jvm-opts' argument. - -jvm-opts file containing jvm args (if not given, .jvmopts in project root is used if present) - -Dkey=val pass -Dkey=val directly to the jvm - -J-X pass option -X directly to the jvm (-J is stripped) + $(default_jvm_opts) + JVM_OPTS environment variable holding either the jvm args directly, or + the reference to a file containing jvm args if given path is prepended by '@' (e.g. '@/etc/jvmopts') + Note: "@"-file is overridden by local '.jvmopts' or '-jvm-opts' argument. 
+ -jvm-opts file containing jvm args (if not given, .jvmopts in project root is used if present) + -Dkey=val pass -Dkey=val directly to the jvm + -J-X pass option -X directly to the jvm (-J is stripped) # passing options to sbt, OR to this runner - SBT_OPTS environment variable holding either the sbt args directly, or - the reference to a file containing sbt args if given path is prepended by '@' (e.g. '@/etc/sbtopts') - Note: "@"-file is overridden by local '.sbtopts' or '-sbt-opts' argument. - -sbt-opts file containing sbt args (if not given, .sbtopts in project root is used if present) - -S-X add -X to sbt's scalacOptions (-S is stripped) + SBT_OPTS environment variable holding either the sbt args directly, or + the reference to a file containing sbt args if given path is prepended by '@' (e.g. '@/etc/sbtopts') + Note: "@"-file is overridden by local '.sbtopts' or '-sbt-opts' argument. + -sbt-opts file containing sbt args (if not given, .sbtopts in project root is used if present) + -S-X add -X to sbt's scalacOptions (-S is stripped) + + # passing options exclusively to this runner + SBTX_OPTS environment variable holding either the sbt-extras args directly, or + the reference to a file containing sbt-extras args if given path is prepended by '@' (e.g. '@/etc/sbtxopts') + Note: "@"-file is overridden by local '.sbtxopts' or '-sbtx-opts' argument. + -sbtx-opts file containing sbt-extras args (if not given, .sbtxopts in project root is used if present) EOM + exit 0 } -process_args () { - require_arg () { +process_args() { + require_arg() { local type="$1" local opt="$2" local arg="$3" @@ -367,50 +469,56 @@ process_args () { } while [[ $# -gt 0 ]]; do case "$1" in - -h|-help) usage; exit 0 ;; - -v) verbose=true && shift ;; - -d) addSbt "--debug" && shift ;; - -w) addSbt "--warn" && shift ;; - -q) addSbt "--error" && shift ;; - -x) debugUs=true && shift ;; - -trace) require_arg integer "$1" "$2" && trace_level="$2" && shift 2 ;; - -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; - -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; - -no-share) noshare=true && shift ;; - -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; - -sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;; - -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; - -offline) addSbt "set offline in Global := true" && shift ;; - -jvm-debug) require_arg port "$1" "$2" && addDebugger "$2" && shift 2 ;; - -batch) batch=true && shift ;; - -prompt) require_arg "expr" "$1" "$2" && setThisBuild shellPrompt "(s => { val e = Project.extract(s) ; $2 })" && shift 2 ;; - -script) require_arg file "$1" "$2" && sbt_script="$2" && addJava "-Dsbt.main.class=sbt.ScriptMain" && shift 2 ;; - - -sbt-create) sbt_create=true && shift ;; - -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;; + -h | -help) usage ;; + -v) verbose=true && shift ;; + -d) addSbt "--debug" && shift ;; + -w) addSbt "--warn" && shift ;; + -q) addSbt "--error" && shift ;; + -x) shift ;; # currently unused + -trace) require_arg integer "$1" "$2" && trace_level="$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + + -no-colors) addJava "-Dsbt.log.noformat=true" && addJava "-Dsbt.color=false" && shift ;; + -sbt-create) sbt_create=true && shift ;; + -sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -ivy) require_arg path "$1" "$2" && addJava 
"-Dsbt.ivy.home=$2" && shift 2 ;; + -no-share) noshare=true && shift ;; + -offline) addSbt "set offline in Global := true" && shift ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger "$2" && shift 2 ;; + -batch) batch=true && shift ;; + -prompt) require_arg "expr" "$1" "$2" && setThisBuild shellPrompt "(s => { val e = Project.extract(s) ; $2 })" && shift 2 ;; + -script) require_arg file "$1" "$2" && sbt_script="$2" && addJava "-Dsbt.main.class=sbt.ScriptMain" && shift 2 ;; + -sbt-version) require_arg version "$1" "$2" && sbt_explicit_version="$2" && shift 2 ;; - -sbt-force-latest) sbt_explicit_version="$sbt_release_version" && shift ;; - -sbt-dev) sbt_explicit_version="$sbt_unreleased_version" && shift ;; - -sbt-launch-dir) require_arg path "$1" "$2" && sbt_launch_dir="$2" && shift 2 ;; - -sbt-launch-repo) require_arg path "$1" "$2" && sbt_launch_repo="$2" && shift 2 ;; - -scala-version) require_arg version "$1" "$2" && setScalaVersion "$2" && shift 2 ;; - -binary-version) require_arg version "$1" "$2" && setThisBuild scalaBinaryVersion "\"$2\"" && shift 2 ;; - -scala-home) require_arg path "$1" "$2" && setThisBuild scalaHome "_root_.scala.Some(file(\"$2\"))" && shift 2 ;; - -java-home) require_arg path "$1" "$2" && setJavaHome "$2" && shift 2 ;; - -sbt-opts) require_arg path "$1" "$2" && sbt_opts_file="$2" && shift 2 ;; - -jvm-opts) require_arg path "$1" "$2" && jvm_opts_file="$2" && shift 2 ;; - - -D*) addJava "$1" && shift ;; - -J*) addJava "${1:2}" && shift ;; - -S*) addScalac "${1:2}" && shift ;; - -28) setScalaVersion "$latest_28" && shift ;; - -29) setScalaVersion "$latest_29" && shift ;; - -210) setScalaVersion "$latest_210" && shift ;; - -211) setScalaVersion "$latest_211" && shift ;; - -212) setScalaVersion "$latest_212" && shift ;; - -213) setScalaVersion "$latest_213" && shift ;; - new) sbt_new=true && : ${sbt_explicit_version:=$sbt_release_version} && addResidual "$1" && shift ;; - *) addResidual "$1" && shift ;; + -sbt-force-latest) sbt_explicit_version="$sbt_release_version" && shift ;; + -sbt-dev) sbt_explicit_version="$sbt_unreleased_version" && shift ;; + -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;; + -sbt-launch-dir) require_arg path "$1" "$2" && sbt_launch_dir="$2" && shift 2 ;; + -sbt-launch-repo) require_arg path "$1" "$2" && sbt_launch_repo="$2" && shift 2 ;; + + -28) setScalaVersion "$latest_28" && shift ;; + -29) setScalaVersion "$latest_29" && shift ;; + -210) setScalaVersion "$latest_210" && shift ;; + -211) setScalaVersion "$latest_211" && shift ;; + -212) setScalaVersion "$latest_212" && shift ;; + -213) setScalaVersion "$latest_213" && shift ;; + + -scala-version) require_arg version "$1" "$2" && setScalaVersion "$2" && shift 2 ;; + -binary-version) require_arg version "$1" "$2" && setThisBuild scalaBinaryVersion "\"$2\"" && shift 2 ;; + -scala-home) require_arg path "$1" "$2" && setThisBuild scalaHome "_root_.scala.Some(file(\"$2\"))" && shift 2 ;; + -java-home) require_arg path "$1" "$2" && setJavaHome "$2" && shift 2 ;; + -sbt-opts) require_arg path "$1" "$2" && sbt_opts_file="$2" && shift 2 ;; + -sbtx-opts) require_arg path "$1" "$2" && sbtx_opts_file="$2" && shift 2 ;; + -jvm-opts) require_arg path "$1" "$2" && jvm_opts_file="$2" && shift 2 ;; + + -D*) addJava "$1" && shift ;; + -J*) addJava "${1:2}" && shift ;; + -S*) addScalac "${1:2}" && shift ;; + + new) sbt_new=true && : ${sbt_explicit_version:=$sbt_release_version} && addResidual "$1" && shift ;; + + *) addResidual "$1" && shift ;; esac done } @@ -422,19 +530,31 @@ 
process_args "$@" readConfigFile() { local end=false until $end; do - read || end=true + read -r || end=true [[ $REPLY =~ ^# ]] || [[ -z $REPLY ]] || echo "$REPLY" - done < "$1" + done <"$1" } # if there are file/environment sbt_opts, process again so we # can supply args to this runner if [[ -r "$sbt_opts_file" ]]; then vlog "Using sbt options defined in file $sbt_opts_file" - while read opt; do extra_sbt_opts+=("$opt"); done < <(readConfigFile "$sbt_opts_file") + while read -r opt; do extra_sbt_opts+=("$opt"); done < <(readConfigFile "$sbt_opts_file") elif [[ -n "$SBT_OPTS" && ! ("$SBT_OPTS" =~ ^@.*) ]]; then vlog "Using sbt options defined in variable \$SBT_OPTS" - extra_sbt_opts=( $SBT_OPTS ) + IFS=" " read -r -a extra_sbt_opts <<<"$SBT_OPTS" +else + vlog "No extra sbt options have been defined" +fi + +# if there are file/environment sbtx_opts, process again so we +# can supply args to this runner +if [[ -r "$sbtx_opts_file" ]]; then + vlog "Using sbt options defined in file $sbtx_opts_file" + while read -r opt; do extra_sbt_opts+=("$opt"); done < <(readConfigFile "$sbtx_opts_file") +elif [[ -n "$SBTX_OPTS" && ! ("$SBTX_OPTS" =~ ^@.*) ]]; then + vlog "Using sbt options defined in variable \$SBTX_OPTS" + IFS=" " read -r -a extra_sbt_opts <<<"$SBTX_OPTS" else vlog "No extra sbt options have been defined" fi @@ -453,25 +573,24 @@ checkJava # only exists in 0.12+ setTraceLevel() { case "$sbt_version" in - "0.7."* | "0.10."* | "0.11."* ) echoerr "Cannot set trace level in sbt version $sbt_version" ;; - *) setThisBuild traceLevel $trace_level ;; + "0.7."* | "0.10."* | "0.11."*) echoerr "Cannot set trace level in sbt version $sbt_version" ;; + *) setThisBuild traceLevel "$trace_level" ;; esac } # set scalacOptions if we were given any -S opts -[[ ${#scalac_args[@]} -eq 0 ]] || addSbt "set scalacOptions in ThisBuild += \"${scalac_args[@]}\"" +[[ ${#scalac_args[@]} -eq 0 ]] || addSbt "set scalacOptions in ThisBuild += \"${scalac_args[*]}\"" -# Update build.properties on disk to set explicit version - sbt gives us no choice -[[ -n "$sbt_explicit_version" && -z "$sbt_new" ]] && update_build_props_sbt "$sbt_explicit_version" +[[ -n "$sbt_explicit_version" && -z "$sbt_new" ]] && addJava "-Dsbt.version=$sbt_explicit_version" vlog "Detected sbt version $sbt_version" if [[ -n "$sbt_script" ]]; then - residual_args=( $sbt_script ${residual_args[@]} ) + residual_args=("$sbt_script" "${residual_args[@]}") else # no args - alert them there's stuff in here - (( argumentCount > 0 )) || { + ((argumentCount > 0)) || { vlog "Starting $script_name: invoke with -help for other options" - residual_args=( shell ) + residual_args=(shell) } fi @@ -487,6 +606,7 @@ EOM } # pick up completion if present; todo +# shellcheck disable=SC1091 [[ -r .sbt_completion.sh ]] && source .sbt_completion.sh # directory to store sbt launchers @@ -496,7 +616,7 @@ EOM # no jar? download it. [[ -r "$sbt_jar" ]] || acquire_sbt_jar || { # still no jar? uh-oh. - echo "Download failed. Obtain the jar manually and place it at $sbt_jar" + echo "Could not download and verify the launcher. Obtain the jar manually and place it at $sbt_jar" exit 1 } @@ -506,12 +626,12 @@ if [[ -n "$noshare" ]]; then done else case "$sbt_version" in - "0.7."* | "0.10."* | "0.11."* | "0.12."* ) + "0.7."* | "0.10."* | "0.11."* | "0.12."*) [[ -n "$sbt_dir" ]] || { sbt_dir="$HOME/.sbt/$sbt_version" vlog "Using $sbt_dir as sbt dir, -sbt-dir to override." 
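The switch above from the old unquoted expansion, extra_sbt_opts=( $SBT_OPTS ), to IFS=" " read -r -a is not cosmetic: unquoted array assignment both word-splits and glob-expands, so a pattern character in the variable can silently expand against files in the working directory, whereas read -r -a splits on the chosen IFS only and never globs. A quick illustration (the option values are made up):

    # hypothetical value; note the glob character at the end
    SBT_OPTS='-mem 2048 -S-Xlint:*'
    unsafe=( $SBT_OPTS )                    # '*' may expand against files in $PWD
    IFS=" " read -r -a safe <<<"$SBT_OPTS"  # splits on spaces only, never globs
    printf '[%s]\n' "${safe[@]}"            # [-mem] [2048] [-S-Xlint:*], one per line

The same readConfigFile helper feeds .sbtopts, the new .sbtxopts, and .jvmopts alike, skipping blank lines and #-comments, so each of those files takes one option per line.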
} - ;; + ;; esac if [[ -n "$sbt_dir" ]]; then @@ -521,58 +641,21 @@ fi if [[ -r "$jvm_opts_file" ]]; then vlog "Using jvm options defined in file $jvm_opts_file" - while read opt; do extra_jvm_opts+=("$opt"); done < <(readConfigFile "$jvm_opts_file") + while read -r opt; do extra_jvm_opts+=("$opt"); done < <(readConfigFile "$jvm_opts_file") elif [[ -n "$JVM_OPTS" && ! ("$JVM_OPTS" =~ ^@.*) ]]; then vlog "Using jvm options defined in \$JVM_OPTS variable" - extra_jvm_opts=( $JVM_OPTS ) + IFS=" " read -r -a extra_jvm_opts <<<"$JVM_OPTS" else vlog "Using default jvm options" - extra_jvm_opts=( $(default_jvm_opts) ) + IFS=" " read -r -a extra_jvm_opts <<<"$( default_jvm_opts)" fi # traceLevel is 0.12+ [[ -n "$trace_level" ]] && setTraceLevel -main () { - execRunner "$java_cmd" \ - "${extra_jvm_opts[@]}" \ - "${java_args[@]}" \ - -jar "$sbt_jar" \ - "${sbt_commands[@]}" \ - "${residual_args[@]}" -} - -# sbt inserts this string on certain lines when formatting is enabled: -# val OverwriteLine = "\r\u001BM\u001B[2K" -# ...in order not to spam the console with a million "Resolving" lines. -# Unfortunately that makes it that much harder to work with when -# we're not going to print those lines anyway. We strip that bit of -# line noise, but leave the other codes to preserve color. -mainFiltered () { - local ansiOverwrite='\r\x1BM\x1B[2K' - local excludeRegex=$(egrep -v '^#|^$' ~/.sbtignore | paste -sd'|' -) - - echoLine () { - local line="$1" - local line1="$(echo "$line" | sed 's/\r\x1BM\x1B\[2K//g')" # This strips the OverwriteLine code. - local line2="$(echo "$line1" | sed 's/\x1B\[[0-9;]*[JKmsu]//g')" # This strips all codes - we test regexes against this. - - if [[ $line2 =~ $excludeRegex ]]; then - [[ -n $debugUs ]] && echo "[X] $line1" - else - [[ -n $debugUs ]] && echo " $line1" || echo "$line1" - fi - } - - echoLine "Starting sbt with output filtering enabled." - main | while read -r line; do echoLine "$line"; done -} - -# Only filter if there's a filter file and we don't see a known interactive command. -# Obviously this is super ad hoc but I don't know how to improve on it. Testing whether -# stdin is a terminal is useless because most of my use cases for this filtering are -# exactly when I'm at a terminal, running sbt non-interactively. -shouldFilter () { [[ -f ~/.sbtignore ]] && ! egrep -q '\b(shell|console|consoleProject)\b' <<<"${residual_args[@]}"; } - -# run sbt -if shouldFilter; then mainFiltered; else main; fi +execRunner "$java_cmd" \ + "${extra_jvm_opts[@]}" \ + "${java_args[@]}" \ + -jar "$sbt_jar" \ + "${sbt_commands[@]}" \ + "${residual_args[@]}" diff --git a/scripts/lwcPublishAndTag b/scripts/lwcPublishAndTag deleted file mode 100755 index ea91bb5d..00000000 --- a/scripts/lwcPublishAndTag +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail # STRICT MODE -IFS=$'\n\t' # http://redsymbol.net/articles/unofficial-bash-strict-mode/ - -SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) - -WS_DIR="$SCRIPT_DIR/.." 
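The largest functional removal above is the mainFiltered/shouldFilter machinery: the runner no longer strips sbt's OverwriteLine sequence (carriage return, ESC M, ESC [2K) or filters output against ~/.sbtignore; it now launches the JVM directly. Anyone who relied on that behavior can approximate it outside the script. A rough sketch, assuming the runner is invoked as ./sbt, a GNU or BSD sed, and a ~/.sbtignore holding one extended regex per line; none of this is part of the script itself:

    # Strip the OverwriteLine sequence first, then remaining color/erase codes,
    # mirroring the two sed passes the removed wrapper applied to each line.
    strip_ansi() {
      sed -e $'s/\r\x1bM\x1b\[2K//g' -e $'s/\x1b\[[0-9;]*[JKmsu]//g'
    }
    ./sbt compile 2>&1 | strip_ansi | grep -Ev -f <(grep -Ev '^#|^$' ~/.sbtignore)

Unlike the removed wrapper, which matched against stripped text but echoed the colored line, this pipeline prints the fully stripped text, so color is lost along with the noise.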
- -SBT="$WS_DIR/sbt" - -"$SBT" transferPublishAndTagResources - -# Set TRAVIS_JOB_NUMBER as a workaround to meet sbt-slamdata's publishAndTag assumption -TRAVIS_JOB_NUMBER=1 scripts/publishAndTag 'slamdata/quasar-s3' - diff --git a/scripts/travis_tag b/scripts/travis_tag deleted file mode 100644 index c70f1ac8..00000000 --- a/scripts/travis_tag +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -if [[ "$TRAVIS_PULL_REQUEST" != "false" || - ("$TRAVIS_BRANCH" != "master" && "$TRAVIS_BRANCH" != backport/*) ]] -then exit -fi - -git config --global user.email "builds@travis-ci.com" -git config --global user.name "Travis CI" -TAG=v$VERSION -git tag $TAG -a -m "Quasar S3 Connector $VERSION" -git push origin $TAG -export TRAVIS_TAG=v$VERSION diff --git a/scripts/version b/scripts/version deleted file mode 100644 index 26b4fac4..00000000 --- a/scripts/version +++ /dev/null @@ -1 +0,0 @@ -export VERSION=$(sed 's/.*"\(.*\)"/\1/' version.sbt) diff --git a/testCredentials.json.b64 b/testCredentials.json.b64 index 370abd5a..1a3d8c40 100644 --- a/testCredentials.json.b64 +++ b/testCredentials.json.b64 @@ -1 +1 @@ -ewogICAiY3JlZGVudGlhbHMiOiB7CiAgICAgImFjY2Vzc0tleSI6ICJBS0lBSjRRWk5PMlJWQ0E3WE03QSIsCiAgICAgInNlY3JldEtleSI6ICJtdENiclNwY1hTZHNqL0VKZnp4UnZ5NFlxR2g0K3ZzTWhFMnpoeTZBIiwKICAgICAicmVnaW9uIjogInVzLWVhc3QtMSIKICAgfQp9Cg== +ewogICJhY2Nlc3NLZXkiOiAiQUtJQUo0UVpOTzJSVkNBN1hNN0EiLAogICJzZWNyZXRLZXkiOiAibXRDYnJTcGNYU2Rzai9FSmZ6eFJ2eTRZcUdoNCt2c01oRTJ6aHk2QSIsCiAgInJlZ2lvbiI6ICJ1cy1lYXN0LTEiCn0K diff --git a/testCredentials.json.example b/testCredentials.json.example index c73d804e..892635db 100644 --- a/testCredentials.json.example +++ b/testCredentials.json.example @@ -1,7 +1,5 @@ { - "credentials": { - "accessKey": "", - "secretKey": "", - "region": "" - } + "accessKey": "", + "secretKey": "", + "region": "" } diff --git a/version.sbt b/version.sbt index efd2a596..37a3c680 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "4.2.1" \ No newline at end of file +version in ThisBuild := "45.0.13"
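For reference on the deleted scripts/version helper: it extracted the quoted version from version.sbt with a single greedy sed capture. Against the new file contents the same command behaves like this (illustrative session):

    $ cat version.sbt
    version in ThisBuild := "45.0.13"
    $ sed 's/.*"\(.*\)"/\1/' version.sbt
    45.0.13

Greedy matching makes the capture settle on the last quoted field in the line, so the command tolerates any prefix, including the version in ThisBuild := form used here.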