diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..452f85d --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,71 @@ +name: Build + +# Controls when the workflow will run +on: + workflow_dispatch: + push: + branches: + - 'main' + - 'dev' + tags: + - 'v*.*.*' + pull_request: + branches: + - 'main' + - 'dev' + +# permissions are needed if pushing to ghcr.io +permissions: + packages: write + +jobs: + build: + name: "Build and release" + runs-on: ubuntu-latest + steps: + # Get the repository's code + - name: ⬇️ checkout + uses: actions/checkout@v2 + + # https://github.com/docker/setup-qemu-action + - name: ⚙ set up qemu + uses: docker/setup-qemu-action@v1 + + # https://github.com/docker/setup-buildx-action + - name: ⚙ set up docker buildx + id: buildx + uses: docker/setup-buildx-action@v1 + + - name: 👤 login to ghcr + if: github.event_name != 'pull_request' + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 🏷 docker meta + id: docker_meta # you'll use this in the next step + uses: docker/metadata-action@v3 + with: + # list of Docker images to use as base name for tags + images: | + ghcr.io/${{ github.repository_owner }}/13ft + # Docker tags based on the following events/attributes + tags: | + type=schedule + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - name: 📦 build and ⬆ push + uses: docker/build-push-action@v2 + with: + context: . 
+ platforms: linux/amd64,linux/arm/v7,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.docker_meta.outputs.tags }} + labels: ${{ steps.docker_meta.outputs.labels }} diff --git a/Dockerfile b/Dockerfile index 09136b7..dbd74ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,16 +2,16 @@ FROM python:3.9.18-alpine # Generic labels LABEL maintainer="Arian Mollik Wasi " -LABEL version="0.2.3" +LABEL version="0.3.4" LABEL description="My own custom 12ft.io replacement" LABEL url="https://github.com/wasi-master/13ft/" LABEL documentation="https://github.com/wasi-master/13ft/blob/main/README.md" # OCI compliant labels LABEL org.opencontainers.image.source="https://github.com/wasi-master/13ft" -LABEL org.opencontainers.image.authors="Arian Mollik Wasi, Justin Paul, Alfredo Casanova" +LABEL org.opencontainers.image.authors="Arian Mollik Wasi" LABEL org.opencontainers.image.created="2023-10-31T22:53:00Z" -LABEL org.opencontainers.image.version="0.2.3" +LABEL org.opencontainers.image.version="0.3.4" LABEL org.opencontainers.image.url="https://github.com/wasi-master/13ft/" LABEL org.opencontainers.image.source="https://github.com/wasi-master/13ft/" LABEL org.opencontainers.image.description="My own custom 12ft.io replacement" diff --git a/README.md b/README.md index 34b4802..b60af76 100644 --- a/README.md +++ b/README.md @@ -16,16 +16,18 @@ It pretends to be GoogleBot (Google's web crawler) and gets the same content tha Requirements: - docker -- docker-compose +- Docker Compose (available as `docker compose`) -First, clone the repo to your machine, then run the following commands: +First, clone the repo to your machine then run the following commands: ```sh git clone https://github.com/wasi-master/13ft.git cd 13ft -docker-compose up +docker compose up ``` +The image is also available from [DockerHub](https://hub.docker.com/r/wasimaster/13ft "docker pull wasimaster/13ft") or [ghcr.io](https://github.com/wasi-master/13ft/pkgs/container/13ft "docker 
pull ghcr.io/wasi-master/13ft:0.2.3") so the command `docker pull wasimaster/13ft` also works. + ### Standard Python script First, make sure you have [python](https://python.org) installed on your machine. Next, clone the git repo. Then go to a terminal (`Command Prompt` on Windows, `Terminal` on Mac) and run the following command: @@ -45,7 +47,116 @@ Then run `portable.py`, click [this link](https://realpython.com/run-python-scri python portable.py ``` -Then follow these simple steps +Then open the link shown in the terminal in your browser and you'll be able to use 13ft. + +### Installation using venv and running under specific bind address / port + +```sh +python3 -m venv venv +source venv/bin/activate +python -m pip install -r requirements.txt +FLASK_APP=app/portable.py flask run --host=127.0.0.1 --port=9982 +``` + + +## Using as a Bookmarklet in Chrome: + +You can create a bookmarklet that performs the URL transformation by writing a small JavaScript snippet. Below is the JavaScript code for your bookmarklet: +```javascript +javascript:(function(){window.location.href='https://13ft.wasimaster.me/'+encodeURIComponent(window.location.href);})(); +``` +You can replace https://13ft.wasimaster.me with your own 13ft instance if desired. + +Steps: +1. Open the Bookmark Manager: + + Click on the three dots (menu) in the top-right corner of Chrome. + Go to Bookmarks > Bookmark manager, or simply press Ctrl+Shift+O on Windows/Linux or Cmd+Option+B on Mac. +2. Create a new bookmark: + + In the Bookmark Manager, click the three-dot menu in the top-right corner of the window and select Add new bookmark. +3. Enter the bookmark details: + - Name: Enter a name for your bookmarklet, such as "13ft-ize". This name will show as a bookmark title in the bookmarks bar. + - URL: Paste the JavaScript code provided above into the URL field. +4. Click Save. + +Using the Bookmarklet: + +Navigate to the page whose URL you want to use 13ft on. 
+ +Click on the bookmarklet you saved in your bookmarks bar. The browser will redirect you to the 13ft version of the URL using your service. + +To show the bookmarks bar in Chrome, click the icon with three horizontal bars in the top right corner to open options. In options, hover over "Bookmarks" to display a second menu where you can click the "Show bookmarks bar" text to toggle the bar on or off. + +Instructions courtesy of [@barakplasma](https://github.com/barakplasma) + +## Customizing listening host and port, Systemd / Reverse-proxy example + +### Systemd Service + +``` +/lib/systemd/system/13ft.service +``` + +``` +[Unit] +Description=13ft Flask Service +Wants=network-online.target +After=network-online.target + +[Service] +Type=simple +Restart=on-failure +RestartSec=10 +User=www-data +Group=www-data +Environment=APP_PATH=/var/www/paywall-break +Environment=FLASK_APP=app/portable.py + +ExecStart=/bin/bash -c "cd ${APP_PATH};${APP_PATH}/venv/bin/flask run --host=127.0.0.1 --port=22113" + +# Make sure stderr/stdout is captured in the systemd journal. +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target +``` + +### Reverse Proxy + +``` + + ErrorLog ${APACHE_LOG_DIR}/13ft-error.log + CustomLog ${APACHE_LOG_DIR}/13ft-access.log combined + + ProxyRequests Off + + SSLEngine on + SSLCertificateFile /etc/ssl/certs/ssl-cert-snakeoil.pem + SSLCertificateKeyFile /etc/ssl/private/ssl-cert-snakeoil.key + Header always set Strict-Transport-Security "max-age=63072000" + SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1 + + SSLHonorCipherOrder off + SSLSessionTickets off + + Protocols h2 http/1.1 + + + Order deny,allow + Allow from all + + + + ProxyPass / http://127.0.0.1:22113/ + ProxyPassReverse / http://127.0.0.1:22113/ + + + +``` + +## Screenshots ### Step 1 @@ -71,4 +182,4 @@ Voilà you now have bypassed the paywall and ads You can also append the url at the end of the link and it will also work. 
(e.g if your server is running at `http://127.0.0.1:5000` then you can go to `http://127.0.0.1:5000/https://example.com` and it will read out the contents of `https://example.com`) -This feature is possible thanks to [atcasanova](https://github.com/atcasanova) +This feature was implemented by [@atcasanova](https://github.com/atcasanova) diff --git a/app/index.html b/app/index.html index d4da1e4..45b6b4d 100644 --- a/app/index.html +++ b/app/index.html @@ -1,73 +1,177 @@ + + + 13ft Ladder - - - + + -
-
-

- -

-
- - -
+
+ +
+
+

Enter Website Link

+ + + +
+ + - + + \ No newline at end of file diff --git a/app/index.py b/app/index.py index 8f15d85..6086b2e 100644 --- a/app/index.py +++ b/app/index.py @@ -1,20 +1,49 @@ import flask import requests from flask import request +from bs4 import BeautifulSoup +from urllib.parse import urlparse, urljoin app = flask.Flask(__name__) googlebot_headers = { - "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" + "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.119 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" } +def add_base_tag(html_content, original_url): + soup = BeautifulSoup(html_content, 'html.parser') + parsed_url = urlparse(original_url) + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" + + # Handle paths that are not root, e.g., "https://x.com/some/path/w.html" + if parsed_url.path and not parsed_url.path.endswith('/'): + base_url = urljoin(base_url, parsed_url.path.rsplit('/', 1)[0] + '/') + base_tag = soup.find('base') + + print(base_url) + if not base_tag: + new_base_tag = soup.new_tag('base', href=base_url) + if soup.head: + soup.head.insert(0, new_base_tag) + else: + head_tag = soup.new_tag('head') + head_tag.insert(0, new_base_tag) + soup.insert(0, head_tag) + + return str(soup) def bypass_paywall(url): """ Bypass paywall for a given url """ - response = requests.get(url, headers=googlebot_headers) - response.encoding = response.apparent_encoding - return response.text + if url.startswith("http"): + response = requests.get(url, headers=googlebot_headers) + response.encoding = response.apparent_encoding + return add_base_tag(response.text, response.url) + + try: + return bypass_paywall("https://" + url) + except requests.exceptions.RequestException as e: + return 
bypass_paywall("http://" + url) @app.route("/") @@ -29,18 +58,17 @@ def show_article(): return bypass_paywall(link) except requests.exceptions.RequestException as e: return str(e), 400 - except e: - raise e + except Exception as exc: + raise exc + @app.route("/", defaults={"path": ""}) -@app.route('/', methods=["GET"]) +@app.route("/", methods=["GET"]) def get_article(path): - print(path) full_url = request.url - parts = full_url.split('/',4) + parts = full_url.split("/", 4) if len(parts) >= 5: - actual_url = 'https://' + parts[4].lstrip('/') - print(actual_url) + actual_url = "https://" + parts[4].lstrip("/") try: return bypass_paywall(actual_url) except requests.exceptions.RequestException as e: @@ -50,4 +78,5 @@ def get_article(path): else: return "Invalid URL", 400 + app.run(debug=False) diff --git a/app/portable.py b/app/portable.py index a71454b..7449035 100644 --- a/app/portable.py +++ b/app/portable.py @@ -1,101 +1,227 @@ import flask import requests from flask import request +from bs4 import BeautifulSoup +from urllib.parse import urlparse, urljoin app = flask.Flask(__name__) googlebot_headers = { - "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" + "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.119 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" } html = """ + + + 13ft Ladder - - - - - + -
-
-

- -

-
- - -
+
+ +
+
+

Enter Website Link

+ + + +
+ + + """ +def add_base_tag(html_content, original_url): + soup = BeautifulSoup(html_content, 'html.parser') + parsed_url = urlparse(original_url) + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" + + # Handle paths that are not root, e.g., "https://x.com/some/path/w.html" + if parsed_url.path and not parsed_url.path.endswith('/'): + base_url = urljoin(base_url, parsed_url.path.rsplit('/', 1)[0] + '/') + base_tag = soup.find('base') + + print(base_url) + if not base_tag: + new_base_tag = soup.new_tag('base', href=base_url) + if soup.head: + soup.head.insert(0, new_base_tag) + else: + head_tag = soup.new_tag('head') + head_tag.insert(0, new_base_tag) + soup.insert(0, head_tag) + + return str(soup) + def bypass_paywall(url): """ Bypass paywall for a given url """ - response = requests.get(url, headers=googlebot_headers) - response.encoding = response.apparent_encoding - return response.text + if url.startswith("http"): + response = requests.get(url, headers=googlebot_headers) + response.encoding = response.apparent_encoding + return add_base_tag(response.text, response.url) + + try: + return bypass_paywall("https://" + url) + except requests.exceptions.RequestException as e: + return bypass_paywall("http://" + url) @app.route("/") @@ -113,13 +239,14 @@ def show_article(): except e: raise e + @app.route("/", defaults={"path": ""}) -@app.route('/', methods=["GET"]) +@app.route("/", methods=["GET"]) def get_article(path): full_url = request.url - parts = full_url.split('/',4) + parts = full_url.split("/", 4) if len(parts) >= 5: - actual_url = 'https://' + parts[4].lstrip('/') + actual_url = "https://" + parts[4].lstrip("/") try: return bypass_paywall(actual_url) except requests.exceptions.RequestException as e: @@ -130,4 +257,4 @@ def get_article(path): return "Invalid URL", 400 -app.run(host='0.0.0.0', port=5000, debug=False) +app.run(host="0.0.0.0", port=5000, debug=False) diff --git a/app/requirements.txt b/app/requirements.txt index 7e10602..b2d4e9d 100644 
--- a/app/requirements.txt +++ b/app/requirements.txt @@ -1 +1,3 @@ flask +requests +bs4 diff --git a/docker-compose.yaml b/docker-compose.yaml index 799a2a8..e05f13f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,8 @@ -version: '3' services: 13ft: - build: . - #image: your-image-name:tag + container_name: 13ft + hostname: 13ft + image: ghcr.io/wasi-master/13ft:latest + restart: unless-stopped ports: - - "5000:5000" \ No newline at end of file + - "5000:5000" diff --git a/requirements.txt b/requirements.txt index 30692b7..b2d4e9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ flask requests +bs4