Skip to content

Implement link checker #67

Open
Open
@rsokl

Description

@rsokl

Ensure that links are not broken and that internal links use HTTPS rather than HTTP. This can be done by adapting the example provided by Hypothesis:

from hypothesis.stateful import GenericStateMachine
import hypothesis.strategies as st
from requests_html import HTMLSession


class LinkChecker(GenericStateMachine):
    """State machine that crawls hypothesis.works one page per step.

    Every step fetches a URL, checks the response, and the links found on
    that page become the candidates for the following step.
    """

    def __init__(self):
        super(LinkChecker, self).__init__()
        self.session = HTMLSession()
        # Response of the most recent fetch; None until a page has been visited.
        self.result = None

    def steps(self):
        """Return the strategy that produces the next URL to visit."""
        if self.result is not None:
            candidates = []
            for link in self.result.html.absolute_links:
                # Stay on our own site; don't crawl to other people's sites.
                if not link.startswith("https://hypothesis.works"):
                    continue
                # Avoid Cloudflare's bot protection. We are a bot but we
                # don't care about the info it's hiding.
                if '/cdn-cgi/' in link:
                    continue
                candidates.append(link)
            return st.sampled_from(candidates)

        # Nothing fetched yet: always start on the home page.
        return st.just("https://hypothesis.works/")

    def execute_step(self, step):
        """Fetch the URL ``step`` and check the response and its links.

        The response is stored on ``self.result`` before any check runs.
        Raises ``AssertionError`` if the status code is not 200 or if any
        link on the page contains the plain-HTTP form of the site address.
        """
        response = self.session.get(step)
        self.result = response

        assert response.status_code == 200

        for link in response.html.absolute_links:
            # All links should be HTTPS
            assert "http://hypothesis.works" not in link


# Module-level alias for the state machine's `TestCase` attribute.
# NOTE(review): presumably named `Test*` so the test runner collects it —
# confirm against the runner's discovery rules.
TestLinks = LinkChecker.TestCase

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions