Doc server broken link detection
The integration test, when run with -a, now detects and reports broken links and orphaned pages.

Broken links either point to an invalid page (404) or to an invalid anchor. Orphaned pages are pages that cannot be reached from the home pages by following links alone.
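
For intuition, orphan detection reduces to a reachability walk over the link graph. A minimal sketch of the idea (hypothetical names, not the LinkErrorDetector API, which also renders each page and verifies anchors):

from collections import deque

def find_orphans(links_from, all_pages, home_pages):
  # links_from: dict mapping each page to the pages it links to.
  # Any page not reachable from |home_pages| by following links is orphaned.
  seen = set(home_pages)
  queue = deque(home_pages)
  while queue:
    page = queue.popleft()
    for target in links_from.get(page, ()):
      if target in all_pages and target not in seen:
        seen.add(target)
        queue.append(target)
  return set(all_pages) - seen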

BUG=147747

Review URL: https://chromiumcodereview.appspot.com/17816005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@212240 0039d316-1c4b-4281-b951-d872f2087c98
jaredshumway94@gmail.com committed Jul 18, 2013
1 parent 6cc885e commit 715ce5e
Showing 11 changed files with 387 additions and 18 deletions.
chrome/common/extensions/docs/server2/app.yaml (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
application: chrome-apps-doc
version: 2-8-3
version: 2-9-3
runtime: python27
api_version: 1
threadsafe: false
chrome/common/extensions/docs/server2/cron.yaml (8 changes: 4 additions & 4 deletions)
@@ -2,19 +2,19 @@ cron:
- description: Load everything for trunk.
  url: /_cron/trunk
  schedule: every 5 minutes
  target: 2-8-3
  target: 2-9-3

- description: Load everything for dev.
  url: /_cron/dev
  schedule: every 5 minutes
  target: 2-8-3
  target: 2-9-3

- description: Load everything for beta.
  url: /_cron/beta
  schedule: every 5 minutes
  target: 2-8-3
  target: 2-9-3

- description: Load everything for stable.
  url: /_cron/stable
  schedule: every 5 minutes
  target: 2-8-3
  target: 2-9-3
chrome/common/extensions/docs/server2/cron_servlet.py (13 changes: 4 additions & 9 deletions)
@@ -10,18 +10,16 @@
from appengine_wrappers import (
    GetAppVersion, DeadlineExceededError, IsDevServer, logservice)
from branch_utility import BranchUtility
from caching_file_system import CachingFileSystem
from compiled_file_system import CompiledFileSystem
from empty_dir_file_system import EmptyDirFileSystem
from file_system_util import CreateURLsFromPaths
from github_file_system import GithubFileSystem
from host_file_system_creator import HostFileSystemCreator
from object_store_creator import ObjectStoreCreator
from render_servlet import RenderServlet
from server_instance import ServerInstance
from servlet import Servlet, Request, Response
from subversion_file_system import SubversionFileSystem
import svn_constants
from third_party.json_schema_compiler.memoize import memoize

class _SingletonRenderServletDelegate(RenderServlet.Delegate):
  def __init__(self, server_instance):
@@ -87,16 +85,13 @@ def get_via_render_servlet(path):
def run_cron_for_dir(d, path_prefix=''):
  success = True
  start_time = time.time()
  # TODO(jshumway): use server_instance.host_file_system.Walk.
  # TODO(kalman): delete me where it's set.
  files = [f for f in server_instance.content_cache.GetFromFileListing(d)
           if not f.endswith('/') and f != 'redirects.json']
  files = dict(
      CreateURLsFromPaths(server_instance.host_file_system, d, path_prefix))
  logging.info('cron/%s: rendering %s files from %s...' % (
      channel, len(files), d))
  try:
    for i, f in enumerate(files):
    for i, path in enumerate(files):
      error = None
      path = '%s%s' % (path_prefix, f)
      try:
        response = get_via_render_servlet(path)
        if response.status != 200:
chrome/common/extensions/docs/server2/file_system_util.py (14 changes: 14 additions & 0 deletions)
@@ -0,0 +1,14 @@
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath

def CreateURLsFromPaths(file_system, directory, urlprefix):
  '''Yields a (url, path) pair for every file in |directory|, where the URL
  is the file's path relative to |directory|, prefixed by |urlprefix|.
  '''
  for root, _, files in file_system.Walk(directory):
    for f in files:
      url = posixpath.join(urlprefix, root[len(directory) + 1:], f)
      yield url, '%s/%s' % (root, f)
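
For context, the cron servlet above consumes this generator as dict(CreateURLsFromPaths(...)). A small illustration against a stub file system (the stub and its contents are made up; Walk is assumed to behave like os.walk):

class StubFileSystem(object):
  def Walk(self, directory):
    # Mimics os.walk: yields (root, dirs, files) tuples.
    yield directory, ['subdir'], ['index.html']
    yield directory + '/subdir', [], ['page.html']

for url, path in CreateURLsFromPaths(StubFileSystem(), 'templates/public', 'docs'):
  print('%s <- %s' % (url, path))
# docs/index.html <- templates/public/index.html
# docs/subdir/page.html <- templates/public/subdir/page.html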
chrome/common/extensions/docs/server2/handler.py (2 changes: 1 addition & 1 deletion)
@@ -3,8 +3,8 @@
# found in the LICENSE file.

from cron_servlet import CronServlet
from patch_servlet import PatchServlet
from instance_servlet import InstanceServlet
from patch_servlet import PatchServlet
from servlet import Servlet, Request, Response

_DEFAULT_SERVLET = InstanceServlet.GetConstructor()
chrome/common/extensions/docs/server2/integration_test.py (53 changes: 52 additions & 1 deletion)
@@ -8,13 +8,16 @@
import build_server
build_server.main()

import logging
from itertools import groupby
from operator import itemgetter
import optparse
import os
import sys
import time
import unittest

from link_error_detector import LinkErrorDetector
from local_file_system import LocalFileSystem
from local_renderer import LocalRenderer
from fake_fetchers import ConfigureFakeFetchers
from handler import Handler
@@ -40,6 +43,24 @@ def _GetPublicFiles():
        public_files['/'.join((relative_posix_path, filename))] = f.read()
  return public_files

def _PrintBrokenLinks(broken_links):
  '''Prints out broken links in a more readable format.
  '''
  col_width = max(len(link[0]) for link in broken_links)
  getter = itemgetter(1)

  def pretty_print(prefix, message):
    print("%s%s -> %s" % (prefix, (col_width - len(prefix)) * ' ', message))

  for target, links in groupby(sorted(broken_links, key=getter), getter):
    links = [l[0] for l in links]
    if len(links) > 50:
      out = "%s and %d others" % (links[0], len(links) - 1)
      pretty_print(out, target)
    else:
      for link in links:
        pretty_print(link, target)
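
With made-up data, the output groups each broken source page under the target it failed to resolve, padded to the longest source path:

_PrintBrokenLinks([
    ('/extensions/tabs.html', '/extensions/windws.html'),
    ('/apps/app.html', '/extensions/windws.html'),
])
# Prints:
# /extensions/tabs.html -> /extensions/windws.html
# /apps/app.html        -> /extensions/windws.html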

class IntegrationTest(unittest.TestCase):
  def setUp(self):
    ConfigureFakeFetchers()
@@ -61,6 +82,33 @@ def testCronAndPublicFiles(self):
    finally:
      print('Took %s seconds' % (time.time() - start_time))

print("Checking for broken links...")
start_time = time.time()
link_error_detector = LinkErrorDetector(
LocalFileSystem(os.path.join(sys.path[0], os.pardir, os.pardir)),
lambda path: Handler(Request.ForTest(path)).Get(),
'templates/public',
('extensions/index.html', 'apps/about_apps.html'))

broken_links, broken_anchors = link_error_detector.GetBrokenLinks()
if broken_links or broken_anchors:
# TODO(jshumway): Test should fail when broken links are detected.
print('Warning: Found %d broken links:' % (
len(broken_links + broken_anchors)))
_PrintBrokenLinks(broken_links + broken_anchors)

print('Took %s seconds.' % (time.time() - start_time))

print('Searching for orphaned pages...')
start_time = time.time()
orphaned_pages = link_error_detector.GetOrphanedPages()
if orphaned_pages:
# TODO(jshumway): Test should fail when orphaned pages are detected.
print('Warning: Found %d orphaned pages:' % len(orphaned_pages))
for page in orphaned_pages:
print(page)
print('Took %s seconds.' % (time.time() - start_time))

public_files = _GetPublicFiles()

print('Rendering %s public files...' % len(public_files.keys()))
@@ -103,6 +151,9 @@ def testExplicitFiles(self):
    finally:
      print('Took %s seconds' % (time.time() - start_time))

    # TODO(jshumway): Check page for broken links (currently prohibited by the
    # time it takes to render the pages).

  @DisableLogging('warning')
  def testFileNotFound(self):
    response = Handler(Request.ForTest('/extensions/notfound.html')).Get()