Skip to content

Commit

Permalink
Use effective_status for Page#status (#1103)
Browse files Browse the repository at this point in the history
Pages have a `status` attribute that is meant to be a convenient way to figure out what the current status of a page is, skipping over short-lived, spurious errors from a single snapshot. A while back, we added "effective" status codes to versions, which attempt to figure out if a version with a 200 status code actually should have been an error code (surprisingly common!). We should have updated Page's status calculation to use the effective status at that point, since a page's status is really about its effective current state, but we missed it at the time.
  • Loading branch information
Mr0grog authored Aug 10, 2023
1 parent 3632762 commit c4ce127
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
9 changes: 5 additions & 4 deletions app/models/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def ensure_page_urls
urls.find_or_create_by!(url:) if saved_change_to_attribute?('url')
end

def update_status
new_status = calculate_status
def update_status(relative_to: nil)
new_status = calculate_status(relative_to:)
self.update(status: new_status) unless new_status.zero?
self.status
end
Expand Down Expand Up @@ -317,9 +317,10 @@ def calculate_status(relative_to: nil)
version_time = last_time - capture_time
total_time += version_time

if version.status >= 400
version_status = version.effective_status
if version_status >= 400
error_time += version_time
latest_error ||= version.status
latest_error ||= version_status
end
last_time = version.capture_time
end
Expand Down
15 changes: 13 additions & 2 deletions lib/tasks/update_page_statuses.rake
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
desc 'Update the `status` field on all pages. The first parameter optionally sets what pages to update: "recent" (default, pages updated recently enough to change status), "unknown" (only pages with unknown status), "all" (all pages)'
task :update_page_statuses, [:where] => [:environment] do |_t, args|
task :update_page_statuses, [:where, :at_time] => [:environment] do |_t, args|
where_options = ['recent', 'unknown', 'all']
where = args[:where] || where_options[0]
abort("First argument must be one of (#{where_options.join ', '})") unless where_options.include? where

at_time = args[:at_time]
if at_time.present? && at_time != 'latest_version'
at_time = Time.parse(args[:at_time])
end

ActiveRecord::Migration.say_with_time('Updating status codes on pages...') do
DataHelpers.with_activerecord_log_level(:error) do
page_set = Page.all.order(created_at: :asc)
Expand All @@ -22,7 +27,13 @@ task :update_page_statuses, [:where] => [:environment] do |_t, args|
total = page_set.size

DataHelpers.iterate_each(page_set, batch_size: 500) do |page|
page.update_status
relative_to = if at_time == 'latest_version'
page.latest&.capture_time
else
at_time
end

page.update_status(relative_to:)
completed += 1
if Time.now - last_update > 2
DataHelpers.log_progress(completed, total)
Expand Down
9 changes: 9 additions & 0 deletions test/models/page_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ class PageTest < ActiveSupport::TestCase
assert_equal(page.update_status, 404, 'Status should match the latest error code')
end

test 'pages use version#effective_status, not raw status' do
  # Every capture below reports a raw HTTP status of 200 but carries a
  # '404 Not Found' title. The page-level status is expected to be 404,
  # i.e. derived from each version's effective_status, not its raw status.
  # NOTE(review): presumably effective_status infers the 404 from the title;
  # that logic is not visible here -- confirm against Version.
  page = Page.create(url: 'https://example.gov/')

  # Capture ages, oldest first.
  [15.days, 12.days, 10.days, 1.day].each do |age|
    page.versions.create(capture_time: Time.now - age, status: 200, title: '404 Not Found')
  end

  assert_equal(page.update_status, 404, 'Status should be the effective_status of the versions')
end

test 'pages can calculate a status even when some versions have no status' do
page = Page.create(url: 'https://example.gov/')
page.versions.create(capture_time: Time.now - 12.days)
Expand Down

0 comments on commit c4ce127

Please sign in to comment.