From 0d8030b8611fdef55109950e3005b71742938ba6 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Thu, 19 Jan 2023 11:48:28 -0800 Subject: [PATCH] Resetting page titles should consider old versions (#1065) I messed up a bunch of page titles in #1061 because I overlooked the fact that `Version#sync_page_title` only functioned if called on the latest version. This fixes the issue by moving all the meaningful logic about where and when to grab a title to `Page#update_page_title` and changes `Version#sync_page_title` to just call that, but with an argument that tells it to only look forward from the version's capture time. That's not *exactly* the same behavior for the Version method, but gets us effectively the same result. This should also make the migration run faster. --- app/models/page.rb | 15 ++++++++++----- app/models/version.rb | 10 +--------- test/models/page_test.rb | 11 ++++++++++- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/app/models/page.rb b/app/models/page.rb index 7e273672..30c4c1f0 100644 --- a/app/models/page.rb +++ b/app/models/page.rb @@ -247,16 +247,21 @@ def update_page_title(from_time = nil) new_title = nil candidates.each do |version| - new_title = version.sync_page_title - break if new_title + if version.title.present? && version.status_ok? + new_title = version.title + break + end end # Fall back to the filename from the page's URL. - if new_title.blank? + if new_title.blank? && title.blank? filename = /\/([^\/]+)\/?$/.match(url).try(:[], 1) - clean_name = CGI.unescape(filename || '') - self.update(title: clean_name) + new_title = CGI.unescape(filename || '') end + + + self.update(title: new_title) if new_title.present? + new_title end protected diff --git a/app/models/version.rb b/app/models/version.rb index 30528bba..3a9c9913 100644 --- a/app/models/version.rb +++ b/app/models/version.rb @@ -213,15 +213,7 @@ def status_ok?(strict: false) end def sync_page_title - if title.present? && status_ok? - most_recent_capture_time = page.latest.capture_time - if most_recent_capture_time.nil? || most_recent_capture_time <= capture_time - page.update(title:) - return title - end - end - - nil + page.update_page_title(capture_time) end private diff --git a/test/models/page_test.rb b/test/models/page_test.rb index eca8e1b0..e9798966 100644 --- a/test/models/page_test.rb +++ b/test/models/page_test.rb @@ -52,7 +52,7 @@ class PageTest < ActiveSupport::TestCase assert_equal('Page One', page.title, 'The page title should not sync with the incoming version if it is an error status') end - test "page title should should use URL if there's no valid version" do + test "page title should use URL if there's no valid version" do page = Page.create(url: 'http://no-title.com/my/special+page.html', status: 404) page.versions.create(capture_time: '2017-03-05T00:00:00Z', status: 404, title: '') page.update_page_title @@ -67,6 +67,15 @@ class PageTest < ActiveSupport::TestCase assert_equal('no-title.com', page2.title) end + test 'update_page_title should keep looking back in time if latest version has no title' do + page = Page.create(url: 'http://no-title.com/my/special+page.html', status: 404) + page.versions.create(capture_time: '2017-03-06T00:00:00Z', status: 200, title: '') + page.versions.create(capture_time: '2017-03-05T00:00:00Z', status: 200, title: 'Good Title') + page.update_page_title + + assert_equal('Good Title', page.title) + end + test 'can add many maintainer models to a page' do pages(:home_page).add_maintainer(maintainers(:epa)) pages(:home_page).add_maintainer(maintainers(:doi))