From 24db8df8d1b22c522897238cf59d95697949cfb4 Mon Sep 17 00:00:00 2001 From: Shane Curcuru Date: Mon, 5 Feb 2024 09:41:25 -0500 Subject: [PATCH] Refactor attr name out --- assets/ruby/sponsor_utils.rb | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/assets/ruby/sponsor_utils.rb b/assets/ruby/sponsor_utils.rb index 826f39e..fe074e8 100644 --- a/assets/ruby/sponsor_utils.rb +++ b/assets/ruby/sponsor_utils.rb @@ -51,7 +51,7 @@ def normalize_href(base, href) 'fourth' => '.Silver-sponsors a', 'fifth' => '.Bronze-sponsors a' } - DRUPAL_SPONSOR_CSS = { + DRUPAL_SPONSOR_CSS = { # TODO sponsor links are on separate page 'first' => '.sponsors--signature a', 'second' => '.view-display-id-attachment_6 a', 'third' => '.view-display-id-attachment_3 a', @@ -59,19 +59,38 @@ def normalize_href(base, href) 'community' => '.view-display-id-attachment_9 a' } DRUPAL_SPONSOR_PAGE = '.org-link a' + PYTHON_SPONSOR_CSS = { # TODO Uses ethicalads.io to disintermediate sponsor links/logos; requires custom processing + 'first' => 'div[title="visionary Sponsors"] div[data-internal-year]', + 'second' => 'div[title="sustainability Sponsors"] div[data-internal-year]', + 'third' => 'div[title="maintaining Sponsors"] div[data-internal-year]', + 'fourth' => 'div[title="contributing Sponsors"] div[data-internal-year]', + 'fifth' => 'div[title="supporting Sponsors"] div[data-internal-year]', + 'sixth' => 'div[title="partner Sponsors"] div[data-internal-year]', + 'seventh' => 'div[title="participating Sponsors"] div[data-internal-year]', + 'eighth' => 'div[title="associate Sponsors"] div[data-internal-year]' + } + FOUNDATION_MAP = { + 'asf' => [ASF_SPONSOR_CSS, 'href'], + 'numfocus' => [NUMFOCUS_SPONSOR_CSS, 'href'], + 'osgeo' => [OSGEO_SPONSOR_CSS, 'href'], + 'drupal' => [DRUPAL_SPONSOR_CSS, 'href'], + 'python' => [PYTHON_SPONSOR_CSS, 'id'] + } # Scrape sponsor listing defined by css selectors # @param io input stream of html to parse + # @param shortname of foundation map to parse # @return hash of sponsors by approximate map-defined levels - def scrape_bycss(io, baseurl, selectors) + def scrape_bycss(io, foundation) sponsors = {} + cssmap = FOUNDATION_MAP.fetch(foundation, nil) doc = Nokogiri::HTML5(io) body = doc.xpath('/html/body') - selectors.each do | key, selector | + cssmap[0].each do | key, selector | nodelist = body.css(selector) sponsors[key] = [] nodelist.each do | node | - sponsors[key] << node['href'] + sponsors[key] << node[cssmap[1]] end end return sponsors @@ -81,9 +100,9 @@ def scrape_bycss(io, baseurl, selectors) # ### #### ##### ###### # Main method for command line use if __FILE__ == $PROGRAM_NAME - filename = '../../../sponsors-drupal.html' + filename = '../../../sponsors-asf.html' baseurl = '' io = File.open(filename) - sponsors = SponsorUtils.scrape_bycss(io, baseurl, SponsorUtils::DRUPAL_SPONSOR_CSS) + sponsors = SponsorUtils.scrape_bycss(io, 'asf') puts JSON.pretty_generate(sponsors) end