Skip to content

Commit

Permalink
Fetch GitHub release data again (electron#332)
Browse files Browse the repository at this point in the history
* update release data every four hours at most

* re-enable release data fetching

* fetch releases and readmes in separate scripts
  • Loading branch information
zeke authored Oct 19, 2017
1 parent 2f859b6 commit 424b736
Show file tree
Hide file tree
Showing 9 changed files with 32,856 additions and 3,679 deletions.
11 changes: 11 additions & 0 deletions lib/apps-with-github-repos.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
const apps = require('./raw-app-list')()
const parseGitUrl = require('github-url-to-object')

module.exports = apps
.filter(app => {
// inherit repository from website if possible
if (!app.repository && parseGitUrl(app.website)) app.repository = app.website
if (!app.repository) return false
if (!parseGitUrl(app.repository)) return false
return true
})
1,057 changes: 1,057 additions & 0 deletions meta/readmes.json

Large diffs are not rendered by default.

35,186 changes: 31,619 additions & 3,567 deletions meta/releases.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"build:dates": "node script/dates",
"build:colors": "node script/colors",
"build:categories": "node script/categories",
"xbuild:releases": "node script/releases",
"build:releases": "node script/releases",
"build:readmes": "node script/readmes",
"build:pack": "node script/pack",
"prepack": "check-for-leaks",
"prepush": "check-for-leaks",
Expand All @@ -31,6 +32,7 @@
"author": "Zeke Sikelianos <zeke@sikelianos.com> (http://zeke.sikelianos.com)",
"license": "MIT",
"devDependencies": {
"bottleneck": "^1.16.0",
"chai": "^3.5.0",
"check-for-leaks": "^1.0.2",
"cheerio": "^1.0.0-rc.2",
Expand All @@ -41,6 +43,7 @@
"get-image-colors": "^1.8.1",
"github": "^9.2.0",
"github-url-to-object": "^4.0.2",
"human-interval": "^0.1.6",
"husky": "^0.14.3",
"image-size": "^0.5.0",
"inquirer": "^2.0.0",
Expand Down
6 changes: 3 additions & 3 deletions script/pack.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const yaml = require('yamljs')
const dates = require('../meta/dates.json')
const colors = require('../meta/colors.json')
const releases = require('../meta/releases.json')
const readmes = require('../meta/readmes.json')
const apps = []

fs.readdirSync(path.join(__dirname, '../apps'))
Expand All @@ -23,14 +24,13 @@ fs.readdirSync(path.join(__dirname, '../apps'))
date: dates[slug],
iconColors: colors[slug].palette
},
releases[slug]
releases[slug],
readmes[slug]
)

app.goodColorOnWhite = app.goodColorOnWhite || colors[slug].goodColorOnWhite
app.goodColorOnBlack = app.goodColorOnBlack || colors[slug].goodColorOnBlack

if (!app.latestRelease) app.latestRelease = false

apps.push(app)
})

Expand Down
78 changes: 78 additions & 0 deletions script/readmes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
const MAX_CONCURRENCY = Number(process.env.MAX_CONCURRENCY) || 4 // simultaneous open web requests
const README_CACHE_TTL = require('human-interval')(process.env.README_CACHE_TTL || '4 hours')

const fs = require('fs')
const path = require('path')
const Bottleneck = require('bottleneck')
const github = require('../lib/github')
const cheerio = require('cheerio')
const parseGitUrl = require('github-url-to-object')

const outputFile = path.join(__dirname, '../meta/readmes.json')
const oldReadmeData = require(outputFile)
// Seed the output with the existing cache. Apps whose README is still fresh
// are filtered out of `appsToUpdate` below and never re-fetched, so starting
// from `{}` would silently drop their cached data when the file is rewritten.
const output = Object.assign({}, oldReadmeData)
const limiter = new Bottleneck(MAX_CONCURRENCY)

const apps = require('../lib/raw-app-list')()
const appsWithRepos = require('../lib/apps-with-github-repos')
// Re-fetch only apps with no cached README or a cache entry older than the TTL.
const appsToUpdate = appsWithRepos.filter(app => {
  const oldData = oldReadmeData[app.slug]
  if (!oldData) return true
  // Missing/invalid timestamps parse as epoch 0 and therefore count as stale.
  const oldDate = new Date(oldData.readmeFetchedAt || null).getTime()
  return oldDate + README_CACHE_TTL < Date.now()
})

console.log(`${appsWithRepos.length} of ${apps.length} apps have a GitHub repo.`)
console.log(`${appsToUpdate.length} of those ${appsWithRepos.length} have missing or outdated README data.`)

appsToUpdate.forEach(app => {
  limiter.schedule(getReadme, app)
})

// Once every scheduled fetch has settled, persist the merged cache and exit.
limiter.on('idle', () => {
  fs.writeFileSync(outputFile, JSON.stringify(output, null, 2))
  console.log(`Done fetching README files.\nWrote ${outputFile}`)
  process.exit()
})

// Fetch the rendered (HTML) README for one app from the GitHub API and
// record it in the shared `output` object under the app's slug, alongside
// a cleaned copy and a fetch timestamp. On failure a null placeholder is
// stored so the cache still records when the attempt happened.
// Returns the promise so Bottleneck can track completion.
function getReadme (app) {
  const {user: owner, repo} = parseGitUrl(app.repository)
  const opts = {
    owner,
    repo,
    headers: {
      // Ask the API for pre-rendered HTML instead of raw markdown
      Accept: 'application/vnd.github.v3.html'
    }
  }

  return github.repos.getReadme(opts)
    .then(response => {
      console.log(`${app.slug}: got latest README`)
      output[app.slug] = {
        readmeCleaned: cleanReadme(response.data, app),
        readmeOriginal: response.data,
        readmeFetchedAt: new Date()
      }
    })
    .catch(err => {
      console.error(`${app.slug}: no README found`)
      output[app.slug] = {
        readmeOriginal: null,
        readmeFetchedAt: new Date()
      }
      // 404 (no README in the repo) is expected; anything else gets logged
      if (err.code !== 404) console.error(err)
    })
}

// Rewrite relative <img> sources in a rendered README to absolute URLs on
// the app's GitHub repo (raw/master), so images resolve when the README is
// displayed outside GitHub. Returns the transformed <body> HTML.
function cleanReadme (readme, app) {
  const $ = cheerio.load(readme)

  // Images whose src does not start with "http" are repo-relative paths
  const relativeImages = $('img').not('[src^="http"]')
  if (relativeImages.length > 0) {
    console.log(`${app.slug}: updating ${relativeImages.length} relative image URLs`)
    relativeImages.each((i, img) => {
      const $img = $(img)
      $img.attr('src', `${app.repository}/raw/master/${$img.attr('src')}`)
    })
  }

  return $('body').html()
}
132 changes: 43 additions & 89 deletions script/releases.js
Original file line number Diff line number Diff line change
@@ -1,108 +1,62 @@
const MAX_CONCURRENCY = Number(process.env.MAX_CONCURRENCY) || 4 // simultaneous open web requests
const RELEASE_CACHE_TTL = require('human-interval')(process.env.RELEASE_CACHE_TTL || '4 hours')

const fs = require('fs')
const path = require('path')
const Bottleneck = require('bottleneck')
const github = require('../lib/github')
const cheerio = require('cheerio')
const parseGitUrl = require('github-url-to-object')
const Duration = require('duration')
const downloadExtensions = [
'.deb',
'.dmg',
'.exe',
'.gz',
'.rpm',
'.zip'
]
const apps = require('../lib/raw-app-list')()
.filter(app => {
if (!app.repository) {
if (parseGitUrl(app.website)) {
console.log(`${app.name} website is a giturl: ${app.website}`)
app.repository = app.website
}
}
if (!app.repository) return false
if (!parseGitUrl(app.repository)) return false
let age = new Duration(new Date(app.releases_fetched_at || null), new Date())
if (age.hours < 24) return false
return true
})

const outputFile = path.join(__dirname, '../meta/releases.json')
const oldReleaseData = require(outputFile)
const output = {}
let i = -1
const limiter = new Bottleneck(MAX_CONCURRENCY)

// Don't fetch release data too often
const outputFileAgeInHours = (new Date() - new Date(fs.statSync(outputFile).mtime)) / 1000 / 60
if (outputFileAgeInHours < 1) {
console.log('Release data was updated less than an hour ago; skipping')
const apps = require('../lib/raw-app-list')()
const appsWithRepos = require('../lib/apps-with-github-repos')
const appsToUpdate = appsWithRepos.filter(app => {
const oldData = oldReleaseData[app.slug]
if (!oldData) return true
const oldDate = new Date(oldData.latestReleaseFetchedAt || null).getTime()
return oldDate + RELEASE_CACHE_TTL < Date.now()
})

console.log(`${appsWithRepos.length} of ${apps.length} apps have a GitHub repo.`)
console.log(`${appsToUpdate.length} of those ${appsWithRepos.length} have missing or outdated release data.`)

appsToUpdate.forEach(app => {
limiter.schedule(getLatestRelease, app)
})

limiter.on('idle', () => {
fs.writeFileSync(outputFile, JSON.stringify(output, null, 2))
console.log(`Done fetching release data.\nWrote ${outputFile}`)
process.exit()
} else {
console.log('Fetching release data for apps that have a GitHub repo...')
}

go()

function go () {
++i
})

if (i === apps.length) {
fs.writeFileSync(outputFile, JSON.stringify(output, null, 2))
process.exit()
}

const app = apps[i]
function getLatestRelease (app) {
const {user: owner, repo} = parseGitUrl(app.repository)
const gitHubOptions = {
const opts = {
owner: owner,
repo: repo,
headers: {
Accept: 'application/vnd.github.v3.html'
}
}

github.repos.getLatestRelease(gitHubOptions)
.then(release => {
console.log(app.slug)
output[app.slug] = {
latestRelease: release.data || false,
release_fetched_at: new Date()
}
if (release.data) {
output[app.slug].latestRelease = {
releaseUrl: release.data.html_url,
tagName: release.data.tag_name,
releaseName: release.data.name,
releaseNotes: release.data.body_html
return github.repos.getLatestRelease(opts)
.then(release => {
console.log(`${app.slug}: got latest release`)
output[app.slug] = {
latestRelease: release.data,
latestReleaseFetchedAt: new Date()
}
output[app.slug].latestRelease.downloads = release.data.assets.filter((asset) => {
let fileExtension = path.extname(asset.browser_download_url)
return (downloadExtensions.indexOf(fileExtension) !== -1)
}).map((asset) => {
return Object.assign({
fileName: asset.name,
fileUrl: asset.browser_download_url
})
})
}
return github.repos.getReadme(gitHubOptions)
}).catch(() => {
output[app.slug] = {
latestRelease: false
}
return github.repos.getReadme(gitHubOptions)
}).then((response) => {
let readme = response.data
let $ = cheerio.load(readme)

const $relativeImages = $('img').not('[src^="http"]')
if ($relativeImages.length) {
console.log(`Updating relative image URLs in ${app.name}`)
$relativeImages.each((i, img) => {
$(img).attr('src', `${app.repository}/raw/master/${$(img).attr('src')}`)
})
}

output[app.slug].originalReadme = readme
output[app.slug].readme = $('body').html()
go()
})
}).catch(err => {
console.error(`${app.slug}: no releases found`)
output[app.slug] = {
latestRelease: null,
latestReleaseFetchedAt: new Date()
}
if (err.code !== 404) console.error(err)
})
}
54 changes: 35 additions & 19 deletions test/machine-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,36 +64,52 @@ describe('machine-generated app data (exported by the module)', () => {
expect(hyper.goodColorOnBlack).to.eq('#FFF')
})

it('sets a `releases` array on every app', function () {
return this.skip()
// apps.forEach(app => {
// expect(app.releases).to.be.an('array', app.slug)
// })

// const app = apps.find(app => app.slug === 'hyper')
// expect(app).to.be.an('object')
// expect(app.releases.length).to.be.above(12)
// expect(app.releases[5].assets.length).to.be.above(4)
describe('releases', () => {
const releaseApps = apps.filter(app => app.latestRelease)

it('collects latest GitHub release data for apps that have it', () => {
expect(releaseApps.length).to.be.above(50)
})

it('sets `latestRelease` on apps with GitHub repos that use Releases', () => {
expect(releaseApps.every(app => app.latestRelease)).to.eq(true)
})

it('sets `latestReleaseFetchedAt`', () => {
expect(releaseApps.every(app => app.latestReleaseFetchedAt)).to.eq(true)
})
})

it('adds readme data to apps with GitHub releases', () => {
const readmeApps = apps.filter(app => app.readme)
expect(readmeApps.length).to.be.above(10)
describe('readmes', () => {
const readmeApps = apps.filter(app => app.readmeCleaned)

// make sure every app retains its original unmodified readme
expect(readmeApps.every(app => app.originalReadme.length > 0)).to.eq(true)
it('collects READMEs for apps with GitHub repos', () => {
expect(readmeApps.length).to.be.above(50)
})

it('sets `readmeCleaned`', () => {
expect(readmeApps.every(app => app.readmeCleaned.length > 0)).to.eq(true)
})

it('sets `readmeOriginal`', () => {
expect(readmeApps.every(app => app.readmeOriginal.length > 0)).to.eq(true)
})

it('sets `readmeFetchedAt`', () => {
expect(readmeApps.every(app => app.readmeFetchedAt.length > 0)).to.eq(true)
})
})

it('rewrites relative image source tags', () => {
const beaker = apps.find(app => app.slug === 'beaker-browser')
const local = '<img src="build/icons/256x256.png"'
const remote = '<img src="https://github.com/beakerbrowser/beaker/raw/master/build/icons/256x256.png"'

expect(beaker.originalReadme).to.include(local)
expect(beaker.originalReadme).to.not.include(remote)
expect(beaker.readmeOriginal).to.include(local)
expect(beaker.readmeOriginal).to.not.include(remote)

expect(beaker.readme).to.not.include(local)
expect(beaker.readme).to.include(remote)
expect(beaker.readmeCleaned).to.not.include(local)
expect(beaker.readmeCleaned).to.include(remote)
})
})

Expand Down

0 comments on commit 424b736

Please sign in to comment.