Skip to content

Commit 80be19c

Browse files
Update to incrementally process records
1 parent 72f69bc commit 80be19c

File tree

15 files changed

+165
-674
lines changed

15 files changed

+165
-674
lines changed

.env.example

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
ALGOLIA_APP_ID=
2-
ALGOLIA_ADMIN_KEY=
3-
API_DOCS_PATH=../ember-jsonapi-docs/tmp
4-
GUIDES_DOCS_PATH=../guides-app/dist/content
5-
DEBUG=false
6-
DRIVER=algolia
1+
GUIDES_DOCS_PATH="../guides-app/dist/content"
2+
ALGOLIA_APP_ID=""
3+
ALGOLIA_ADMIN_KEY=""
74
AWS_ACCESS_KEY=""
85
AWS_SECRET_KEY=""

README.md

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,27 @@
55
### Setup
66

77
1. `cp .env.example .env` - Copy the example environment configuration
8-
2. Update the Algolia .env variables and path to the root of the generated API docs
8+
2. Update the Algolia .env variables and path to the root of the generated API docs (only needed for populating indexes; use the `-j` flag to write to disk during development)
99
3. `yarn install` - Install dependencies
1010

1111
### Indexing API
1212

13-
Because of AWS rate limits, this project currently depends on having the API documentation downloaded locally. To generate the JSON files locally, follow the excellent documentation over at [ember-jsonapi-docs](https://github.com/ember-learn/ember-jsonapi-docs#running-the-app).
13+
Set `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` to any valid AWS credentials; they are used to download the JSON API docs.
1414

15-
Once generated, use the following command to reindex algolia:
15+
Once generated, use the following command to re-index algolia:
1616
`yarn start -p api`
1717

1818
### Indexing Guides
1919

2020
Guides JSON files are currently not stored anywhere, because they are built on demand using [broccoli-static-site-json](https://github.com/stonecircle/broccoli-static-site-json) in [guides-app](https://github.com/ember-learn/guides-app).
2121
To generate the JSON files locally, clone [guides-app](https://github.com/ember-learn/guides-app), and run `ember build`.
2222

23-
Once generated, use the following command to reindex algolia:
23+
Once generated, use the following command to re-index algolia:
2424
`yarn start -p guides`
2525

26-
2726
## .env variables
2827

29-
1. `ALGOLIA_APP_ID` - The Algolia application ID, found in "API Keys" section of the Algolia dashboard
30-
2. `ALGOLIA_ADMIN_KEY` - The Algolia admin key, found in "API Keys" section of the Algolia dashboard
31-
3. `API_DOCS_PATH` - The path to the root of the built documentation from [ember-jsonapi-docs](https://github.com/ember-learn/ember-jsonapi-docs#running-the-app)
32-
3. `GUIDES_DOCS_PATH` - The path to the root of the built documentation from [guides-app](https://github.com/ember-learn/guides-app)
33-
4. `DEBUG` - Outputs helpful debug information
34-
5. `DRIVER` - Controls the type of the script's output. Available options: `algolia`, `json`. `json` can be helpful for debugging the output of the script without wasting any indexing operations on `algolia`.
28+
1. `GUIDES_DOCS_PATH` - The path to the root of the built documentation from [guides-app](https://github.com/ember-learn/guides-app)
29+
2. `ALGOLIA_APP_ID` - The Algolia application ID, found in "API Keys" section of the Algolia dashboard
30+
3. `ALGOLIA_ADMIN_KEY` - The Algolia admin key, found in "API Keys" section of the Algolia dashboard
31+
4. `AWS_ACCESS_KEY` & `AWS_SECRET_KEY` - Any valid AWS credentials that can be used to read our public JSON docs

index.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ program
1313
'-c, --clear-index',
1414
'Whether indexes of the project should be cleared while processing'
1515
)
16+
.option('-j, --json-driver', 'Use the json driver instead of algolia')
1617

1718
program.on('--help', function() {
1819
console.log(`
@@ -26,10 +27,10 @@ program.parse(process.argv)
2627

2728
switch (program.project) {
2829
case 'guides':
29-
runGuides()
30+
runGuides(program.clearIndex, program.jsonDriver)
3031
break
3132
case 'api':
32-
runApi(program.clearIndex)
33+
runApi(program.clearIndex, program.jsonDriver)
3334
break
3435
default:
3536
throw new Error('Invalid --project property')

lib/api-docs-sync.js

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,26 @@
1-
import { Promise, all as waitForAllPromises } from 'bluebird'
2-
import S3 from 's3'
3-
import ora from 'ora'
4-
import humanSize from 'human-size'
51
import http from 'http'
62
import https from 'https'
3+
import humanSize from 'human-size'
4+
import ora from 'ora'
5+
import S3 from 's3'
76

87
// To increase s3's download & upload dir perf
98
http.globalAgent.maxSockets = https.globalAgent.maxSockets = 30
109

1110
const { AWS_ACCESS_KEY, AWS_SECRET_KEY } = process.env
1211

1312
const client = S3.createClient({
14-
s3Options: { accessKeyId: AWS_ACCESS_KEY, secretAccessKey: AWS_SECRET_KEY }
13+
s3Options: { accessKeyId: AWS_ACCESS_KEY, secretAccessKey: AWS_SECRET_KEY },
1514
})
1615

1716
const jsonDocsDirDownloadOptions = {
1817
localDir: 'tmp/json-docs',
19-
s3Params: { Bucket: 'api-docs.emberjs.com', Prefix: 'json-docs' }
18+
s3Params: { Bucket: 'api-docs.emberjs.com', Prefix: 'json-docs' },
2019
}
2120

2221
let revDocsDirDownloadOptions = {
2322
localDir: 'tmp/rev-index',
24-
s3Params: { Bucket: 'api-docs.emberjs.com', Prefix: 'rev-index' }
23+
s3Params: { Bucket: 'api-docs.emberjs.com', Prefix: 'rev-index' },
2524
}
2625

2726
const syncDir = options => {
@@ -51,8 +50,8 @@ const syncDir = options => {
5150
}
5251

5352
export default function downloadExistingDocsToLocal() {
54-
return waitForAllPromises([
53+
return Promise.all([
5554
syncDir(jsonDocsDirDownloadOptions),
56-
syncDir(revDocsDirDownloadOptions)
55+
syncDir(revDocsDirDownloadOptions),
5756
])
5857
}

lib/api.js

Lines changed: 42 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,38 @@
11
require('dotenv').config()
22

3-
import { all, resolve } from 'bluebird'
3+
import { readJsonSync } from 'fs-extra'
4+
import { difference } from 'lodash-es'
45
import { compare as compareSemVers } from 'semver'
5-
import { difference } from 'lodash'
6-
7-
import logger from './utils/logger'
8-
import drivers from './drivers'
9-
import { readTmpFileFactory, readTmpFileAsyncFactory } from './utils/fs'
10-
import schemas from './schemas'
116
import downloadApiDocs from './api-docs-sync'
12-
13-
// Get 'readTmpFile' and 'readTmpFileAsync' bound by 'api'
14-
const PROJECT_TYPE = 'api'
15-
const readTmpFile = readTmpFileFactory(PROJECT_TYPE)
16-
const readTmpFileAsync = readTmpFileAsyncFactory(PROJECT_TYPE)
17-
18-
const { DRIVER } = process.env
19-
20-
const SelectedDriver = drivers[DRIVER]
7+
import algoliaDriver from './drivers/algolia'
8+
import jsonDriver from './drivers/json'
9+
import schemas from './schemas'
2110

2211
const apiIndexes = ['modules', 'classes', 'methods', 'versions']
2312

24-
export async function run(clearIndex = false) {
25-
apiIndexes.map(SelectedDriver.init)
13+
export async function run(clearIndex = false, useJsonDriver = false) {
14+
let driver = useJsonDriver ? jsonDriver : algoliaDriver
15+
16+
apiIndexes.map(driver.init)
2617

2718
if (clearIndex) {
28-
await all(apiIndexes.map(SelectedDriver.clear))
19+
apiIndexes.map(driver.clear)
2920
}
3021

3122
await downloadApiDocs()
3223

33-
await all([processDocs('ember'), processDocs('ember-data')])
24+
await Promise.all([
25+
processDocs(driver, 'ember'),
26+
processDocs(driver, 'ember-data'),
27+
])
3428
}
3529

36-
async function processDocs(project) {
37-
let prevIndexedVersions = await SelectedDriver.getPreviouslyIndexedVersions(
38-
project
39-
)
30+
async function processDocs(driver, project) {
31+
let prevIndexedVersions = await driver.getPreviouslyIndexedVersions(project)
4032

4133
const {
42-
meta: { availableVersions }
43-
} = await readTmpFileAsync(`rev-index/${project}.json`)
34+
meta: { availableVersions },
35+
} = readJsonSync(`./tmp/rev-index/${project}.json`)
4436

4537
let versionsToProcess = difference(availableVersions, prevIndexedVersions)
4638

@@ -60,78 +52,59 @@ async function processDocs(project) {
6052
)
6153
// Run the schema against all data stored
6254
.map(mapDataForVersion)
63-
// Write out to selected driver.
64-
.map(writeToDriver)
55+
.map(content => writeToDriver(driver, content))
56+
57+
let versions = [...prevIndexedVersions, ...versionsToProcess].sort(
58+
compareSemVers
59+
)
6560

66-
await SelectedDriver.write(
61+
await driver.write(
6762
'versions',
68-
[
69-
{
70-
id: project,
71-
name: project,
72-
versions: [...prevIndexedVersions, ...versionsToProcess].sort(
73-
compareSemVers
74-
)
75-
}
76-
],
63+
[{ id: project, name: project, versions }],
7764
project
7865
)
7966
} catch (err) {
8067
console.log('Error:: ', err)
8168
}
8269
}
8370

84-
/**
85-
* Read index file for version
86-
*
87-
* @param {string} version - Version of library to index
88-
* @param {string} libName - Name of library currently indexing
89-
* @returns {Promise} - Returns found index file json
90-
*/
9171
function readIndexFileForVersion(version, libName) {
92-
const emberVersionJSONPath = `rev-index/${libName}-${version}.json`
93-
logger.logBlue(`OPENING:: ${emberVersionJSONPath}`)
94-
return readTmpFile(emberVersionJSONPath)
72+
const emberVersionJSONPath = `./tmp/rev-index/${libName}-${version}.json`
73+
console.debug(`OPENING:: ${emberVersionJSONPath}`)
74+
return readJsonSync(emberVersionJSONPath)
9575
}
9676

97-
/**
98-
* Fetch public modules and classes for version
99-
*
100-
* @param {object} versionIndexObject - The index.json file for a given version
101-
* @param {string} libName - The name of the library to index
102-
* @returns {object} - Extended version object with public modules & classes
103-
*/
10477
function fetchPublicModuleClassesForVersion(versionIndexObject, libName) {
10578
const publicModules = versionIndexObject.data.relationships[
10679
'public-modules'
10780
].data.map(module => {
10881
// Module names are uri encoded
10982
const id = encodeURIComponent(module.id)
110-
const modulePath = `json-docs/${libName}/${
83+
const modulePath = `./tmp/json-docs/${libName}/${
11184
versionIndexObject.data.attributes.version
11285
}/modules/${versionIndexObject.meta.module[id]}.json`
11386

114-
logger.logBlue(`OPENING:: ${modulePath}`)
115-
return readTmpFile(modulePath)
87+
console.debug(`OPENING:: ${modulePath}`)
88+
return readJsonSync(modulePath)
11689
})
11790

11891
const publicClasses = versionIndexObject.data.relationships[
11992
'public-classes'
12093
].data.map(classObj => {
12194
// Class names are uri encoded
12295
const id = encodeURIComponent(classObj.id)
123-
const classPath = `json-docs/${libName}/${
96+
const classPath = `./tmp/json-docs/${libName}/${
12497
versionIndexObject.data.attributes.version
12598
}/classes/${versionIndexObject.meta.class[id]}.json`
12699

127-
logger.logBlue(`OPENING:: ${classPath}`)
128-
return readTmpFile(classPath)
100+
console.debug(`OPENING:: ${classPath}`)
101+
return readJsonSync(classPath)
129102
})
130103

131104
return {
132105
version: versionIndexObject,
133106
publicModules,
134-
publicClasses
107+
publicClasses,
135108
}
136109
}
137110

@@ -151,44 +124,29 @@ function mapDataForVersion(versionObject) {
151124
...versionObject,
152125
methods: [...methods, ...staticFunctions],
153126
publicModules: versionObject.publicModules.map(schemas.moduleSchema),
154-
publicClasses: versionObject.publicClasses.map(schemas.classSchema)
127+
publicClasses: versionObject.publicClasses.map(schemas.classSchema),
155128
}
156129
}
157130

158-
/**
159-
* Writes out to the given driver
160-
*
161-
* @param versionObject - Object version to write out
162-
*/
163-
function writeToDriver(versionObject) {
131+
function writeToDriver(driver, versionObject) {
164132
const { id } = versionObject.version.data
165133

166134
let tokens = id.split('-')
167135
let version = tokens.pop()
168136
let projectName = tokens.join('-')
169137

170-
logger.logGreen(
138+
console.info(
171139
`version: ${id}, public classes: ${
172140
versionObject.publicClasses.length
173141
}, public modules: ${versionObject.publicModules.length}, methods: ${
174142
versionObject.methods.length
175143
}`
176144
)
177145

178-
return all([
179-
SelectedDriver.write(
180-
'modules',
181-
versionObject.publicModules,
182-
projectName,
183-
version
184-
),
185-
SelectedDriver.write(
186-
'classes',
187-
versionObject.publicClasses,
188-
projectName,
189-
version
190-
),
191-
SelectedDriver.write('methods', versionObject.methods, projectName, version)
146+
return Promise.all([
147+
driver.write('modules', versionObject.publicModules, projectName, version),
148+
driver.write('classes', versionObject.publicClasses, projectName, version),
149+
driver.write('methods', versionObject.methods, projectName, version),
192150
])
193151
}
194152

0 commit comments

Comments
 (0)