Skip to content

Commit

Permalink
Wire in the python doc build and related runme work
Browse files Browse the repository at this point in the history
* Change how the version string is made: the build used to read an existing
  version before re-creating `BuildArtifacts`, so the old version would
  still get used.  This is now different: the version is properly
  re-made when the directory is recreated.  This will have a negative
  impact on quick builds after a commit, but without it things can be
  confusing when checking out a different git branch.

* Pip-install `sphinx`, `sphinx_rtd_theme`, and `recommonmark` in
  developer environments for the sphinx doc build.

* Add a build script for the python docs, and wire it into runme.

* Make the generated rst files go in a different BuildArtifacts
  directory, so the actual docs that should be copied are all in a
  single directory.

* Drop doc references that don't exist.

* Some tweaking of `conf.py`.

* Streamline the build a bit, making the python work more similar to the
  Scala work, among other things:
  - always create the pip package instead of when `should publish
    pip` (which now controls only the actual publishing);
  - do that with the pydocs step right after the sbt step, so there's a
    single chunk of python work;
  - move the python tests to after the basic publishing, making it
    possible to grab the results for debugging in case of failures.

* Add `$MML_LATEST` which is set to `yes` only when we're building a
  tagged version *and* that version is the most recent one on `master`.
  Use it to determine whether we should label the current build as
  latest (for docker hub tag and published docs directory).

* New `azblob` convenience function for blob operations in
  `$MAIN_CONTAINER`.

* New `docs` tag for publishing, and new `$DOCS_CONTAINER`, `$DOCS_URL`
  environment variables.

* Add a `pydocs/publish` script (not really "py") to copy the generated
  documentation to the `docs` container.  This is a complete hack
  because of several limitations of the CLI, and because it's making
  `foo/` URLs work (copied from `foo/index.html`) and also `foo` (a
  redirection HTML to `foo/`).

* Add `docs` to what `container-gc` knows about.  (But deleting a
  version is likely going to be horrifyingly slow.)

* Add `src/project/top-doc.html` with some HTML to show on the toplevel
  initial page.
  • Loading branch information
elibarzilay committed Jul 11, 2017
1 parent cf8fb19 commit 882f4c2
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 76 deletions.
5 changes: 2 additions & 3 deletions src/codegen/src/main/scala/CodeGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,14 @@ object CodeGen {
| outputDir: $outputDir
| toZipDir: $toZipDir
| pyTestDir: $pyTestDir
| docDir: $docDir""".stripMargin)
| pyDocDir: $pyDocDir""".stripMargin)
val roots = // note: excludes the toplevel project
if (!rootsFile.exists) sys.error(s"Could not find roots file at $rootsFile")
else readFile(rootsFile, _.getLines.toList).filter(_ != ".")
println("Creating temp folders")
toZipDir.mkdirs
pyTestDir.mkdirs
docDir.mkdirs
inDocDir.mkdirs
pyDocDir.mkdirs
println("Copy jar files to output directory")
copyAllFilesFromRoots(srcDir, roots, jarRelPath,
(Pattern.quote("-" + mmlVer + ".jar") + "$").r,
Expand Down
3 changes: 1 addition & 2 deletions src/codegen/src/main/scala/Config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ object Config {
val toZipDir = new File(srcDir, "src/main/resources/mmlspark")
val zipFile = new File(outputDir, "mmlspark.zip")
val pyTestDir = new File(topDir, "TestResults/generated_pytests")
val docDir = new File(topDir, "BuildArtifacts/docs")
val inDocDir = new File(docDir, "source")
val pyDocDir = new File(topDir, "BuildArtifacts/pydocsrc")
val jarRelPath = "target/scala-" + sys.env("SCALA_VERSION")
val pyRelPath = "src/main/python"
val mmlVer = sys.env.getOrElse("MML_VERSION",
Expand Down
4 changes: 2 additions & 2 deletions src/codegen/src/main/scala/DocGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ object DocGen {
val pattern = "^[A-Z]\\w*[.]py$".r
val moduleString = allFiles(toZipDir, (f => pattern.findFirstIn(f.getName) != None))
.map(f => s" ${getBaseName(f.getName)}\n").mkString("")
writeFile(new File(inDocDir, "modules.rst"), rstFileLines(moduleString))
writeFile(new File(pyDocDir, "modules.rst"), rstFileLines(moduleString))

// Generate .rst file for each PySpark wrapper - for documentation generation
allFiles(toZipDir, (f => pattern.findFirstIn(f.getName) != None))
.foreach{x => writeFile(new File(inDocDir, getBaseName(x.getName) + ".rst"),
.foreach{x => writeFile(new File(pyDocDir, getBaseName(x.getName) + ".rst"),
contentsString(getBaseName(x.getName)))
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/project/build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ object Extras {
def testsDir = file(env("TEST_RESULTS", "../TestResults"))
def mavenDir = artifactsDir / "packages" / "m2"
def docsDir = artifactsDir / "docs" / "scala"
val topDocHtml = file(".") / "project" / "top-doc.html"

def scalacOpts = Seq(
"-encoding", "UTF-8",
Expand Down Expand Up @@ -114,6 +115,7 @@ object Extras {
dependencyOverrides in ThisBuild ++= overrideLibs,
scalacOptions in ThisBuild ++= scalacOpts,
scalacOptions in (Compile, doc) += "-groups",
scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", topDocHtml.getPath()),
// Don't run tests in parallel, and fork subprocesses for them
parallelExecution in (ThisBuild, Test) := false,
fork in (ThisBuild, Test) := true,
Expand Down
4 changes: 4 additions & 0 deletions src/project/top-doc.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
These are the API documentation pages for the Scala side of
<a href="https://github.com/Azure/mmlspark/">MMLSpark</a>.

See also the <a href="../pyspark/" target="_parent">pyspark level API documentation</a>.
9 changes: 6 additions & 3 deletions tools/config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ deftag extended
deftag e2e extended
deftag linuxonly
# Tag definitions for $PUBLISH
map deftag storage maven pip demo docker
map deftag storage maven pip docs demo docker

defvar -p SRCDIR "$BASEDIR/src"
defvar -p BUILD_ARTIFACTS "$BASEDIR/BuildArtifacts"
Expand Down Expand Up @@ -192,7 +192,8 @@ Conda.setup() {
_ ./bin/conda install --name "root" --no-update-deps --no-deps --yes \
--quiet --file "mmlspark-packages.spec"
if [[ "$BUILDMODE" != "runtime" ]]; then
./bin/pip install "xmlrunner" "wheel"
# xmlrunner: tests; wheel: pip builds; sphinx*, recommonmark: pydoc builds
./bin/pip install "xmlrunner" "wheel" "sphinx" "sphinx_rtd_theme" "recommonmark"
else
show section "Minimizing conda directory"
collect_log=2 _ ./bin/conda uninstall -y tk
Expand Down Expand Up @@ -231,7 +232,9 @@ CNTK.init() {
defvar STORAGE_CONTAINER "buildartifacts"
defvar STORAGE_URL "$(_main_url "$STORAGE_CONTAINER")"

# Container for maven/pip packages
# Container for docs and maven/pip packages
defvar DOCS_CONTAINER "docs"
defvar DOCS_URL "$(_main_url "$DOCS_CONTAINER")"
defvar MAVEN_CONTAINER "maven"
defvar -x MAVEN_URL "$(_main_url "$MAVEN_CONTAINER")"
defvar -d MAVEN_PACKAGE "com.microsoft.ml.spark:mmlspark_$SCALA_VERSION:<{MML_VERSION}>"
Expand Down
3 changes: 2 additions & 1 deletion tools/docker/build-env
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ echo 'PS1='\''\u:\w\$ '\' >> "/etc/skel/.bashrc"
useradd -c "Microsoft ML for Apache Spark" -U -d "$HOME" -m "$USER"
cd "$HOME"

# avoid running git
/mkenv/src/runme BUILDMODE=runtime INSTALLER_CACHE_DIR=/mkenv/cache \
MML_VERSION="???" MML_BUILD_INFO="???"
MML_VERSION="???" MML_BUILD_INFO="???" MML_LATEST="???"

chown -R "$USER:$USER" "$HOME"
cd /home
Expand Down
16 changes: 12 additions & 4 deletions tools/misc/container-gc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

. "$(dirname "${BASH_SOURCE[0]}")/../../runme"

types=(S M P)
types=(S M P D)
declare -A S=([container]="$STORAGE_CONTAINER"
[path]=""
[suffix]="/")
Expand All @@ -15,6 +15,10 @@ declare -A P=([container]="$PIP_CONTAINER"
[path]=""
[prefix]="mmlspark-"
[suffix]="-py2.py3-none-any.whl")
declare -A D=([container]="$DOCS_CONTAINER"
[path]=""
[suffix]=""
[listsuffix]="/")

set -e
shopt -s nullglob
Expand Down Expand Up @@ -71,9 +75,13 @@ get_versions_for() {
local IFS=$'\n\r'
Xs=($(IFS=""; azls "/${X[container]}/${X[path]}${X[path]:+/}" | \
while read -r l; do
l="${l#${X[prefix]}}"; l="${l%${X[suffix]}}"; echo "$l"
# [listsuffix] can override [suffix] for this listing
l="${l#${X[prefix]}}"; l="${l%${X[listsuffix]:-${X[suffix]}}}"
# ignore things that don't look like a version (eg, in docs)
if [[ "$l" = *[0-9].[0-9]* ]]; then echo "$l"; fi
done | sort -V))
IFS=" "
all+=("${Xs[@]}")
X[vers_]=" ${Xs[*]} "
}

Expand Down Expand Up @@ -226,9 +234,9 @@ do_requests() {
done
}

types_="${types[*]}"; types_="${types_// /}"
types_="${types[*]}"; types_="${types_// /}"; all=()
map get_versions_for "${types[@]}"
all=($(printf '%s\n' "${Ss[@]}" "${Ms[@]}" "${Ps[@]}" | sort -V -u))
all=($(printf '%s\n' "${all[@]}" | sort -V -u))

echo "Versions found: ${#all[@]}"

Expand Down
File renamed without changes.
35 changes: 35 additions & 0 deletions tools/pydocs/build
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

. "$(dirname "${BASH_SOURCE[0]}")/../../runme" "$@"
# Build the Python (sphinx) documentation into "$BUILD_ARTIFACTS/docs/pyspark".
# The static sources next to this script (its "src" directory) are copied
# temporarily into "$BUILD_ARTIFACTS/pydocsrc", sphinx-build is run on that
# directory, and the copied files are removed again whether or not the build
# succeeded.  Helpers such as `show`, `_rm`, `_md`, `_` and `failwith` are not
# defined in this script (presumably they come from the sourced `runme`).
main() (

show section "Building Python Documentation"

# quote the inner substitution so a checkout path with whitespace still works
here="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
pydocsrc="$BUILD_ARTIFACTS/pydocsrc"

# start from a fresh output directory
cd "$BUILD_ARTIFACTS/docs"
_rm "pyspark"; _md "pyspark"

show - "Copying source files"
srcs=( "$here/src"/* )
for f in "${srcs[@]}"; do
  t="$pydocsrc/$(basename "$f")"
  # refuse to clobber anything that is already in the source directory
  if [[ -e "$t" ]]; then failwith "target file exists: $t"; fi
  cp "$f" "$t"
done

PATH+=":$CONDA_HOME/bin"
PYTHONPATH="$BUILD_ARTIFACTS/sdk/mmlspark.zip:$PYTHONPATH"
PYTHONPATH="$BASEDIR/src/src/main/resources/mmlspark:$PYTHONPATH"
# a plain assignment only reaches child processes if the variable was already
# exported; export it explicitly so sphinx-build always sees the additions
export PYTHONPATH

# `version` is $MML_VERSION with anything from the first "+" on dropped,
# `release` is the full string.  NOTE(review): `-a` looks like it makes `_`
# tolerate a failing command so the cleanup below still runs -- confirm.
_ -a sphinx-build -D version="${MML_VERSION%%+*}" -D release="$MML_VERSION" \
  "$pydocsrc" "pyspark"
ret=$?

# remove the temporarily copied sources before reporting the result
for f in "${srcs[@]}"; do rm -f "$pydocsrc/$(basename "$f")"; done

if ((ret)); then failwith "documentation build failure"; fi

)
__ main "$@"
115 changes: 115 additions & 0 deletions tools/pydocs/publish
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

# publish [dirname | --top]
# Copies the built documentation in $BUILD_ARTIFACTS/docs to the docs container
# at the specified directory name which defaults to $MML_VERSION; use `--top` to
# copy it to the toplevel so it's the default docs content.

. "$(dirname "${BASH_SOURCE[0]}")/../../runme" "$@"
# Publish the contents of "$BUILD_ARTIFACTS/docs" to the docs container.
#   $1: target directory name in the container; defaults to $MML_VERSION, and
#       `--top` publishes to the toplevel of the container.
# Steps: (1) generate a simple "index.html" listing for every directory that
# has none; (2) upload all files, one batch per known mime type; (3) for every
# directory "foo" that has an index, also upload it as "foo/" and upload a
# redirection page as "foo"; (4) fail if any path matched no known pattern.
# Helpers such as `show`, `_`, `azblob`, `qstr` and `failwith` are not defined
# in this script (presumably they come from the sourced `runme`).
main() (

target="$1"; shift
if [[ -z "$target" ]]; then target="$MML_VERSION"; fi
if [[ "x$target" = "x--top" ]]; then
  show section "Publishing docs to toplevel"; target=""
else
  show section "Publishing docs to $target"
fi

default="index.html"
mime_types=(
  # note: glob patterns are matched against the whole copied path, so they
  # should always start with a `*`
  "*.html;text/html"
  "*.png;image/png"
  "*.gif;image/gif"
  "*.svg;image/svg+xml"
  "*.js;application/javascript"
  "*.css;text/css"
  "*.ttf;application/font-sfnt"
  "*.woff;application/font-woff"
  "*.eot;application/vnd.ms-fontobject"
  "*.txt;text/plain"
  "*.doctree;application/octet-stream"
  "*/.buildinfo;application/octet-stream"
  "*/objects.inv;application/octet-stream"
  "*/environment.pickle;application/octet-stream"
)

cd "$BUILD_ARTIFACTS/docs"
# collect all paths, including dot-files, then restore the dotglob setting
# NOTE(review): `**` only recurses when `globstar` is enabled -- confirm that
# the sourced environment turns it on
restore=$(shopt -p dotglob); shopt -s dotglob; files=( ** ); $restore

# Make $default files when missing
fst=1
for f in . "${files[@]}"; do
  x="$f/$default"
  if [[ ! -d "$f" || -e "$x" ]]; then continue; fi
  if ((fst)); then show - "creating default $default files"; fst=0; fi
  show - "  $x"
  { echo "<html><body><pre style=\"font-size: 150%;\">"
    for y in "$f"/*; do
      u="$(basename "$y")"; if [[ -d "$y" ]]; then u+="/"; fi
      if [[ "$u" = "$default" ]]; then continue; fi
      # close the anchor with </a> (this used to emit a stray </u>)
      echo "<a href=\"$u\">$u</a>"
    done
    echo "</pre></body></html>"
  } > "$x"
  files+=("$x")
done

len=${#files[@]}

# copy all files with a proper type
for mt in "${mime_types[@]}"; do
  glob="${mt%;*}"; type="${mt##*;}"
  show - "Copying $glob as $type"
  for ((i=0; i<len; i++)); do
    # mimics the matching that `upload-batch` with `--pattern` is doing;
    # entries that are accounted for are removed so leftovers can be reported
    if [[ -n "${files[$i]}" && "${files[$i]}" = $glob ]]; then
      unset "files[$i]"
    fi
  done
  collect_log=1 _ azblob upload-batch \
    --source "." --destination "$DOCS_CONTAINER${target:+/}$target" \
    --pattern "$glob" --content-type "$type"
done

# Deal with directories
show - "Making directory defaults"
tmp="/tmp/mmlbuild-$$"
# index -1 stands for the top directory of the published tree; it is skipped
# for a toplevel target since there is no "foo" name to create there
if [[ -z "$target" ]]; then start=0; else start=-1; fi
for ((i=start; i<len; i++)); do
  if ((i<0)); then
    # the top directory is published under the requested target name
    f="."; webdir="$target"; redirect="${target##*/}/"
  else
    f="${files[$i]}"; webdir="$target${target:+/}$f"; redirect="${f##*/}/"
  fi
  if [[ -z "$f" || ! -d "$f" ]]; then continue; fi
  # only real entries are removed: a negative subscript would either be an
  # error or (in newer bash versions) drop the last element of the array
  if ((i>=0)); then unset "files[$i]"; fi
  if [[ ! -e "$f/$default" ]]; then continue; fi
  # copy "foo/$default" also to "foo/"
  collect_log=1 _ azblob upload \
    --container "$DOCS_CONTAINER" --content-type "text/html" \
    --file "$f/$default" --name "$webdir/" \
    2> /dev/null
  # create "foo" for redirections to "foo/"
  show command "... html redirect to $redirect ... > $(qstr "$tmp")"
  { echo "<html><head>"
    echo "<meta http-equiv=\"refresh\" content=\"0; url=$redirect\" />"
    echo "</head><body>"
    echo "Moved <a href=\"$redirect\">here</a>"
    echo "</body></html>"
  } > "$tmp"
  collect_log=1 _ azblob upload \
    --container "$DOCS_CONTAINER" --content-type "text/html" \
    --file "$tmp" --name "$webdir" \
    2> /dev/null
done
rm -f "$tmp"

# anything still in the list was neither uploaded by a pattern nor a directory
if ((${#files[@]} > 0)); then
  echo "Leftovers: ${#files[@]}"
  printf '  %s\n' "${files[@]}"
  failwith "the above paths were not in any known patterns"
fi

)
__ main "$@"
Loading

0 comments on commit 882f4c2

Please sign in to comment.