Skip to content

Commit

Permalink
chore: refactor the envd and up cmd (#747)
Browse files Browse the repository at this point in the history
* WIP

Signed-off-by: nullday <aseaday@hotmail.com>

* WIP

Signed-off-by: nullday <aseaday@hotmail.com>

* Refactor up and build

Signed-off-by: nullday <aseaday@hotmail.com>

Signed-off-by: nullday <aseaday@hotmail.com>
  • Loading branch information
aseaday authored Aug 12, 2022
1 parent b628f6a commit 4f24b0e
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 117 deletions.
90 changes: 62 additions & 28 deletions pkg/app/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,50 +91,91 @@ To build and push the image to a registry:
}

func build(clicontext *cli.Context) error {
buildContext, err := filepath.Abs(clicontext.Path("path"))
opt, err := ParseBuildOpt(clicontext)
if err != nil {
return errors.Wrap(err, "failed to get absolute path of the build context")
return err
}

fileName, funcName, err := builder.ParseFromStr(clicontext.String("from"))
logger := logrus.WithFields(logrus.Fields{
"build-context": opt.BuildContextDir,
"build-file": opt.ManifestFilePath,
"config": opt.ConfigFilePath,
"tag": opt.Tag,
})
logger.WithFields(logrus.Fields{
"builder-options": opt,
}).Debug("starting build command")

builder, err := GetBuilder(clicontext, opt)
if err != nil {
return err
}
if err = InterpretEnvdDef(builder); err != nil {
return err
}
return BuildImage(clicontext, builder)
}

// GetBuilder creates a builder.Builder from the given options, using the
// context carried by the CLI context. When construction fails, the error is
// wrapped with "failed to create the builder" and a nil builder is returned.
func GetBuilder(clicontext *cli.Context, opt builder.Options) (builder.Builder, error) {
	b, err := builder.New(clicontext.Context, opt)
	if err == nil {
		return b, nil
	}
	return nil, errors.Wrap(err, "failed to create the builder")
}

// InterpretEnvdDef calls Interpret on the given builder. A failure is wrapped
// with "failed to interpret"; nil is returned on success.
func InterpretEnvdDef(builder builder.Builder) error {
	err := builder.Interpret()
	if err == nil {
		return nil
	}
	return errors.Wrap(err, "failed to interpret")
}

// BuildImage runs the builder's Build step with the CLI context's Context,
// passing the value of the "force" flag through. A failure is wrapped with
// "failed to build the image"; nil is returned on success.
func BuildImage(clicontext *cli.Context, builder builder.Builder) error {
	err := builder.Build(clicontext.Context, clicontext.Bool("force"))
	if err == nil {
		return nil
	}
	return errors.Wrap(err, "failed to build the image")
}

func ParseBuildOpt(clicontext *cli.Context) (builder.Options, error) {
buildContext, err := filepath.Abs(clicontext.Path("path"))
if err != nil {
return builder.Options{}, errors.Wrap(err, "failed to get absolute path of the build context")
}
fileName, funcName, err := builder.ParseFromStr(clicontext.String("from"))
if err != nil {
return builder.Options{}, err
}

manifest, err := filepath.Abs(filepath.Join(buildContext, fileName))
if err != nil {
return errors.Wrap(err, "failed to get absolute path of the build file")
return builder.Options{}, errors.Wrap(err, "failed to get absolute path of the build file")
}
if manifest == "" {
return errors.Newf("build file %s does not exist", fileName)
return builder.Options{}, errors.New("file does not exist")
}

cfg := home.GetManager().ConfigFile()
config := home.GetManager().ConfigFile()

tag := clicontext.String("tag")
if tag == "" {
logrus.Debug("tag not specified, using default")
tag = fmt.Sprintf("%s:%s", filepath.Base(buildContext), "dev")
}
// Docker is currently the only supported container engine. This should be expanded to support other container engines.
tag, err = docker.NormalizeNamed(tag)
if err != nil {
return err
return builder.Options{}, err
}

logger := logrus.WithFields(logrus.Fields{
"build-context": buildContext,
"build-file": manifest,
"config": cfg,
"tag": tag,
})
debug := clicontext.Bool("debug")
output := clicontext.String("output")
force := clicontext.Bool("force")
output := ""
exportCache := clicontext.String("export-cache")
importCache := clicontext.String("import-cache")

opt := builder.Options{
ManifestFilePath: manifest,
ConfigFilePath: cfg,
ConfigFilePath: config,
BuildFuncName: funcName,
BuildContextDir: buildContext,
Tag: tag,
Expand All @@ -144,17 +185,10 @@ func build(clicontext *cli.Context) error {
ExportCache: exportCache,
ImportCache: importCache,
}

debug := clicontext.Bool("debug")
if debug {
opt.ProgressMode = "plain"
}

logger.WithFields(logrus.Fields{
"builder-options": opt,
}).Debug("starting build command")

builder, err := builder.New(clicontext.Context, opt)
if err != nil {
return errors.Wrap(err, "failed to create the builder")
}
return builder.Build(clicontext.Context, force)
return opt, nil
}
128 changes: 47 additions & 81 deletions pkg/app/up.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package app

import (
"fmt"
"path/filepath"
"time"

Expand All @@ -25,7 +24,6 @@ import (

"github.com/tensorchord/envd/pkg/builder"
"github.com/tensorchord/envd/pkg/docker"
"github.com/tensorchord/envd/pkg/home"
"github.com/tensorchord/envd/pkg/lang/ir"
"github.com/tensorchord/envd/pkg/ssh"
sshconfig "github.com/tensorchord/envd/pkg/ssh/config"
Expand Down Expand Up @@ -116,74 +114,31 @@ var CommandUp = &cli.Command{
}

func up(clicontext *cli.Context) error {
buildContext, err := filepath.Abs(clicontext.Path("path"))
if err != nil {
return errors.Wrap(err, "failed to get absolute path of the build context")
}
fileName, funcName, err := builder.ParseFromStr(clicontext.String("from"))
buildOpt, err := ParseBuildOpt(clicontext)
if err != nil {
return err
}

manifest, err := filepath.Abs(filepath.Join(buildContext, fileName))
if err != nil {
return errors.Wrap(err, "failed to get absolute path of the build file")
}
if manifest == "" {
return errors.New("file does not exist")
}

config := home.GetManager().ConfigFile()

tag := clicontext.String("tag")
if tag == "" {
logrus.Debug("tag not specified, using default")
tag = fmt.Sprintf("%s:%s", filepath.Base(buildContext), "dev")
}
// Docker is currently the only supported container engine. This should be expanded to support other container engines.
tag, err = docker.NormalizeNamed(tag)
if err != nil {
return err
}
ctr := filepath.Base(buildContext)

ctr := filepath.Base(buildOpt.BuildContextDir)
detach := clicontext.Bool("detach")
debug := clicontext.Bool("debug")
force := clicontext.Bool("force")
output := ""
exportCache := clicontext.String("export-cache")
importCache := clicontext.String("import-cache")

opt := builder.Options{
ManifestFilePath: manifest,
ConfigFilePath: config,
BuildFuncName: funcName,
BuildContextDir: buildContext,
Tag: tag,
OutputOpts: output,
PubKeyPath: clicontext.Path("public-key"),
ProgressMode: "auto",
ExportCache: exportCache,
ImportCache: importCache,
}
if debug {
opt.ProgressMode = "plain"
}

logger := logrus.WithFields(logrus.Fields{
"builder-options": opt,
"builder-options": buildOpt,
"container-name": ctr,
"detach": detach,
})
logger.Debug("starting up command")
builder, err := builder.New(clicontext.Context, opt)

builder, err := GetBuilder(clicontext, buildOpt)
if err != nil {
return errors.Wrap(err, "failed to create the builder")
return err
}

if err := builder.Build(clicontext.Context, force); err != nil {
return errors.Wrap(err, "failed to build the image")
if err = InterpretEnvdDef(builder); err != nil {
return err
}
if err = BuildImage(clicontext, builder); err != nil {
return err
}

// Do not attach GPU if the flag is set.
gpuEnable := clicontext.Bool("no-gpu")
var gpu bool
Expand All @@ -192,59 +147,70 @@ func up(clicontext *cli.Context) error {
} else {
gpu = builder.GPUEnabled()
}
numGPUs := builder.NumGPUs()

sshPortInHost, error := StartEnvd(clicontext, buildOpt, gpu, numGPUs)
if error != nil {
return error
}

if !detach {
opt := ssh.DefaultOptions()
opt.PrivateKeyPath = clicontext.Path("private-key")
opt.Port = sshPortInHost
sshClient, err := ssh.NewClient(opt)
if err != nil {
return errors.Wrap(err, "failed to create the ssh client")
}
if err := sshClient.Attach(); err != nil {
return errors.Wrap(err, "failed to attach to the container")
}
}

return nil
}

func StartEnvd(clicontext *cli.Context, buildOpt builder.Options, gpu bool, numGPUs int) (int, error) {
dockerClient, err := docker.NewClient(clicontext.Context)
if err != nil {
return errors.Wrap(err, "failed to create the docker client")
return 0, errors.Wrap(err, "failed to create the docker client")
}

if gpu {
nvruntimeExists, err := dockerClient.GPUEnabled(clicontext.Context)
if err != nil {
return errors.Wrap(err, "failed to check if nvidia-runtime is installed")
return 0, errors.Wrap(err, "failed to check if nvidia-runtime is installed")
}
if !nvruntimeExists {
return errors.New("GPU is required but nvidia container runtime is not installed, please refer to https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker")
return 0, errors.New("GPU is required but nvidia container runtime is not installed, please refer to https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker")
}
}

sshPortInHost, err := netutil.GetFreePort()
if err != nil {
return errors.Wrap(err, "failed to get a free port")
return 0, errors.Wrap(err, "failed to get a free port")
}
numGPUs := builder.NumGPUs()

ctr := filepath.Base(buildOpt.BuildContextDir)
force := clicontext.Bool("force")
err = dockerClient.CleanEnvdIfExists(clicontext.Context, ctr, force)
if err != nil {
return errors.Wrap(err, "failed to clean the envd environment")
return 0, errors.Wrap(err, "failed to clean the envd environment")
}
containerID, containerIP, err := dockerClient.StartEnvd(clicontext.Context,
tag, ctr, buildContext, gpu, numGPUs, sshPortInHost, *ir.DefaultGraph, clicontext.Duration("timeout"),
buildOpt.Tag, ctr, buildOpt.BuildContextDir, gpu, numGPUs, sshPortInHost, *ir.DefaultGraph, clicontext.Duration("timeout"),
clicontext.StringSlice("volume"))
if err != nil {
return errors.Wrap(err, "failed to start the envd environment")
return 0, errors.Wrap(err, "failed to start the envd environment")
}
logrus.Debugf("container %s is running", containerID)

logrus.Debugf("Add entry %s to SSH config. at %s", buildContext, containerIP)
logrus.Debugf("Add entry %s to SSH config. at %s", buildOpt.BuildContextDir, containerIP)
if err = sshconfig.AddEntry(
ctr, localhost, sshPortInHost, clicontext.Path("private-key")); err != nil {
logrus.Infof("failed to add entry %s to your SSH config file: %s", ctr, err)
return errors.Wrap(err, "failed to add entry to your SSH config file")
return 0, errors.Wrap(err, "failed to add entry to your SSH config file")
}
return sshPortInHost, nil

if !detach {
opt := ssh.DefaultOptions()
opt.PrivateKeyPath = clicontext.Path("private-key")
opt.Port = sshPortInHost
sshClient, err := ssh.NewClient(opt)
if err != nil {
return errors.Wrap(err, "failed to create the ssh client")
}
if err := sshClient.Attach(); err != nil {
return errors.Wrap(err, "failed to attach to the container")
}
}

return nil
}
8 changes: 0 additions & 8 deletions pkg/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,6 @@ func (b generalBuilder) NumGPUs() int {

func (b generalBuilder) Build(ctx context.Context, force bool) error {
if !force && !b.checkIfNeedBuild(ctx) {
// The container label needs the interpreted defaultGraph to be set in `StartEnvd`
// TODO(Qi Chen): remove this hack
if err := b.Interpret(); err != nil {
return errors.Wrap(err, "failed to interpret")
}
return nil
}

Expand Down Expand Up @@ -181,9 +176,6 @@ func (b generalBuilder) Interpret() error {
}

func (b generalBuilder) compile(ctx context.Context) (*llb.Definition, error) {
if err := b.Interpret(); err != nil {
return nil, errors.Wrap(err, "failed to interpret")
}
def, err := ir.Compile(ctx, filepath.Base(b.BuildContextDir), b.PubKeyPath)
if err != nil {
return nil, errors.Wrap(err, "failed to compile build.envd")
Expand Down

0 comments on commit 4f24b0e

Please sign in to comment.