Skip to content

Commit

Permalink
fix: patched intel rdt to allow sudo (#9527)
Browse files Browse the repository at this point in the history
Co-authored-by: Joe Guo <joe.guo@canonical.com>
  • Loading branch information
xavpaice and Joe Guo authored Oct 12, 2021
1 parent 7c16822 commit 4321f8a
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
27 changes: 27 additions & 0 deletions plugins/inputs/intel_rdt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,29 @@ Note: pqos tool needs root privileges to work properly.

Metrics will be constantly reported from the following `pqos` commands within the given interval:

#### If telegraf does not run as the root user

The `pqos` command requires root-level access to run. If telegraf runs as a
non-root user, there are two options to overcome this.

One option is to set the setuid bit on the `pqos` binary using
`chmod u+s /path/to/pqos`. This approach is simple and requires no modification
to the Telegraf configuration; however, `pqos` is not a read-only tool, and
making such a command setuid root has security implications.

Alternatively, you may enable sudo to allow `pqos` to run correctly, as follows:

Add the following to your sudoers file (assumes telegraf runs as a user named `telegraf`):

```
telegraf ALL=(ALL) NOPASSWD:/usr/sbin/pqos -r --iface-os --mon-file-type=csv --mon-interval=*
```

If you wish to use sudo, you must also add `use_sudo = true` to the Telegraf
configuration (see below).

#### In case of cores monitoring:
```
pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES]
Expand Down Expand Up @@ -76,6 +99,10 @@ More about Intel RDT: https://www.intel.com/content/www/us/en/architecture-and-t
## Mandatory if cores aren't set and forbidden if cores are specified.
## e.g. ["qemu", "pmd"]
# processes = ["process"]

## Specify if the pqos process should be called with sudo.
## Mandatory if the telegraf process does not run as root.
# use_sudo = false
```

### Exposed metrics
Expand Down
38 changes: 33 additions & 5 deletions plugins/inputs/intel_rdt/intel_rdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -46,6 +47,7 @@ type IntelRDT struct {
Processes []string `toml:"processes"`
SamplingInterval int32 `toml:"sampling_interval"`
ShortenedMetrics bool `toml:"shortened_metrics"`
UseSudo bool `toml:"use_sudo"`

Log telegraf.Logger `toml:"-"`
Publisher Publisher `toml:"-"`
Expand Down Expand Up @@ -97,6 +99,10 @@ func (r *IntelRDT) SampleConfig() string {
## Mandatory if cores aren't set and forbidden if cores are specified.
## e.g. ["qemu", "pmd"]
# processes = ["process"]
## Specify if the pqos process should be called with sudo.
## Mandatory if the telegraf process does not run as root.
# use_sudo = false
`
}

Expand Down Expand Up @@ -254,6 +260,12 @@ func (r *IntelRDT) readData(ctx context.Context, args []string, processesPIDsAss

cmd := exec.Command(r.PqosPath, append(args)...)

if r.UseSudo {
// run pqos with `/bin/sh -c "sudo /path/to/pqos ..."`
args = []string{"-c", fmt.Sprintf("sudo %s %s", r.PqosPath, strings.Replace(strings.Join(args, " "), ";", "\\;", -1))}
cmd = exec.Command("/bin/sh", args...)
}

cmdReader, err := cmd.StdoutPipe()
if err != nil {
r.errorChan <- err
Expand Down Expand Up @@ -334,14 +346,30 @@ func (r *IntelRDT) processOutput(cmdReader io.ReadCloser, processesPIDsAssociati
}

// shutDownPqos stops the given pqos command.
//
// It first sends an interrupt so the process can exit on its own, then polls
// for up to two seconds until the process is reported as done. If the process
// is still not done after that period it is killed.
//
// A command whose Process is nil is treated as already stopped. The returned
// error is non-nil only when the final kill attempt fails for a reason other
// than the process already being done.
func shutDownPqos(pqos *exec.Cmd) error {
	const (
		timeout      = 2 * time.Second
		pollInterval = 10 * time.Millisecond
	)

	if pqos.Process == nil {
		return nil
	}

	// Best effort: a failure to deliver the interrupt is deliberately ignored
	// here because the kill fallback below covers that case.
	_ = pqos.Process.Signal(os.Interrupt)

	// Poll with signal 0 (existence check only) until the process is done or
	// the deadline passes. Sleeping between probes avoids spinning a CPU core
	// for the whole timeout.
	deadline := time.Now().Add(timeout)
	for {
		if err := pqos.Process.Signal(syscall.Signal(0)); err == os.ErrProcessDone {
			return nil
		}
		if !time.Now().Before(deadline) {
			break
		}
		time.Sleep(pollInterval)
	}

	// pqos is still running after the grace period, so kill it. On Unix
	// Process.Kill sends SIGKILL, which pqos cannot handle, and this leaves
	// garbage in `/sys/fs/resctrl/mon_groups`;
	// see https://github.com/intel/intel-cmt-cat/issues/197
	if err := pqos.Process.Kill(); err != nil && err != os.ErrProcessDone {
		return fmt.Errorf("failed to shut down pqos: %w", err)
	}
	return nil
}
Expand Down

0 comments on commit 4321f8a

Please sign in to comment.