Sync branch magefile with main #104308
@ -13,6 +13,7 @@ bugs in actually-released versions.
|
||||
- Fix an issue where the columns in the web interface wouldn't correctly resize when the shown information changed.
|
||||
- Add-on: replace the different 'refresh' buttons (for Manager info & storage location, job types, and worker tags) with a single button that just refreshes everything in one go. The information obtained from Flamenco Manager is now stored in a JSON file on disk, making it independent from Blender auto-saving the user preferences.
|
||||
- Ensure the web frontend connects to the backend correctly when served over HTTPS ([#104296](https://projects.blender.org/studio/flamenco/pulls/104296)).
|
||||
- For Workers running on Linux, it is now possible to configure the "OOM score adjustment" for sub-processes. This makes it possible for the out-of-memory killer to target Blender, and not Flamenco Worker itself.
|
||||
- Security updates of some dependencies:
|
||||
- [Incorrect forwarding of sensitive headers and cookies on HTTP redirect in net/http](https://pkg.go.dev/vuln/GO-2024-2600)
|
||||
- [Memory exhaustion in multipart form parsing in net/textproto and net/http](https://pkg.go.dev/vuln/GO-2024-2599)
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
"projects.blender.org/studio/flamenco/internal/appinfo"
|
||||
"projects.blender.org/studio/flamenco/internal/worker"
|
||||
"projects.blender.org/studio/flamenco/internal/worker/cli_runner"
|
||||
"projects.blender.org/studio/flamenco/pkg/oomscore"
|
||||
"projects.blender.org/studio/flamenco/pkg/sysinfo"
|
||||
"projects.blender.org/studio/flamenco/pkg/website"
|
||||
)
|
||||
@ -114,6 +115,10 @@ func main() {
|
||||
findBlender()
|
||||
findFFmpeg()
|
||||
|
||||
// Create the CLI runner before the auto-discovery, to make any configuration
|
||||
// problems clear before waiting for the Manager to respond.
|
||||
cliRunner := createCLIRunner(&configWrangler)
|
||||
|
||||
// Give the auto-discovery some time to find a Manager.
|
||||
discoverTimeout := 10 * time.Minute
|
||||
discoverCtx, discoverCancel := context.WithTimeout(context.Background(), discoverTimeout)
|
||||
@ -149,7 +154,6 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
cliRunner := cli_runner.NewCLIRunner()
|
||||
listener = worker.NewListener(client, buffer)
|
||||
cmdRunner := worker.NewCommandExecutor(cliRunner, listener, timeService)
|
||||
taskRunner := worker.NewTaskExecutor(cmdRunner, listener)
|
||||
@ -304,3 +308,27 @@ func logFatalManagerDiscoveryError(err error, discoverTimeout time.Duration) {
|
||||
Msgf("auto-discovery error, see %s", website.CannotFindManagerHelpURL)
|
||||
}
|
||||
}
|
||||
|
||||
func createCLIRunner(configWrangler *worker.FileConfigWrangler) *cli_runner.CLIRunner {
|
||||
config, err := configWrangler.WorkerConfig()
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("error loading worker configuration")
|
||||
}
|
||||
|
||||
if config.LinuxOOMScoreAdjust == nil {
|
||||
log.Debug().Msg("executables will be run without OOM score adjustment")
|
||||
return cli_runner.NewCLIRunner()
|
||||
}
|
||||
|
||||
if !oomscore.Available() {
|
||||
log.Warn().
|
||||
Msgf("config: oom_score_adjust configured, but that is only available on Linux, not this platform. See %s for more information.",
|
||||
website.OOMScoreAdjURL)
|
||||
return cli_runner.NewCLIRunner()
|
||||
}
|
||||
|
||||
adjustment := *config.LinuxOOMScoreAdjust
|
||||
log.Info().Int("oom_score_adjust", adjustment).Msg("executables will be run with OOM score adjustment")
|
||||
|
||||
return cli_runner.NewCLIRunnerWithOOMScoreAdjuster(adjustment)
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
|
||||
"github.com/alessio/shellescape"
|
||||
"github.com/rs/zerolog"
|
||||
"projects.blender.org/studio/flamenco/pkg/oomscore"
|
||||
)
|
||||
|
||||
// The buffer size used to read stdout/stderr output from subprocesses, in
|
||||
@ -20,11 +21,19 @@ const StdoutBufferSize = 40 * 1024
|
||||
|
||||
// CLIRunner is a wrapper around exec.CommandContext() to allow mocking.
|
||||
type CLIRunner struct {
|
||||
oomScoreAdjust int
|
||||
useOOMScoreAdjust bool
|
||||
}
|
||||
|
||||
func NewCLIRunner() *CLIRunner {
|
||||
return &CLIRunner{}
|
||||
}
|
||||
func NewCLIRunnerWithOOMScoreAdjuster(oomScoreAdjust int) *CLIRunner {
|
||||
return &CLIRunner{
|
||||
oomScoreAdjust: oomScoreAdjust,
|
||||
useOOMScoreAdjust: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (cli *CLIRunner) CommandContext(ctx context.Context, name string, arg ...string) *exec.Cmd {
|
||||
return exec.CommandContext(ctx, name, arg...)
|
||||
@ -55,7 +64,7 @@ func (cli *CLIRunner) RunWithTextOutput(
|
||||
return err
|
||||
}
|
||||
|
||||
if err := execCmd.Start(); err != nil {
|
||||
if err := cli.startWithOOMAdjust(execCmd); err != nil {
|
||||
logger.Error().Err(err).Msg("error starting CLI execution")
|
||||
return err
|
||||
}
|
||||
@ -171,3 +180,13 @@ func (cli *CLIRunner) logCmd(
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// startWithOOMAdjust runs the command with its OOM score adjusted.
|
||||
func (cli *CLIRunner) startWithOOMAdjust(execCmd *exec.Cmd) error {
|
||||
if cli.useOOMScoreAdjust {
|
||||
oomScoreRestore := oomscore.Adjust(cli.oomScoreAdjust)
|
||||
defer oomScoreRestore()
|
||||
}
|
||||
|
||||
return execCmd.Start()
|
||||
}
|
||||
|
@ -58,6 +58,18 @@ type WorkerConfig struct {
|
||||
|
||||
TaskTypes []string `yaml:"task_types"`
|
||||
RestartExitCode int `yaml:"restart_exit_code"`
|
||||
|
||||
// LinuxOOMScoreAdjust controls the Linux out-of-memory killer. Is used when
|
||||
// spawning a sub-process, to adjust the likelyness that that subprocess is
|
||||
// killed rather than Flamenco Worker itself. That way Flamenco Worker can
|
||||
// report the failure to the Manager.
|
||||
//
|
||||
// If the Worker itself would be OOM-killed, it would just be restarted and
|
||||
// get the task it was already working on, causing an infinite OOM-loop.
|
||||
//
|
||||
// If this value is not specified in the configuration file, Flamenco Worker
|
||||
// will not attempt to adjust its OOM score.
|
||||
LinuxOOMScoreAdjust *int `yaml:"oom_score_adjust"`
|
||||
}
|
||||
|
||||
type WorkerCredentials struct {
|
||||
|
86
pkg/oomscore/oomscore.go
Normal file
86
pkg/oomscore/oomscore.go
Normal file
@ -0,0 +1,86 @@
|
||||
// package oomscore provides some functions to adjust the Linux
|
||||
// out-of-memory (OOM) score, i.e. the number that determines how likely it is
|
||||
// that a process is killed in an out-of-memory situation.
|
||||
//
|
||||
// It is available only on Linux. On other platforms ErrNotImplemented will be returned.
|
||||
package oomscore
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
var ErrNotImplemented = errors.New("OOM score functionality not implemented on this platform")
|
||||
|
||||
// Available returns whether the functionality in this package is available for
|
||||
// the current platform.
|
||||
func Available() bool {
|
||||
return available
|
||||
}
|
||||
|
||||
// GetOOMScore returns the current process' OOM score.
|
||||
func GetOOMScore() (int, error) {
|
||||
return getOOMScore()
|
||||
}
|
||||
|
||||
// GetOOMScoreAdj returns the current process' OOM score adjustment.
|
||||
func GetOOMScoreAdj() (int, error) {
|
||||
return getOOMScoreAdj()
|
||||
}
|
||||
|
||||
// SetOOMScoreAdj sets the current process' OOM score adjustment.
|
||||
func SetOOMScoreAdj(score int) error {
|
||||
return setOOMScoreAdj(score)
|
||||
}
|
||||
|
||||
type ScoreRestoreFunc func()
|
||||
|
||||
var emptyRestoreFunc ScoreRestoreFunc = func() {}
|
||||
|
||||
// Adjust temporarily sets the OOM score adjustment.
|
||||
// The returned function MUST be called to restore the original value.
|
||||
// Any problems changing the score are logged, but not otherwise returned.
|
||||
func Adjust(score int) (restoreFunc ScoreRestoreFunc) {
|
||||
restoreFunc = emptyRestoreFunc
|
||||
|
||||
if !Available() {
|
||||
return
|
||||
}
|
||||
|
||||
origScore, err := getOOMScoreAdj()
|
||||
if err != nil {
|
||||
log.Error().
|
||||
AnErr("cause", err).
|
||||
Msg("could not get the current process' oom_score_adj value")
|
||||
return
|
||||
}
|
||||
|
||||
log.Trace().
|
||||
Int("oom_score_adj", score).
|
||||
Msg("setting oom_score_adj")
|
||||
|
||||
err = setOOMScoreAdj(score)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Int("oom_score_adj", score).
|
||||
AnErr("cause", err).
|
||||
Msg("could not set the current process' oom_score_adj value")
|
||||
return
|
||||
}
|
||||
|
||||
return func() {
|
||||
log.Trace().
|
||||
Int("oom_score_adj", origScore).
|
||||
Msg("restoring oom_score_adj")
|
||||
|
||||
err = setOOMScoreAdj(origScore)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Int("oom_score_adj", origScore).
|
||||
AnErr("cause", err).
|
||||
Msg("could not restore the current process' oom_score_adj value")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
66
pkg/oomscore/oomscore_linux.go
Normal file
66
pkg/oomscore/oomscore_linux.go
Normal file
@ -0,0 +1,66 @@
|
||||
//go:build linux
|
||||
|
||||
package oomscore
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
const (
|
||||
available = true
|
||||
)
|
||||
|
||||
// getOOMScore returns the current process' OOM score.
|
||||
func getOOMScore() (int, error) {
|
||||
return readInt("oom_score")
|
||||
}
|
||||
|
||||
// getOOMScoreAdj returns the current process' OOM score adjustment.
|
||||
func getOOMScoreAdj() (int, error) {
|
||||
return readInt("oom_score_adj")
|
||||
}
|
||||
|
||||
// setOOMScoreAdj sets the current process' OOM score adjustment.
|
||||
func setOOMScoreAdj(newScore int) error {
|
||||
return writeInt(newScore, "oom_score_adj")
|
||||
}
|
||||
|
||||
// readInt reads an integer from /proc/{pid}/{filename}
|
||||
func readInt(filename string) (int, error) {
|
||||
fullPath := procPidPath(filename)
|
||||
|
||||
file, err := os.Open(fullPath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("opening %s: %w", fullPath, err)
|
||||
}
|
||||
|
||||
var valueInFile int
|
||||
n, err := fmt.Fscan(file, &valueInFile)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("reading %s: %w", fullPath, err)
|
||||
}
|
||||
if n < 1 {
|
||||
return 0, fmt.Errorf("reading %s: did not find a number", fullPath)
|
||||
}
|
||||
|
||||
return valueInFile, nil
|
||||
}
|
||||
|
||||
// writeInt writes an integer to /proc/{pid}/{filename}
|
||||
func writeInt(value int, filename string) error {
|
||||
fullPath := procPidPath(filename)
|
||||
contents := fmt.Sprint(value)
|
||||
err := os.WriteFile(fullPath, []byte(contents), os.ModePerm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing %s: %w", fullPath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// procPidPath returns "/proc/{pid}/{filename}".
|
||||
func procPidPath(filename string) string {
|
||||
pid := os.Getpid()
|
||||
return filepath.Join("/proc", fmt.Sprint(pid), filename)
|
||||
}
|
19
pkg/oomscore/oomscore_nonlinux.go
Normal file
19
pkg/oomscore/oomscore_nonlinux.go
Normal file
@ -0,0 +1,19 @@
|
||||
//go:build !linux
|
||||
|
||||
package oomscore
|
||||
|
||||
const (
|
||||
available = false
|
||||
)
|
||||
|
||||
func getOOMScore() (int, error) {
|
||||
return 0, ErrNotImplemented
|
||||
}
|
||||
|
||||
func getOOMScoreAdj() (int, error) {
|
||||
return 0, ErrNotImplemented
|
||||
}
|
||||
|
||||
func setOOMScoreAdj(int) error {
|
||||
return ErrNotImplemented
|
||||
}
|
@ -9,4 +9,5 @@ const (
|
||||
BugReportURL = "https://flamenco.blender.org/get-involved"
|
||||
ShamanRequirementsURL = "https://flamenco.blender.org/usage/shared-storage/shaman/#requirements"
|
||||
WorkerConfigURL = "https://flamenco.blender.org/usage/worker-configuration/"
|
||||
OOMScoreAdjURL = WorkerConfigURL
|
||||
)
|
||||
|
@ -19,6 +19,9 @@ This is an example of such a configuration file:
|
||||
manager_url: http://flamenco.local:8080/
|
||||
task_types: [blender, ffmpeg, file-management, misc]
|
||||
restart_exit_code: 47
|
||||
|
||||
# Optional advanced option, available on Linux only:
|
||||
oom_score_adjust: 500
|
||||
```
|
||||
|
||||
- `manager_url`: The URL of the Manager to connect to. If the setting is blank
|
||||
@ -31,10 +34,17 @@ restart_exit_code: 47
|
||||
- `restart_exit_code`: Having this set to a non-zero value will mark this Worker
|
||||
as 'restartable'. See [Shut Down & Restart Actions][restarting] for more
|
||||
information.
|
||||
- `oom_score_adjust`: an optional value between 0 and 1000, only available on
|
||||
Linux. It configures the Out Of Memory behaviour of the Linux kernel. This is
|
||||
the `oom_score_adj` value for all sub-processes started by the Worker. Set
|
||||
this to a high value, so that when the machine runs out of memory when
|
||||
rendering, it is Blender that gets killed, and not Flamenco Worker itself. For
|
||||
more information, see [Linux Kernel: Per-Process Parameters][per-process-proc].
|
||||
|
||||
[scripts]: {{< ref "usage/job-types" >}}
|
||||
[task-types]: {{< ref "usage/job-types" >}}#task-types
|
||||
[restarting]: {{< ref "usage/worker-actions" >}}#shut-down--restart-actions
|
||||
[per-process-proc]: https://docs.kernel.org/filesystems/proc.html#chapter-3-per-process-parameters
|
||||
|
||||
## Worker-Specific Files
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user