Sync branch magefile with main #104308
@ -13,6 +13,7 @@ bugs in actually-released versions.
|
|||||||
- Fix an issue where the columns in the web interface wouldn't correctly resize when the shown information changed.
|
- Fix an issue where the columns in the web interface wouldn't correctly resize when the shown information changed.
|
||||||
- Add-on: replace the different 'refresh' buttons (for Manager info & storage location, job types, and worker tags) with a single button that just refreshes everything in one go. The information obtained from Flamenco Manager is now stored in a JSON file on disk, making it independent from Blender auto-saving the user preferences.
|
- Add-on: replace the different 'refresh' buttons (for Manager info & storage location, job types, and worker tags) with a single button that just refreshes everything in one go. The information obtained from Flamenco Manager is now stored in a JSON file on disk, making it independent from Blender auto-saving the user preferences.
|
||||||
- Ensure the web frontend connects to the backend correctly when served over HTTPS ([#104296](https://projects.blender.org/studio/flamenco/pulls/104296)).
|
- Ensure the web frontend connects to the backend correctly when served over HTTPS ([#104296](https://projects.blender.org/studio/flamenco/pulls/104296)).
|
||||||
|
- For Workers running on Linux, it is now possible to configure the "OOM score adjustment" for sub-processes. This makes it possible for the out-of-memory killer to target Blender, and not Flamenco Worker itself.
|
||||||
- Security updates of some dependencies:
|
- Security updates of some dependencies:
|
||||||
- [Incorrect forwarding of sensitive headers and cookies on HTTP redirect in net/http](https://pkg.go.dev/vuln/GO-2024-2600)
|
- [Incorrect forwarding of sensitive headers and cookies on HTTP redirect in net/http](https://pkg.go.dev/vuln/GO-2024-2600)
|
||||||
- [Memory exhaustion in multipart form parsing in net/textproto and net/http](https://pkg.go.dev/vuln/GO-2024-2599)
|
- [Memory exhaustion in multipart form parsing in net/textproto and net/http](https://pkg.go.dev/vuln/GO-2024-2599)
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"projects.blender.org/studio/flamenco/internal/appinfo"
|
"projects.blender.org/studio/flamenco/internal/appinfo"
|
||||||
"projects.blender.org/studio/flamenco/internal/worker"
|
"projects.blender.org/studio/flamenco/internal/worker"
|
||||||
"projects.blender.org/studio/flamenco/internal/worker/cli_runner"
|
"projects.blender.org/studio/flamenco/internal/worker/cli_runner"
|
||||||
|
"projects.blender.org/studio/flamenco/pkg/oomscore"
|
||||||
"projects.blender.org/studio/flamenco/pkg/sysinfo"
|
"projects.blender.org/studio/flamenco/pkg/sysinfo"
|
||||||
"projects.blender.org/studio/flamenco/pkg/website"
|
"projects.blender.org/studio/flamenco/pkg/website"
|
||||||
)
|
)
|
||||||
@ -114,6 +115,10 @@ func main() {
|
|||||||
findBlender()
|
findBlender()
|
||||||
findFFmpeg()
|
findFFmpeg()
|
||||||
|
|
||||||
|
// Create the CLI runner before the auto-discovery, to make any configuration
|
||||||
|
// problems clear before waiting for the Manager to respond.
|
||||||
|
cliRunner := createCLIRunner(&configWrangler)
|
||||||
|
|
||||||
// Give the auto-discovery some time to find a Manager.
|
// Give the auto-discovery some time to find a Manager.
|
||||||
discoverTimeout := 10 * time.Minute
|
discoverTimeout := 10 * time.Minute
|
||||||
discoverCtx, discoverCancel := context.WithTimeout(context.Background(), discoverTimeout)
|
discoverCtx, discoverCancel := context.WithTimeout(context.Background(), discoverTimeout)
|
||||||
@ -149,7 +154,6 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
cliRunner := cli_runner.NewCLIRunner()
|
|
||||||
listener = worker.NewListener(client, buffer)
|
listener = worker.NewListener(client, buffer)
|
||||||
cmdRunner := worker.NewCommandExecutor(cliRunner, listener, timeService)
|
cmdRunner := worker.NewCommandExecutor(cliRunner, listener, timeService)
|
||||||
taskRunner := worker.NewTaskExecutor(cmdRunner, listener)
|
taskRunner := worker.NewTaskExecutor(cmdRunner, listener)
|
||||||
@ -304,3 +308,27 @@ func logFatalManagerDiscoveryError(err error, discoverTimeout time.Duration) {
|
|||||||
Msgf("auto-discovery error, see %s", website.CannotFindManagerHelpURL)
|
Msgf("auto-discovery error, see %s", website.CannotFindManagerHelpURL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func createCLIRunner(configWrangler *worker.FileConfigWrangler) *cli_runner.CLIRunner {
|
||||||
|
config, err := configWrangler.WorkerConfig()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("error loading worker configuration")
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.LinuxOOMScoreAdjust == nil {
|
||||||
|
log.Debug().Msg("executables will be run without OOM score adjustment")
|
||||||
|
return cli_runner.NewCLIRunner()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !oomscore.Available() {
|
||||||
|
log.Warn().
|
||||||
|
Msgf("config: oom_score_adjust configured, but that is only available on Linux, not this platform. See %s for more information.",
|
||||||
|
website.OOMScoreAdjURL)
|
||||||
|
return cli_runner.NewCLIRunner()
|
||||||
|
}
|
||||||
|
|
||||||
|
adjustment := *config.LinuxOOMScoreAdjust
|
||||||
|
log.Info().Int("oom_score_adjust", adjustment).Msg("executables will be run with OOM score adjustment")
|
||||||
|
|
||||||
|
return cli_runner.NewCLIRunnerWithOOMScoreAdjuster(adjustment)
|
||||||
|
}
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
|
|
||||||
"github.com/alessio/shellescape"
|
"github.com/alessio/shellescape"
|
||||||
"github.com/rs/zerolog"
|
"github.com/rs/zerolog"
|
||||||
|
"projects.blender.org/studio/flamenco/pkg/oomscore"
|
||||||
)
|
)
|
||||||
|
|
||||||
// The buffer size used to read stdout/stderr output from subprocesses, in
|
// The buffer size used to read stdout/stderr output from subprocesses, in
|
||||||
@ -20,11 +21,19 @@ const StdoutBufferSize = 40 * 1024
|
|||||||
|
|
||||||
// CLIRunner is a wrapper around exec.CommandContext() to allow mocking.
|
// CLIRunner is a wrapper around exec.CommandContext() to allow mocking.
|
||||||
type CLIRunner struct {
|
type CLIRunner struct {
|
||||||
|
oomScoreAdjust int
|
||||||
|
useOOMScoreAdjust bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCLIRunner() *CLIRunner {
|
func NewCLIRunner() *CLIRunner {
|
||||||
return &CLIRunner{}
|
return &CLIRunner{}
|
||||||
}
|
}
|
||||||
|
func NewCLIRunnerWithOOMScoreAdjuster(oomScoreAdjust int) *CLIRunner {
|
||||||
|
return &CLIRunner{
|
||||||
|
oomScoreAdjust: oomScoreAdjust,
|
||||||
|
useOOMScoreAdjust: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (cli *CLIRunner) CommandContext(ctx context.Context, name string, arg ...string) *exec.Cmd {
|
func (cli *CLIRunner) CommandContext(ctx context.Context, name string, arg ...string) *exec.Cmd {
|
||||||
return exec.CommandContext(ctx, name, arg...)
|
return exec.CommandContext(ctx, name, arg...)
|
||||||
@ -55,7 +64,7 @@ func (cli *CLIRunner) RunWithTextOutput(
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := execCmd.Start(); err != nil {
|
if err := cli.startWithOOMAdjust(execCmd); err != nil {
|
||||||
logger.Error().Err(err).Msg("error starting CLI execution")
|
logger.Error().Err(err).Msg("error starting CLI execution")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -171,3 +180,13 @@ func (cli *CLIRunner) logCmd(
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startWithOOMAdjust runs the command with its OOM score adjusted.
|
||||||
|
func (cli *CLIRunner) startWithOOMAdjust(execCmd *exec.Cmd) error {
|
||||||
|
if cli.useOOMScoreAdjust {
|
||||||
|
oomScoreRestore := oomscore.Adjust(cli.oomScoreAdjust)
|
||||||
|
defer oomScoreRestore()
|
||||||
|
}
|
||||||
|
|
||||||
|
return execCmd.Start()
|
||||||
|
}
|
||||||
|
@ -58,6 +58,18 @@ type WorkerConfig struct {
|
|||||||
|
|
||||||
TaskTypes []string `yaml:"task_types"`
|
TaskTypes []string `yaml:"task_types"`
|
||||||
RestartExitCode int `yaml:"restart_exit_code"`
|
RestartExitCode int `yaml:"restart_exit_code"`
|
||||||
|
|
||||||
|
// LinuxOOMScoreAdjust controls the Linux out-of-memory killer. Is used when
|
||||||
|
// spawning a sub-process, to adjust the likelyness that that subprocess is
|
||||||
|
// killed rather than Flamenco Worker itself. That way Flamenco Worker can
|
||||||
|
// report the failure to the Manager.
|
||||||
|
//
|
||||||
|
// If the Worker itself would be OOM-killed, it would just be restarted and
|
||||||
|
// get the task it was already working on, causing an infinite OOM-loop.
|
||||||
|
//
|
||||||
|
// If this value is not specified in the configuration file, Flamenco Worker
|
||||||
|
// will not attempt to adjust its OOM score.
|
||||||
|
LinuxOOMScoreAdjust *int `yaml:"oom_score_adjust"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type WorkerCredentials struct {
|
type WorkerCredentials struct {
|
||||||
|
86
pkg/oomscore/oomscore.go
Normal file
86
pkg/oomscore/oomscore.go
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
// package oomscore provides some functions to adjust the Linux
|
||||||
|
// out-of-memory (OOM) score, i.e. the number that determines how likely it is
|
||||||
|
// that a process is killed in an out-of-memory situation.
|
||||||
|
//
|
||||||
|
// It is available only on Linux. On other platforms ErrNotImplemented will be returned.
|
||||||
|
package oomscore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ErrNotImplemented = errors.New("OOM score functionality not implemented on this platform")
|
||||||
|
|
||||||
|
// Available returns whether the functionality in this package is available for
|
||||||
|
// the current platform.
|
||||||
|
func Available() bool {
|
||||||
|
return available
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetOOMScore returns the current process' OOM score.
|
||||||
|
func GetOOMScore() (int, error) {
|
||||||
|
return getOOMScore()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetOOMScoreAdj returns the current process' OOM score adjustment.
|
||||||
|
func GetOOMScoreAdj() (int, error) {
|
||||||
|
return getOOMScoreAdj()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetOOMScoreAdj sets the current process' OOM score adjustment.
|
||||||
|
func SetOOMScoreAdj(score int) error {
|
||||||
|
return setOOMScoreAdj(score)
|
||||||
|
}
|
||||||
|
|
||||||
|
type ScoreRestoreFunc func()
|
||||||
|
|
||||||
|
var emptyRestoreFunc ScoreRestoreFunc = func() {}
|
||||||
|
|
||||||
|
// Adjust temporarily sets the OOM score adjustment.
|
||||||
|
// The returned function MUST be called to restore the original value.
|
||||||
|
// Any problems changing the score are logged, but not otherwise returned.
|
||||||
|
func Adjust(score int) (restoreFunc ScoreRestoreFunc) {
|
||||||
|
restoreFunc = emptyRestoreFunc
|
||||||
|
|
||||||
|
if !Available() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
origScore, err := getOOMScoreAdj()
|
||||||
|
if err != nil {
|
||||||
|
log.Error().
|
||||||
|
AnErr("cause", err).
|
||||||
|
Msg("could not get the current process' oom_score_adj value")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Trace().
|
||||||
|
Int("oom_score_adj", score).
|
||||||
|
Msg("setting oom_score_adj")
|
||||||
|
|
||||||
|
err = setOOMScoreAdj(score)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().
|
||||||
|
Int("oom_score_adj", score).
|
||||||
|
AnErr("cause", err).
|
||||||
|
Msg("could not set the current process' oom_score_adj value")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return func() {
|
||||||
|
log.Trace().
|
||||||
|
Int("oom_score_adj", origScore).
|
||||||
|
Msg("restoring oom_score_adj")
|
||||||
|
|
||||||
|
err = setOOMScoreAdj(origScore)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().
|
||||||
|
Int("oom_score_adj", origScore).
|
||||||
|
AnErr("cause", err).
|
||||||
|
Msg("could not restore the current process' oom_score_adj value")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
66
pkg/oomscore/oomscore_linux.go
Normal file
66
pkg/oomscore/oomscore_linux.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
//go:build linux
|
||||||
|
|
||||||
|
package oomscore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
available = true
|
||||||
|
)
|
||||||
|
|
||||||
|
// getOOMScore returns the current process' OOM score.
|
||||||
|
func getOOMScore() (int, error) {
|
||||||
|
return readInt("oom_score")
|
||||||
|
}
|
||||||
|
|
||||||
|
// getOOMScoreAdj returns the current process' OOM score adjustment.
|
||||||
|
func getOOMScoreAdj() (int, error) {
|
||||||
|
return readInt("oom_score_adj")
|
||||||
|
}
|
||||||
|
|
||||||
|
// setOOMScoreAdj sets the current process' OOM score adjustment.
|
||||||
|
func setOOMScoreAdj(newScore int) error {
|
||||||
|
return writeInt(newScore, "oom_score_adj")
|
||||||
|
}
|
||||||
|
|
||||||
|
// readInt reads an integer from /proc/{pid}/{filename}
|
||||||
|
func readInt(filename string) (int, error) {
|
||||||
|
fullPath := procPidPath(filename)
|
||||||
|
|
||||||
|
file, err := os.Open(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("opening %s: %w", fullPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var valueInFile int
|
||||||
|
n, err := fmt.Fscan(file, &valueInFile)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("reading %s: %w", fullPath, err)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
return 0, fmt.Errorf("reading %s: did not find a number", fullPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
return valueInFile, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeInt writes an integer to /proc/{pid}/{filename}
|
||||||
|
func writeInt(value int, filename string) error {
|
||||||
|
fullPath := procPidPath(filename)
|
||||||
|
contents := fmt.Sprint(value)
|
||||||
|
err := os.WriteFile(fullPath, []byte(contents), os.ModePerm)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("writing %s: %w", fullPath, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// procPidPath returns "/proc/{pid}/{filename}".
|
||||||
|
func procPidPath(filename string) string {
|
||||||
|
pid := os.Getpid()
|
||||||
|
return filepath.Join("/proc", fmt.Sprint(pid), filename)
|
||||||
|
}
|
19
pkg/oomscore/oomscore_nonlinux.go
Normal file
19
pkg/oomscore/oomscore_nonlinux.go
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
package oomscore
|
||||||
|
|
||||||
|
const (
|
||||||
|
available = false
|
||||||
|
)
|
||||||
|
|
||||||
|
func getOOMScore() (int, error) {
|
||||||
|
return 0, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func getOOMScoreAdj() (int, error) {
|
||||||
|
return 0, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func setOOMScoreAdj(int) error {
|
||||||
|
return ErrNotImplemented
|
||||||
|
}
|
@ -9,4 +9,5 @@ const (
|
|||||||
BugReportURL = "https://flamenco.blender.org/get-involved"
|
BugReportURL = "https://flamenco.blender.org/get-involved"
|
||||||
ShamanRequirementsURL = "https://flamenco.blender.org/usage/shared-storage/shaman/#requirements"
|
ShamanRequirementsURL = "https://flamenco.blender.org/usage/shared-storage/shaman/#requirements"
|
||||||
WorkerConfigURL = "https://flamenco.blender.org/usage/worker-configuration/"
|
WorkerConfigURL = "https://flamenco.blender.org/usage/worker-configuration/"
|
||||||
|
OOMScoreAdjURL = WorkerConfigURL
|
||||||
)
|
)
|
||||||
|
@ -19,6 +19,9 @@ This is an example of such a configuration file:
|
|||||||
manager_url: http://flamenco.local:8080/
|
manager_url: http://flamenco.local:8080/
|
||||||
task_types: [blender, ffmpeg, file-management, misc]
|
task_types: [blender, ffmpeg, file-management, misc]
|
||||||
restart_exit_code: 47
|
restart_exit_code: 47
|
||||||
|
|
||||||
|
# Optional advanced option, available on Linux only:
|
||||||
|
oom_score_adjust: 500
|
||||||
```
|
```
|
||||||
|
|
||||||
- `manager_url`: The URL of the Manager to connect to. If the setting is blank
|
- `manager_url`: The URL of the Manager to connect to. If the setting is blank
|
||||||
@ -31,10 +34,17 @@ restart_exit_code: 47
|
|||||||
- `restart_exit_code`: Having this set to a non-zero value will mark this Worker
|
- `restart_exit_code`: Having this set to a non-zero value will mark this Worker
|
||||||
as 'restartable'. See [Shut Down & Restart Actions][restarting] for more
|
as 'restartable'. See [Shut Down & Restart Actions][restarting] for more
|
||||||
information.
|
information.
|
||||||
|
- `oom_score_adjust`: an optional value between 0 and 1000, only available on
|
||||||
|
Linux. It configures the Out Of Memory behaviour of the Linux kernel. This is
|
||||||
|
the `oom_score_adj` value for all sub-processes started by the Worker. Set
|
||||||
|
this to a high value, so that when the machine runs out of memory when
|
||||||
|
rendering, it is Blender that gets killed, and not Flamenco Worker itself. For
|
||||||
|
more information, see [Linux Kernel: Per-Process Parameters][per-process-proc].
|
||||||
|
|
||||||
[scripts]: {{< ref "usage/job-types" >}}
|
[scripts]: {{< ref "usage/job-types" >}}
|
||||||
[task-types]: {{< ref "usage/job-types" >}}#task-types
|
[task-types]: {{< ref "usage/job-types" >}}#task-types
|
||||||
[restarting]: {{< ref "usage/worker-actions" >}}#shut-down--restart-actions
|
[restarting]: {{< ref "usage/worker-actions" >}}#shut-down--restart-actions
|
||||||
|
[per-process-proc]: https://docs.kernel.org/filesystems/proc.html#chapter-3-per-process-parameters
|
||||||
|
|
||||||
## Worker-Specific Files
|
## Worker-Specific Files
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user