Support pausing jobs #104313
@ -5,8 +5,6 @@ package task_state_machine
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"projects.blender.org/studio/flamenco/pkg/website"
|
||||
David-Zhang-10 marked this conversation as resolved
Outdated
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
@ -120,6 +118,7 @@ func (sm *StateMachine) updateJobAfterTaskStatusChange(
|
||||
return sm.jobStatusIfAThenB(ctx, logger, job, api.JobStatusCompleted, api.JobStatusRequeueing, "task was queued")
|
||||
|
||||
case api.TaskStatusPaused:
|
||||
// Pausing a task has no impact on the job.
|
||||
return nil
|
||||
|
||||
case api.TaskStatusCanceled:
|
||||
@ -214,10 +213,19 @@ func (sm *StateMachine) updateJobOnTaskStatusCompleted(ctx context.Context, logg
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
numActive, _, err := sm.persist.CountTasksOfJobInStatus(ctx, job, api.TaskStatusActive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if numComplete == numTotal {
|
||||
logger.Info().Msg("all tasks of job are completed, job is completed")
|
||||
return sm.JobStatusChange(ctx, job, api.JobStatusCompleted, "all tasks completed")
|
||||
}
|
||||
if numActive == 0 && job.Status == api.JobStatusPauseRequested {
|
||||
// there is no active task, and the job is in pause-requested status, so we can pause the job
|
||||
logger.Info().Msg("all tasks of job are completed, job is paused")
|
||||
David-Zhang-10 marked this conversation as resolved
Outdated
Sybren A. Stüvel
commented
I don't think this is the right structure. The Also, don't use Better to just use:
It'll also make it easier to move the code into a function of its own. It's the same "single responsibility principle" as before: there should be one piece of code that checks whether pausing a job is complete. I don't think this is the right structure. The `if {}` block now contains a copy of the code below it, which is error-prone. There should be one piece of code responsible for one thing. By duplicating this code, there is the chance that one of the copies goes out of sync with the other, introducing hard to reason about bugs.
Also, don't use `else` after a `return`, it doesn't mean anything.
Better to just use:
```go
if job.Status == api.JobStatusPauseRequested {
numActive, _, err := sm.persist.CountTasksOfJobInStatus(ctx, job, api.TaskStatusActive)
if err != nil {
return err
}
if numActive == 0 {
// There is no active task, and the job is in pause-requested status, so we can pause the job.
failLogger.Info().Msg("No more active tasks, job is paused")
return sm.JobStatusChange(ctx, job, api.JobStatusPaused, "all tasks completed")
}
}
```
It'll also make it easier to move the code into a function of its own. It's the same "single responsibility principle" as before: there should be one piece of code that checks whether pausing a job is complete.
|
||||
return sm.JobStatusChange(ctx, job, api.JobStatusPaused, "all tasks completed")
|
||||
}
|
||||
logger.Info().
|
||||
Int("taskNumTotal", numTotal).
|
||||
Int("taskNumComplete", numComplete).
|
||||
@ -453,7 +461,6 @@ func (sm *StateMachine) pauseTasks(
|
||||
// Any task that might run in the future should get paused.
|
||||
// Active jobs should remain active until finished
|
||||
taskStatusesToPause := []api.TaskStatus{
|
||||
api.TaskStatusActive,
|
||||
api.TaskStatusQueued,
|
||||
api.TaskStatusCanceled,
|
||||
api.TaskStatusSoftFailed,
|
||||
@ -466,15 +473,7 @@ func (sm *StateMachine) pauseTasks(
|
||||
return "", fmt.Errorf("pausing tasks of job %s: %w", job.UUID, err)
|
||||
}
|
||||
|
||||
// If pause was requested, it has now happened, so the job can transition.
|
||||
if job.Status == api.JobStatusPauseRequested {
|
||||
logger.Info().Msg("all tasks of job paused, job can go to 'paused' status")
|
||||
return api.JobStatusPaused, nil
|
||||
}
|
||||
|
||||
// This could mean state transition entered a non-recoverable error state.
|
||||
log.Warn().Str("jobStatus", string(job.Status)).Msgf("unexpected job status in StateMachine::pauseTasks(), please report this at %s", website.BugReportURL)
|
||||
return "", fmt.Errorf("unexpected job status %q in StateMachine::pauseTasks()", job.Status)
|
||||
return api.JobStatusPauseRequested, nil
|
||||
}
|
||||
|
||||
// requeueTasks re-queues all tasks of the job.
|
||||
|
Loading…
Reference in New Issue
Block a user
Keep the newline between built-in packages and 3rd party packages.