Skip to content

Commit

Permalink
Add preprocessing child workflow
Browse files Browse the repository at this point in the history
Allows configuring and triggering a custom preprocessing child workflow
to be handled by a different worker. This initial implementation requires
both workers to have access to the same filesystem to share the package.
  • Loading branch information
jraddaoui committed Apr 23, 2024
1 parent 4742be2 commit db1a339
Show file tree
Hide file tree
Showing 11 changed files with 413 additions and 40 deletions.
29 changes: 28 additions & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,35 @@ KUBE_OVERLAY = 'hack/kube/overlays/dev-a3m'
if PRES_SYS == 'am':
KUBE_OVERLAY = 'hack/kube/overlays/dev-am'

# Load Kustomize YAML
yaml = kustomize(KUBE_OVERLAY)

# Preprocessing
#
# When the PREPROCESSING_PATH environment variable is set, load the
# preprocessing project's Tiltfile and patch the Enduro preservation worker
# manifest so it mounts the volume shared with the preprocessing worker.
PREPROCESSING_PATH = os.environ.get("PREPROCESSING_PATH", "")
if PREPROCESSING_PATH != "":
  # Load preprocessing Tiltfile for Enduro
  load_dynamic(PREPROCESSING_PATH + "/Tiltfile.enduro")
  # Get Enduro a3m/am worker k8s manifest, removing it from the main YAML so
  # the patched copy can be added back below.
  if PRES_SYS == "a3m":
    pres_yaml, yaml = filter_yaml(yaml, name="^enduro-a3m$", kind="StatefulSet")
  else:
    pres_yaml, yaml = filter_yaml(yaml, name="^enduro-am$", kind="Deployment")
  # Append preprocessing volume and volume mount to worker container,
  # this will only work in single node k8s cluster deployments
  volume = {"name": "shared-dir", "persistentVolumeClaim": {"claimName": "preprocessing-pvc"}}
  volume_mount = {"name": "shared-dir", "mountPath": "/home/enduro/preprocessing"}
  pres_obj = decode_yaml(pres_yaml)
  pod_spec = pres_obj["spec"]["template"]["spec"]
  # setdefault creates the list when the manifest does not declare one yet.
  pod_spec.setdefault("volumes", []).append(volume)
  for container in pod_spec["containers"]:
    if container["name"] in ["enduro-a3m-worker", "enduro-am-worker"]:
      # The worker container may not declare any volumeMounts; create the
      # list instead of failing on a missing key.
      container.setdefault("volumeMounts", []).append(volume_mount)
  pres_yaml = encode_yaml(pres_obj)
  yaml = [yaml, pres_yaml]

# Load Kubernetes resources
k8s_yaml(kustomize(KUBE_OVERLAY))
k8s_yaml(yaml)

# Configure trigger mode
trigger_mode = TRIGGER_MODE_MANUAL
Expand Down
1 change: 1 addition & 0 deletions docs/src/dev-manual/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This is the developer manual for Enduro SDPS.
- [Dependency management](deps.md)
- [Environment setup](devel.md)
- [Working with Archivematica](archivematica.md)
- [Preprocessing child workflow](preprocessing.md)
- [Logging](logging.md)
- [Makefile](make.md)
- [Testing](testing.md)
9 changes: 9 additions & 0 deletions docs/src/dev-manual/devel.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,14 @@ are planning to use Archivematica as preservation system.
Build and use a local version of a3m. Requires the `a3m` repository to be
cloned as a sibling of this repository folder.

### PREPROCESSING_PATH

Relative path to a preprocessing child workflow repository. It loads a Tiltfile
called `Tiltfile.enduro` from that repository and mounts a persistent volume
claim (PVC) in the preservation system pod. That PVC must be defined in the
preprocessing repository and be called `preprocessing-pvc`. Check the
[Preprocessing child workflow] docs to configure the child workflow execution.

## Tilt UI helpers

### Upload to Minio
Expand Down Expand Up @@ -259,3 +267,4 @@ is sometimes not setup properly. To solve it, from the Tilt UI, restart the
[visual studio code]: https://code.visualstudio.com/
[working with archivematica]: archivematica.md
[devbox]: https://www.jetpack.io/devbox/docs/quickstart/#install-devbox
[preprocessing child workflow]: preprocessing.md
34 changes: 34 additions & 0 deletions docs/src/dev-manual/preprocessing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Preprocessing child workflow

The processing workflow can be extended with the execution of a preprocessing
child workflow.

## Configuration

### `.tilt.env`

Check the [Tilt environment configuration].

### `enduro.toml`

```toml
# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow. When set to false, all other
# options are ignored.
enabled = true
# extract determines if the package extraction happens on the child workflow.
extract = false
# sharedPath is the full path to the directory used to share the package between workflows.
# Required when enabled is set to true.
sharedPath = "/home/enduro/preprocessing"

# Temporal configuration to trigger the preprocessing child workflow. All fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
```

[tilt environment configuration]: devel.md#preprocessing_path
18 changes: 18 additions & 0 deletions enduro.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,21 @@ bucket = "sips"
enabled = false
address = ""
samplingRatio = 1.0

# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow. When set to false, all other
# options are ignored.
enabled = false
# extract determines if the package extraction happens on the child workflow.
extract = false
# sharedPath is the full path to the directory used to share the package between workflows.
# Required when enabled is set to true.
sharedPath = "/home/enduro/preprocessing"

# Temporal configuration to trigger the preprocessing child workflow. All fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
45 changes: 20 additions & 25 deletions internal/config/config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package config

import (
"errors"
"fmt"
"os"
"strings"
Expand All @@ -13,6 +14,7 @@ import (
"github.com/artefactual-sdps/enduro/internal/api"
"github.com/artefactual-sdps/enduro/internal/db"
"github.com/artefactual-sdps/enduro/internal/event"
"github.com/artefactual-sdps/enduro/internal/preprocessing"
"github.com/artefactual-sdps/enduro/internal/pres"
"github.com/artefactual-sdps/enduro/internal/storage"
"github.com/artefactual-sdps/enduro/internal/telemetry"
Expand All @@ -26,38 +28,31 @@ type ConfigurationValidator interface {
}

type Configuration struct {
Verbosity int
Debug bool
DebugListen string
Verbosity int

A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preprocessing preprocessing.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
}

func (c Configuration) Validate() error {
// TODO: should this validate all the fields in Configuration?
if config, ok := interface{}(c.Upload).(ConfigurationValidator); ok {
err := config.Validate()
if err != nil {
return err
}
}
if config, ok := interface{}(c.API.Auth).(ConfigurationValidator); ok {
err := config.Validate()
if err != nil {
return err
}
}
return nil
apiAuthErr := c.API.Auth.Validate()
preprocessingErr := c.Preprocessing.Validate()
uploadErr := c.Upload.Validate()

return errors.Join(apiAuthErr, preprocessingErr, uploadErr)
}

func Read(config *Configuration, configFile string) (found bool, configFileUsed string, err error) {
Expand Down
46 changes: 46 additions & 0 deletions internal/preprocessing/preprocessing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package preprocessing

import "errors"

// Config holds the optional preprocessing child workflow settings read from
// the [preprocessing] configuration section.
type Config struct {
	// Enabled triggers the execution of the child workflow. When false,
	// Validate ignores every other field.
	Enabled bool
	// Extract determines if the package extraction happens on the child
	// workflow.
	Extract bool
	// SharedPath is the local directory used to share the package between
	// workers. Required when Enabled is true.
	SharedPath string
	// Temporal describes how to reach the child workflow. All of its fields
	// are required when Enabled is true.
	Temporal Temporal
}

// Temporal holds the [preprocessing.temporal] settings used to trigger the
// preprocessing child workflow.
type Temporal struct {
	Namespace    string
	TaskQueue    string
	WorkflowName string
}

// WorkflowParams is the input of the preprocessing child workflow.
type WorkflowParams struct {
	// Relative path to the shared path.
	RelativePath string
}

// WorkflowResult is the output of the preprocessing child workflow.
type WorkflowResult struct {
	// Relative path to the shared path.
	RelativePath string
}

// Validate implements config.ConfigurationValidator.
//
// A disabled configuration is always valid. An enabled one must set
// SharedPath and every Temporal field; the first missing requirement found,
// in that order, is reported.
func (c Config) Validate() error {
	t := c.Temporal
	temporalComplete := t.Namespace != "" && t.TaskQueue != "" && t.WorkflowName != ""

	switch {
	case !c.Enabled:
		return nil
	case c.SharedPath == "":
		return errors.New("sharedPath is required in the [preprocessing] configuration")
	case !temporalComplete:
		return errors.New(
			"namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration",
		)
	default:
		return nil
	}
}
65 changes: 65 additions & 0 deletions internal/preprocessing/preprocessing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package preprocessing_test

import (
"testing"

"gotest.tools/v3/assert"

"github.com/artefactual-sdps/enduro/internal/preprocessing"
)

// TestPreprocessingConfig runs Config.Validate over a table of disabled,
// complete and incomplete configurations, each as a parallel subtest.
func TestPreprocessingConfig(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name    string
		config  preprocessing.Config
		wantErr string
	}

	cases := []testCase{
		{
			name:   "Validates if not enabled",
			config: preprocessing.Config{Enabled: false},
		},
		{
			name: "Validates with all required fields",
			config: preprocessing.Config{
				Enabled:    true,
				SharedPath: "/tmp",
				Temporal: preprocessing.Temporal{
					Namespace:    "default",
					TaskQueue:    "preprocessing",
					WorkflowName: "preprocessing",
				},
			},
		},
		{
			name:    "Returns error if shared path is missing",
			config:  preprocessing.Config{Enabled: true},
			wantErr: "sharedPath is required in the [preprocessing] configuration",
		},
		{
			name: "Returns error if temporal config is missing",
			config: preprocessing.Config{
				Enabled:    true,
				SharedPath: "/tmp",
			},
			wantErr: "namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			err := tc.config.Validate()
			// An empty wantErr means the configuration must validate cleanly.
			if tc.wantErr == "" {
				assert.NilError(t, err)
				return
			}
			assert.Error(t, err, tc.wantErr)
		})
	}
}
7 changes: 4 additions & 3 deletions internal/workflow/activities/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ type DownloadActivity struct {
}

type DownloadActivityParams struct {
Key string
WatcherName string
Key string
WatcherName string
DestinationPath string
}

type DownloadActivityResult struct {
Expand All @@ -47,7 +48,7 @@ func (a *DownloadActivity) Execute(
"WatcherName", params.WatcherName,
)

destDir, err := os.MkdirTemp("", "enduro")
destDir, err := os.MkdirTemp(params.DestinationPath, "enduro")
if err != nil {
return &DownloadActivityResult{}, temporal_tools.NewNonRetryableError(fmt.Errorf("make temp dir: %v", err))
}
Expand Down
Loading

0 comments on commit db1a339

Please sign in to comment.