Skip to content

Commit

Permalink
Check PREMIS validity after preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
mcantelon committed Oct 24, 2024
1 parent f924713 commit 146dc51
Show file tree
Hide file tree
Showing 9 changed files with 1,325 additions and 3 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
!go.mod
!go.sum
!main.go
!hack/xsd/premis.xsd
27 changes: 27 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
ARG TARGET=enduro
ARG GO_VERSION

# Helper stage: installs the libxml2-utils package on a Debian bookworm image.
# The worker stages below copy /usr/bin/xmllint and a set of shared libraries
# out of this stage rather than installing the package themselves.
FROM golang:${GO_VERSION}-bookworm AS build-libxml
RUN apt-get update && apt-get install -y --no-install-recommends libxml2-utils

FROM golang:${GO_VERSION}-alpine AS build-go
WORKDIR /src
ENV CGO_ENABLED=0
Expand Down Expand Up @@ -61,10 +64,34 @@ FROM base AS enduro-a3m-worker
COPY --from=build-enduro-a3m-worker --link /out/enduro-a3m-worker /home/enduro/bin/enduro-a3m-worker
COPY --from=build-enduro-a3m-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
CMD ["/home/enduro/bin/enduro-a3m-worker", "--config", "/home/enduro/.config/enduro.toml"]
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
COPY --from=build-libxml /lib/x86_64-linux-gnu/libxml2.so.2 /lib/x86_64-linux-gnu/libxml2.so.2
COPY --from=build-libxml /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libc.so.6
COPY --from=build-libxml /lib/x86_64-linux-gnu/libicuuc.so.72 /lib/x86_64-linux-gnu/libicuuc.so.72
COPY --from=build-libxml /lib/x86_64-linux-gnu/libz.so.1 /lib/x86_64-linux-gnu/libz.so.1
COPY --from=build-libxml /lib/x86_64-linux-gnu/liblzma.so.5 /lib/x86_64-linux-gnu/liblzma.so.5
COPY --from=build-libxml /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/libm.so.6
COPY --from=build-libxml /lib64/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2
COPY --from=build-libxml /lib/x86_64-linux-gnu/libicudata.so.72 /lib/x86_64-linux-gnu/libicudata.so.72
COPY --from=build-libxml /lib/x86_64-linux-gnu/libstdc++.so.6 /lib/x86_64-linux-gnu/libstdc++.so.6
COPY --from=build-libxml /lib/x86_64-linux-gnu/libgcc_s.so.1 /lib/x86_64-linux-gnu/libgcc_s.so.1

# Final image for the enduro-am-worker binary: ships the worker, its config,
# the PREMIS XSD schema, and an xmllint binary taken from the build-libxml stage.
FROM base AS enduro-am-worker
COPY --from=build-enduro-am-worker --link /out/enduro-am-worker /home/enduro/bin/enduro-am-worker
COPY --from=build-enduro-am-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
CMD ["/home/enduro/bin/enduro-am-worker", "--config", "/home/enduro/.config/enduro.toml"]
# PREMIS schema file placed in the enduro home directory.
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
# xmllint plus shared libraries copied file-by-file from build-libxml
# (presumably the libraries xmllint links against at runtime - TODO confirm with ldd).
# NOTE(review): every library path below is hard-coded to x86_64-linux-gnu and
# to specific sonames (e.g. libicuuc.so.72); builds for another architecture or
# a different Debian release would presumably need different paths - confirm.
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
COPY --from=build-libxml /lib/x86_64-linux-gnu/libxml2.so.2 /lib/x86_64-linux-gnu/libxml2.so.2
COPY --from=build-libxml /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libc.so.6
COPY --from=build-libxml /lib/x86_64-linux-gnu/libicuuc.so.72 /lib/x86_64-linux-gnu/libicuuc.so.72
COPY --from=build-libxml /lib/x86_64-linux-gnu/libz.so.1 /lib/x86_64-linux-gnu/libz.so.1
COPY --from=build-libxml /lib/x86_64-linux-gnu/liblzma.so.5 /lib/x86_64-linux-gnu/liblzma.so.5
COPY --from=build-libxml /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/libm.so.6
COPY --from=build-libxml /lib64/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2
COPY --from=build-libxml /lib/x86_64-linux-gnu/libicudata.so.72 /lib/x86_64-linux-gnu/libicudata.so.72
COPY --from=build-libxml /lib/x86_64-linux-gnu/libstdc++.so.6 /lib/x86_64-linux-gnu/libstdc++.so.6
COPY --from=build-libxml /lib/x86_64-linux-gnu/libgcc_s.so.1 /lib/x86_64-linux-gnu/libgcc_s.so.1

FROM ${TARGET}
5 changes: 5 additions & 0 deletions cmd/enduro-a3m-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
"github.com/artefactual-sdps/temporal-activities/bucketupload"
"github.com/artefactual-sdps/temporal-activities/removepaths"
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"github.com/hashicorp/go-cleanhttp"
"github.com/oklog/run"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand Down Expand Up @@ -252,6 +253,10 @@ func main() {
archiveextract.New(cfg.ExtractActivity).Execute,
temporalsdk_activity.RegisterOptions{Name: archiveextract.Name},
)
w.RegisterActivityWithOptions(
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)

Check warning on line 259 in cmd/enduro-a3m-worker/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/enduro-a3m-worker/main.go#L256-L259

Added lines #L256 - L259 were not covered by tests
w.RegisterActivityWithOptions(
activities.NewClassifyPackageActivity().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.ClassifyPackageActivityName},
Expand Down
5 changes: 5 additions & 0 deletions cmd/enduro-am-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
"github.com/artefactual-sdps/temporal-activities/bucketupload"
"github.com/artefactual-sdps/temporal-activities/removepaths"
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"github.com/hashicorp/go-cleanhttp"
"github.com/jonboulle/clockwork"
"github.com/oklog/run"
Expand Down Expand Up @@ -329,6 +330,10 @@ func main() {
bucketupload.New(failedPIPs).Execute,
temporalsdk_activity.RegisterOptions{Name: activities.SendToFailedPIPsName},
)
w.RegisterActivityWithOptions(
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)

Check warning on line 336 in cmd/enduro-am-worker/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/enduro-am-worker/main.go#L333-L336

Added lines #L333 - L336 were not covered by tests

g.Add(
func() error {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/XSAM/otelsql v0.29.0
github.com/alicebob/miniredis/v2 v2.32.1
github.com/artefactual-labs/bagit-gython v0.2.0
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4
github.com/coreos/go-oidc/v3 v3.10.0
github.com/cyphar/filepath-securejoin v0.2.4
github.com/dolmen-go/contextio v1.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -442,8 +442,8 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
github.com/artefactual-labs/bagit-gython v0.2.0 h1:Zje4Lb1goZVUPoxpc/k65sWtYpNgK9Rvphvaok5cYzE=
github.com/artefactual-labs/bagit-gython v0.2.0/go.mod h1:C+hFZQMDnji1hjGt3nrlMK3BahaBhvo/hU2uqd+Q9Z4=
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b h1:kTOc2pbkdII6/Z84Bus1q52z5KAOaT8vLpfRoOs1l1I=
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4 h1:WF95IOkZRVSCST/26SAqPYsUrtUuJpavBht6lvdeKl0=
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/aws-sdk-go-v2 v1.30.3 h1:jUeBtG0Ih+ZIFH0F4UkmL9w3cSpaMv9tYYDbzILP8dY=
Expand Down
Loading

0 comments on commit 146dc51

Please sign in to comment.