Skip to content

Commit

Permalink
Merge pull request #1514 from gchq/feature/BAI-1458-add-file-scanning…
Browse files Browse the repository at this point in the history
…-connector

File scanning connector to handle multiple AV tools
  • Loading branch information
ARADDCC002 authored Nov 1, 2024
2 parents 1e8161b + c3362fa commit 1692d87
Show file tree
Hide file tree
Showing 73 changed files with 499 additions and 260 deletions.
8 changes: 4 additions & 4 deletions backend/config/default.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,6 @@ module.exports = {
text: '',
startTimestamp: '',
},

avScanning: {
enabled: false,
},
},

connectors: {
Expand All @@ -208,6 +204,10 @@ module.exports = {
audit: {
kind: 'silly',
},

fileScanners: {
kinds: [],
},
},

s3: {
Expand Down
6 changes: 3 additions & 3 deletions backend/config/docker_compose.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ module.exports = {
},
},

ui: {
avScanning: {
enabled: false,
connectors: {
fileScanners: {
kinds: ['clamAV'],
},
},
}
12 changes: 12 additions & 0 deletions backend/src/connectors/fileScanning/Base.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { FileInterface, ScanStateKeys } from '../../models/File.js'
/**
 * Outcome reported by a single scanning tool for one file.
 */
export interface FileScanResult {
// Name of the tool that produced this result.
toolName: string
// Scan state reported for this tool.
state: ScanStateKeys
// Whether the tool flagged the file as infected; optional, so it may be absent.
isInfected?: boolean
// Virus names reported by the tool; optional, so it may be absent.
viruses?: string[]
}

/**
 * Contract that every file scanning connector implements.
 */
export abstract class BaseFileScanningConnector {
// Returns the names of the scanning tools provided by this connector.
abstract info(): string[]
// Scans the given file and resolves with the resulting scan results.
abstract scan(file: FileInterface): Promise<FileScanResult[]>
}
67 changes: 67 additions & 0 deletions backend/src/connectors/fileScanning/clamAv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import NodeClam from 'clamscan'
import { Readable } from 'stream'

import { getObjectStream } from '../../clients/s3.js'
import { FileInterfaceDoc, ScanState } from '../../models/File.js'
import log from '../../services/log.js'
import config from '../../utils/config.js'
import { ConfigurationError } from '../../utils/error.js'
import { BaseFileScanningConnector, FileScanResult } from './Base.js'

// Module-level ClamAV client. It has no initial value and is only assigned inside
// ClamAvFileScanningConnector.init().
let av: NodeClam
// Tool name returned by info() and attached to every ClamAV scan result.
export const clamAvToolName = 'Clam AV'

/**
 * File scanning connector that scans files with ClamAV through the `clamscan` client.
 *
 * `init()` must complete successfully before `scan()` is called; until then `scan()`
 * throws a ConfigurationError.
 */
export class ClamAvFileScanningConnector extends BaseFileScanningConnector {
  /** Returns the single tool name this connector reports its results under. */
  info() {
    return [clamAvToolName]
  }

  /**
   * Creates the ClamAV client from `config.avScanning.clamdscan` and stores it in the
   * module-level `av` variable.
   *
   * @throws ConfigurationError when the client cannot be initialised.
   */
  async init() {
    try {
      av = await new NodeClam().init({ clamdscan: config.avScanning.clamdscan })
    } catch (error) {
      // Record the underlying cause before replacing it with the configuration error,
      // otherwise the reason for the failure is lost.
      log.error({ error }, 'Unable to connect to ClamAV.')
      throw ConfigurationError('Could not scan file as Clam AV is not running.', {
        clamAvConfig: config.avScanning,
      })
    }
  }

  /**
   * Streams the file from object storage into ClamAV and reports the outcome.
   *
   * @param file The stored file to scan; its `bucket` and `path` locate the object.
   * @returns A single-element array: a `Complete` result carrying `isInfected` and
   *   `viruses`, or an `Error` result when fetching or scanning the file fails.
   * @throws ConfigurationError when `init()` has not successfully run.
   */
  async scan(file: FileInterfaceDoc): Promise<FileScanResult[]> {
    if (!av) {
      throw ConfigurationError(
        'Clam AV does not look like it is running. Check that it has been correctly initialised by calling the init function.',
        {
          clamAvConfig: config.avScanning,
        },
      )
    }

    try {
      // Fetching the object is inside the try block so that a storage failure is
      // reported as an errored scan rather than rejecting the whole scan promise.
      const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
      const { isInfected, viruses } = await av.scanStream(s3Stream)
      log.info(
        { modelId: file.modelId, fileId: file._id, name: file.name, result: { isInfected, viruses } },
        'Scan complete.',
      )
      return [
        {
          toolName: clamAvToolName,
          state: ScanState.Complete,
          isInfected,
          viruses,
        },
      ]
    } catch (error) {
      log.error({ error, modelId: file.modelId, fileId: file._id, name: file.name }, 'Scan errored.')
      return [
        {
          toolName: clamAvToolName,
          state: ScanState.Error,
        },
      ]
    }
  }
}
39 changes: 39 additions & 0 deletions backend/src/connectors/fileScanning/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import config from '../../utils/config.js'
import { ConfigurationError } from '../../utils/error.js'
import { BaseFileScanningConnector } from './Base.js'
import { ClamAvFileScanningConnector } from './clamAv.js'
import { FileScanningWrapper } from './wrapper.js'

// File scanner kinds that may be listed in `config.connectors.fileScanners.kinds`.
export const FileScanKind = {
ClamAv: 'clamAV',
} as const
// Union of the string values of FileScanKind.
export type FileScanKindKeys = (typeof FileScanKind)[keyof typeof FileScanKind]

// Cached wrapper returned by runFileScanners() when caching is requested.
let scannerWrapper: undefined | BaseFileScanningConnector = undefined

/**
 * Creates a ClamAV connector, initialises it and, once it is ready, appends it to
 * `connectors`.
 *
 * @param connectors The list the initialised connector is added to.
 * @throws ConfigurationError (as a rejected promise) when construction or
 *   initialisation fails.
 */
async function initialiseClamAv(connectors: BaseFileScanningConnector[]): Promise<void> {
  try {
    const scanner = new ClamAvFileScanningConnector()
    await scanner.init()
    connectors.push(scanner)
  } catch (error) {
    throw ConfigurationError('Could not configure or initialise Clam AV')
  }
}

/**
 * Builds (or returns the cached) connector that fans out to every scanner listed in
 * `config.connectors.fileScanners.kinds`.
 *
 * Scanner initialisation is asynchronous and is not awaited: the wrapper is returned
 * immediately and each scanner is appended to the wrapper's list once its own
 * initialisation finishes. A failed initialisation surfaces as a rejected promise
 * that nothing here handles.
 *
 * @param cache When true (the default) a previously built wrapper is reused.
 * @returns The wrapper connector.
 * @throws ConfigurationError synchronously when a configured kind is not recognised.
 */
export function runFileScanners(cache = true): BaseFileScanningConnector {
  if (scannerWrapper && cache) {
    return scannerWrapper
  }

  // A fresh list per build: reusing one module-level array would register every
  // scanner again (and scan each file twice) whenever the cache is bypassed.
  const connectors: BaseFileScanningConnector[] = []

  // A plain loop rather than forEach(async ...), so that an invalid kind is thrown
  // to the caller instead of being lost inside a detached promise.
  for (const fileScanner of config.connectors.fileScanners.kinds) {
    switch (fileScanner) {
      case FileScanKind.ClamAv:
        void initialiseClamAv(connectors)
        break
      default:
        throw ConfigurationError(`'${fileScanner}' is not a valid file scanning kind.`, {
          validKinds: Object.values(FileScanKind),
        })
    }
  }

  scannerWrapper = new FileScanningWrapper(connectors)
  return scannerWrapper
}

// Default export: the connector returned by calling runFileScanners() once, when this
// module is first evaluated.
export default runFileScanners()
34 changes: 34 additions & 0 deletions backend/src/connectors/fileScanning/wrapper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { FileInterface } from '../../models/File.js'
import log from '../../services/log.js'
import { BaseFileScanningConnector, FileScanResult } from './Base.js'

/**
 * Connector that presents several scanning connectors as a single one.
 */
export class FileScanningWrapper extends BaseFileScanningConnector {
  scanners: BaseFileScanningConnector[] = []

  /**
   * @param scanners The connectors to delegate to. The array is stored by reference
   *   (not copied), so connectors added to it after construction are picked up too.
   */
  constructor(scanners: BaseFileScanningConnector[]) {
    super()
    this.scanners = scanners
  }

  /** Returns the tool names of every wrapped connector, in connector order. */
  info() {
    return this.scanners.flatMap((scanner) => scanner.info())
  }

  /**
   * Runs each wrapped connector against the file, one after another.
   *
   * @param file The file to scan.
   * @returns The results of all connectors concatenated in connector order.
   */
  async scan(file: FileInterface) {
    const results: FileScanResult[] = []
    for (const scanner of this.scanners) {
      // Read the last tool name without pop(): pop() would mutate an array that
      // belongs to the connector that returned it.
      const toolNames = scanner.info()
      log.info(
        { modelId: file.modelId, fileId: file._id, name: file.name, toolName: toolNames[toolNames.length - 1] },
        'Scan started.',
      )
      const scannerResults = await scanner.scan(file)
      results.push(...scannerResults)
    }

    return results
  }
}
21 changes: 11 additions & 10 deletions backend/src/models/File.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { Document, model, ObjectId, Schema } from 'mongoose'
import MongooseDelete from 'mongoose-delete'

import { FileScanResult } from '../connectors/fileScanning/Base.js'

// This interface stores information about the properties on the base object.
// It should be used for plain object representations, e.g. for sending to the
// client.
Expand All @@ -17,11 +19,7 @@ export interface FileInterface {

complete: boolean

avScan: {
state: ScanStateKeys
isInfected?: boolean
viruses?: Array<unknown>
}
avScan: Array<FileScanResult>

createdAt: Date
updatedAt: Date
Expand Down Expand Up @@ -51,11 +49,14 @@ const FileSchema = new Schema<FileInterface>(
bucket: { type: String, required: true },
path: { type: String, required: true },

avScan: {
state: { type: String, enum: Object.values(ScanState), default: 'notScanned' },
isInfected: { type: Boolean },
viruses: [{ type: String }],
},
avScan: [
{
toolName: { type: String },
state: { type: String, enum: Object.values(ScanState) },
isInfected: { type: Boolean },
viruses: [{ type: String }],
},
],

complete: { type: Boolean, default: false },
},
Expand Down
3 changes: 3 additions & 0 deletions backend/src/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { getDockerRegistryAuth } from './routes/v1/registryAuth.js'
import { getCurrentUser } from './routes/v2/entities/getCurrentUser.js'
import { getEntities } from './routes/v2/entities/getEntities.js'
import { getEntityLookup } from './routes/v2/entities/getEntityLookup.js'
import { getFilescanningInfo } from './routes/v2/filescanning/getFilescanningInfo.js'
import { deleteAccessRequest } from './routes/v2/model/accessRequest/deleteAccessRequest.js'
import { getAccessRequest } from './routes/v2/model/accessRequest/getAccessRequest.js'
import { getAccessRequestCurrentUserPermissions } from './routes/v2/model/accessRequest/getAccessRequestCurrentUserPermissions.js'
Expand Down Expand Up @@ -217,6 +218,8 @@ server.delete('/api/v2/user/token/:accessKey', ...deleteUserToken)

server.get('/api/v2/specification', ...getSpecification)

server.get('/api/v2/filescanning/info', ...getFilescanningInfo)

// Python docs
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
Expand Down
15 changes: 15 additions & 0 deletions backend/src/routes/v2/filescanning/getFilescanningInfo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'

import scanners from '../../../connectors/fileScanning/index.js'

// Response body of the file scanning info route.
interface GetFileScanningInfoResponse {
// Tool names as returned by the scanning connector's info().
scanners: string[]
}

/**
 * Route handlers that respond with the names of the configured file scanning tools.
 */
export const getFilescanningInfo = [
  bodyParser.json(),
  async (_req: Request, res: Response<GetFileScanningInfoResponse>) => {
    const scannerNames = scanners.info()
    return res.json({ scanners: scannerNames })
  },
]
65 changes: 29 additions & 36 deletions backend/src/services/file.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import NodeClam from 'clamscan'
import { Schema } from 'mongoose'
import { Readable } from 'stream'

import { getObjectStream, putObjectStream } from '../clients/s3.js'
import { FileAction } from '../connectors/authorisation/actions.js'
import authorisation from '../connectors/authorisation/index.js'
import { FileScanResult } from '../connectors/fileScanning/Base.js'
import scanners from '../connectors/fileScanning/index.js'
import FileModel, { ScanState } from '../models/File.js'
import { UserInterface } from '../models/User.js'
import config from '../utils/config.js'
import { BadReq, Forbidden, NotFound } from '../utils/error.js'
import { longId } from '../utils/id.js'
import log from './log.js'
import { getModelById } from './model.js'
import { removeFileFromReleases } from './release.js'

let av: NodeClam

export async function uploadFile(user: UserInterface, modelId: string, name: string, mime: string, stream: Readable) {
const model = await getModelById(user, modelId)
if (model.settings.mirror.sourceModelId) {
Expand All @@ -38,43 +37,37 @@ export async function uploadFile(user: UserInterface, modelId: string, name: str

await file.save()

if (config.ui.avScanning.enabled) {
if (!av) {
try {
av = await new NodeClam().init({ clamdscan: config.avScanning.clamdscan })
} catch (error) {
log.error(error, 'Unable to connect to ClamAV.')
return file
}
}
const avStream = av.passthrough()
const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
s3Stream.pipe(avStream)
log.info({ modelId, fileId: file._id, name }, 'Scan started.')

avStream.on('scan-complete', async (result) => {
log.info({ result, modelId, fileId: file._id, name }, 'Scan complete.')
await file.update({
avScan: { state: ScanState.Complete, isInfected: result.isInfected, viruses: result.viruses },
})
})
avStream.on('error', async (error) => {
log.error({ error, modelId, fileId: file._id, name }, 'Scan errored.')
await file.update({
avScan: { state: ScanState.Error },
})
})
avStream.on('timeout', async (error) => {
log.error({ error, modelId, fileId: file._id, name }, 'Scan timed out.')
await file.update({
avScan: { state: ScanState.Error },
})
})
if (scanners.info()) {
const resultsInprogress = scanners.info().map((scannerName) => ({
toolName: scannerName,
state: ScanState.InProgress,
}))
await updateFileWithResults(file._id, resultsInprogress)
scanners.scan(file).then((resultsArray) => updateFileWithResults(file._id, resultsArray))
}

return file
}

/**
 * Stores scan results on a file, keeping one `avScan` entry per scanning tool.
 *
 * For each result, the entry whose `toolName` matches is replaced in place. When the
 * file has no entry for that tool yet, the result is appended.
 *
 * @param _id Id of the file document to update.
 * @param results Results to store, one per tool.
 */
async function updateFileWithResults(_id: Schema.Types.ObjectId, results: FileScanResult[]) {
  for (const result of results) {
    // The positional `$` operator targets the array element matched by the filter.
    const updateExistingResult = await FileModel.updateOne(
      { _id, 'avScan.toolName': result.toolName },
      {
        $set: { 'avScan.$': result },
      },
    )

    // Use matchedCount rather than modifiedCount: an entry that already equals the
    // new result is matched but not modified, and must not be added a second time.
    if (updateExistingResult.matchedCount === 0) {
      // Append instead of `$set`-ing `avScan`: overwriting the field discards the
      // entries already stored for other tools.
      // NOTE(review): assumes `avScan` is stored as an array on this document.
      await FileModel.updateOne(
        { _id },
        {
          $push: { avScan: result },
        },
      )
    }
  }
}

export async function downloadFile(user: UserInterface, fileId: string, range?: { start: number; end: number }) {
const file = await getFileById(user, fileId)
const model = await getModelById(user, file.modelId)
Expand Down
7 changes: 4 additions & 3 deletions backend/src/services/mirroredModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { sign } from '../clients/kms.js'
import { getObjectStream, putObjectStream } from '../clients/s3.js'
import { ModelAction } from '../connectors/authorisation/actions.js'
import authorisation from '../connectors/authorisation/index.js'
import scanners from '../connectors/fileScanning/index.js'
import { FileInterfaceDoc, ScanState } from '../models/File.js'
import { ModelDoc } from '../models/Model.js'
import { ModelCardRevisionInterface } from '../models/ModelCardRevision.js'
Expand Down Expand Up @@ -354,7 +355,7 @@ async function checkReleaseFiles(user: UserInterface, modelId: string, semvers:
}
}

if (config.ui.avScanning.enabled) {
if (scanners.info()) {
const files: FileInterfaceDoc[] = await getFilesByIds(user, modelId, fileIds)
const scanErrors: {
missingScan: Array<{ name: string; id: string }>
Expand All @@ -364,9 +365,9 @@ async function checkReleaseFiles(user: UserInterface, modelId: string, semvers:
for (const file of files) {
if (!file.avScan) {
scanErrors.missingScan.push({ name: file.name, id: file.id })
} else if (file.avScan.state !== ScanState.Complete) {
} else if (file.avScan.some((scanResult) => scanResult.state !== ScanState.Complete)) {
scanErrors.incompleteScan.push({ name: file.name, id: file.id })
} else if (file.avScan.isInfected) {
} else if (file.avScan.some((scanResult) => scanResult.isInfected)) {
scanErrors.failedScan.push({ name: file.name, id: file.id })
}
}
Expand Down
4 changes: 0 additions & 4 deletions backend/src/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,4 @@ export interface UiConfig {
text: string
startTimestamp: string
}

avScanning: {
enabled: boolean
}
}
Loading

0 comments on commit 1692d87

Please sign in to comment.