Skip to content

Commit

Permalink
fix: scale default number of workers by files (#1068)
Browse files Browse the repository at this point in the history
  • Loading branch information
didroe authored Jun 15, 2023
1 parent 751614b commit 697259e
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 25 deletions.
2 changes: 1 addition & 1 deletion e2e/flags/.snapshots/TestInitCommand
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ scan:
external-rule-dir: []
force: false
internal-domains: []
parallel: 2
parallel: 0
quiet: false
scanner:
- sast
Expand Down
2 changes: 1 addition & 1 deletion e2e/flags/.snapshots/TestMetadataFlags-help-scan
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Scan Flags
--external-rule-dir strings Specify directories paths that contain .yaml files with external rules configuration
--force Disable the cache and runs the detections again
--internal-domains strings Define regular expressions for better classification of private or unreachable domains e.g. --internal-domains=".*.my-company.com,private.sh"
--parallel int Specify the amount of parallelism to use during the scan (default 2)
--parallel int Specify the amount of parallelism to use during the scan
--quiet Suppress non-essential messages
--scanner strings Specify which scanner to use e.g. --scanner=secrets, --scanner=secrets,sast (default [sast])
--skip-path strings Specify the comma separated files and directories to skip. Supports * syntax, e.g. --skip-path users/*.go,users/admin.sql
Expand Down
2 changes: 1 addition & 1 deletion e2e/flags/.snapshots/TestMetadataFlags-scan-help
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Scan Flags
--external-rule-dir strings Specify directories paths that contain .yaml files with external rules configuration
--force Disable the cache and runs the detections again
--internal-domains strings Define regular expressions for better classification of private or unreachable domains e.g. --internal-domains=".*.my-company.com,private.sh"
--parallel int Specify the amount of parallelism to use during the scan (default 2)
--parallel int Specify the amount of parallelism to use during the scan
--quiet Suppress non-essential messages
--scanner strings Specify which scanner to use e.g. --scanner=secrets, --scanner=secrets,sast (default [sast])
--skip-path strings Specify the comma separated files and directories to skip. Supports * syntax, e.g. --skip-path users/*.go,users/admin.sql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Scan Flags
--external-rule-dir strings Specify directories paths that contain .yaml files with external rules configuration
--force Disable the cache and runs the detections again
--internal-domains strings Define regular expressions for better classification of private or unreachable domains e.g. --internal-domains=".*.my-company.com,private.sh"
--parallel int Specify the amount of parallelism to use during the scan (default 2)
--parallel int Specify the amount of parallelism to use during the scan
--quiet Suppress non-essential messages
--scanner strings Specify which scanner to use e.g. --scanner=secrets, --scanner=secrets,sast (default [sast])
--skip-path strings Specify the comma separated files and directories to skip. Supports * syntax, e.g. --skip-path users/*.go,users/admin.sql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Scan Flags
--external-rule-dir strings Specify directories paths that contain .yaml files with external rules configuration
--force Disable the cache and runs the detections again
--internal-domains strings Define regular expressions for better classification of private or unreachable domains e.g. --internal-domains=".*.my-company.com,private.sh"
--parallel int Specify the amount of parallelism to use during the scan (default 2)
--parallel int Specify the amount of parallelism to use during the scan
--quiet Suppress non-essential messages
--scanner strings Specify which scanner to use e.g. --scanner=secrets, --scanner=secrets,sast (default [sast])
--skip-path strings Specify the comma separated files and directories to skip. Supports * syntax, e.g. --skip-path users/*.go,users/admin.sql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Scan Flags
--external-rule-dir strings Specify directories paths that contain .yaml files with external rules configuration
--force Disable the cache and runs the detections again
--internal-domains strings Define regular expressions for better classification of private or unreachable domains e.g. --internal-domains=".*.my-company.com,private.sh"
--parallel int Specify the amount of parallelism to use during the scan (default 2)
--parallel int Specify the amount of parallelism to use during the scan
--quiet Suppress non-essential messages
--scanner strings Specify which scanner to use e.g. --scanner=secrets, --scanner=secrets,sast (default [sast])
--skip-path strings Specify the comma separated files and directories to skip. Supports * syntax, e.g. --skip-path users/*.go,users/admin.sql
Expand Down
1 change: 0 additions & 1 deletion e2e/internal/testhelper/testhelper.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ func CreateCommand(arguments []string) (*exec.Cmd, context.CancelFunc) {
cmd = exec.CommandContext(ctx, "go", arguments...)
}

cmd.Env = append(os.Environ(), "BEARER_DEFAULT_PARALLEL=2")
cmd.Dir = os.Getenv("GITHUB_WORKSPACE")

return cmd, cancel
Expand Down
24 changes: 23 additions & 1 deletion pkg/commands/process/orchestrator/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io"
"os"
"path"
"runtime"
"sync"

"github.com/hhatto/gocloc"
Expand Down Expand Up @@ -60,12 +61,15 @@ func newOrchestrator(
return nil, ErrFileListEmpty
}

parallel := getParallel(len(files), config)
log.Debug().Msgf("number of workers: %d", parallel)

return &orchestrator{
repository: repository,
config: config,
reportFile: reportFile,
files: files,
maxWorkersSemaphore: make(chan struct{}, config.Scan.Parallel),
maxWorkersSemaphore: make(chan struct{}, parallel),
done: make(chan struct{}),
pool: pool.New(config),
progressBar: bearerprogress.GetProgressBar(len(files), config, "files"),
Expand Down Expand Up @@ -187,3 +191,21 @@ func Scan(repository work.Repository, config settings.Config, goclogResult *gocl
orchestrator.Close()
return err
}

// getParallel returns the number of workers to use when scanning fileCount
// files.
//
// A positive config.Scan.Parallel is treated as an explicit setting and is
// returned unchanged. Otherwise the count is derived from the amount of work:
// one worker per settings.FilesPerWorker files, never fewer than 1 and never
// more than runtime.NumCPU().
func getParallel(fileCount int, config settings.Config) int {
	// Only a positive value is a usable explicit setting. The result is used
	// as a channel capacity, and make panics on a negative size, so zero and
	// negative values both fall through to the automatic calculation.
	if config.Scan.Parallel > 0 {
		return config.Scan.Parallel
	}

	workers := fileCount / settings.FilesPerWorker

	// Integer division yields 0 for small file lists; always run one worker.
	if workers < 1 {
		return 1
	}

	// Do not start more workers than there are CPUs to run them.
	if cpus := runtime.NumCPU(); workers > cpus {
		return cpus
	}

	return workers
}
4 changes: 1 addition & 3 deletions pkg/commands/process/settings/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var (
TimeoutWorkerOnline = 60 * time.Second // Maximum time to wait for a worker process to come online
TimeoutWorkerShutdown = 5 * time.Second // Maximum time to wait for a worker process to shut down cleanly
FileSizeMaximum = 2 * 1000 * 1000 // 2 MB Ignore files larger than the specified value
FilesToBatch = 1 // Specify the number of files to batch per worker
FilesPerWorker = 1000 // By default, start a worker per this many files, up to the number of CPUs
MemoryMaximum = 800 * 1000 * 1000 // 800 MB If the memory needed to scan a file surpasses the specified limit, skip the file.
ExistingWorker = "" // Specify the URL of an existing worker
)
Expand All @@ -33,7 +33,6 @@ type WorkerOptions struct {
TimeoutFileBytesPerSecond int `mapstructure:"timeout-file-bytes-per-second" json:"timeout-file-bytes-per-second" yaml:"timeout-file-bytes-per-second"`
TimeoutWorkerOnline time.Duration `mapstructure:"timeout-worker-online" json:"timeout-worker-online" yaml:"timeout-worker-online"`
FileSizeMaximum int `mapstructure:"file-size-max" json:"file-size-max" yaml:"file-size-max"`
FilesToBatch int `mapstructure:"files-to-batch" json:"files-to-batch" yaml:"files-to-batch"`
MemoryMaximum int `mapstructure:"memory-max" json:"memory-max" yaml:"memory-max"`
ExistingWorker string `mapstructure:"existing-worker" json:"existing-worker" yaml:"existing-worker"`
}
Expand Down Expand Up @@ -262,7 +261,6 @@ func defaultWorkerOptions() WorkerOptions {
TimeoutFileMaximum: TimeoutFileMaximum,
TimeoutFileBytesPerSecond: TimeoutFileBytesPerSecond,
TimeoutWorkerOnline: TimeoutWorkerOnline,
FilesToBatch: FilesToBatch,
FileSizeMaximum: FileSizeMaximum,
MemoryMaximum: MemoryMaximum,
ExistingWorker: ExistingWorker,
Expand Down
15 changes: 1 addition & 14 deletions pkg/flag/scan_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ package flag

import (
"errors"
"os"
"runtime"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -96,7 +93,7 @@ var (
ParallelFlag = Flag{
Name: "parallel",
ConfigName: "scan.parallel",
Value: parallelValue(),
Value: 0,
Usage: "Specify the amount of parallelism to use during the scan",
}
)
Expand Down Expand Up @@ -218,13 +215,3 @@ func getContext(flag *Flag) Context {
flagStr := strings.ToLower(getString(flag))
return Context(flagStr)
}

// parallelValue computes the default value for the parallel flag.
//
// When the BEARER_DEFAULT_PARALLEL environment variable is set to a string
// that parses as an integer, that integer is returned. When the variable is
// empty, unset, or not a valid integer, runtime.NumCPU() is returned instead.
func parallelValue() int {
	raw := os.Getenv("BEARER_DEFAULT_PARALLEL")
	if raw == "" {
		return runtime.NumCPU()
	}

	parsed, err := strconv.Atoi(raw)
	if err != nil {
		// An unparsable override is ignored rather than reported.
		return runtime.NumCPU()
	}

	return parsed
}

0 comments on commit 697259e

Please sign in to comment.