diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..d581c92 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,26 @@ +name: Build +on: [push, pull_request, workflow_dispatch] +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v3 + with: + lfs: true + - name: Cache Maven packages + uses: actions/cache@v1 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + - name: Set up JDK 21 + uses: actions/setup-java@v3 + with: + java-version: 21 + distribution: adopt + architecture: x64 + - name: Build + run: mvn --batch-mode test diff --git a/README.md b/README.md index 8a94f4e..b86985f 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ This command-line utility runs a series of single-threaded workloads using [Phil to redact PII tokens in strings of varying sizes. Workloads can be run multiple times to warm up the JVM or test long-term use. Workloads run for a fixed amount of time rather than a fixed number of iterations. 
-[![CodeFactor](https://www.codefactor.io/repository/github/resurfaceio/phileas-benchmark/badge)](https://www.codefactor.io/repository/github/resurfaceio/phileas-benchmark) +[![CodeFactor](https://www.codefactor.io/repository/github/philterd/phileas-benchmark/badge)](https://www.codefactor.io/repository/github/philterd/phileas-benchmark) ## Dependencies -* Java 22 +* Java 21 * Maven 3.9.x * [philterd/phileas](https://github.com/philterd/phileas) @@ -25,6 +25,12 @@ java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=100 java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=10000 -XX:PerMethodRecompilationCutoff=10000 -jar target/phileas-benchmark-cmd.jar gettysberg_address mask_credit_cards 1 1000 ``` +To get the results back as a JSON array, append `json` as a fifth argument to the command: + +``` +java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=10000 -XX:PerMethodRecompilationCutoff=10000 -jar target/phileas-benchmark-cmd.jar all mask_all 1 15000 json +``` + ### Available documents * hello_world (11 chars) diff --git a/pom.xml b/pom.xml index d18839e..a2f4253 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,7 @@ maven-compiler-plugin 3.13.0 - 22 + 21 -proc:none @@ -55,6 +55,12 @@ 2.7.0-SNAPSHOT + + com.google.code.gson + gson + 2.11.0 + + com.mscharhag.oleaster diff --git a/src/main/java/ai/philterd/phileas/benchmark/Documents.java b/src/main/java/ai/philterd/phileas/benchmark/Documents.java index 570ff3a..169ad54 100644 --- a/src/main/java/ai/philterd/phileas/benchmark/Documents.java +++ b/src/main/java/ai/philterd/phileas/benchmark/Documents.java @@ -28,7 +28,7 @@ public class Documents { // todo add JSON-encoded documents // todo add documents with PCI matches - public static final String GETTYSBERG_ADDRESS = """ + public static final String GETTYSBURG_ADDRESS = """ Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated 
to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this. But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth. 
@@ -82,13 +82,13 @@ public class Documents { public static final List keys = List.of( "hello_world", - "gettysberg_address", + "gettysburg_address", "i_have_a_dream" ); public static final Map map = Map.ofEntries( new AbstractMap.SimpleEntry<>("hello_world", "Hello world"), - new AbstractMap.SimpleEntry<>("gettysberg_address", GETTYSBERG_ADDRESS), + new AbstractMap.SimpleEntry<>("gettysburg_address", GETTYSBURG_ADDRESS), new AbstractMap.SimpleEntry<>("i_have_a_dream", I_HAVE_A_DREAM) ); diff --git a/src/main/java/ai/philterd/phileas/benchmark/Main.java b/src/main/java/ai/philterd/phileas/benchmark/Main.java index 446a69f..f21287e 100644 --- a/src/main/java/ai/philterd/phileas/benchmark/Main.java +++ b/src/main/java/ai/philterd/phileas/benchmark/Main.java @@ -16,7 +16,12 @@ package ai.philterd.phileas.benchmark; +import com.google.gson.Gson; + +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; +import java.util.Map; /** * Run benchmark workloads for Phileas PII engine. 
@@ -24,46 +29,93 @@ public class Main { public static void main(String[] args) throws Exception { + // show usage statement if needed - if (args.length != 4) { - System.out.println("Usage: java ai.philterd.phileas.benchmark.Main "); + if (args.length != 4 && args.length != 5) { + System.out.println("Usage: java ai.philterd.phileas.benchmark.Main "); throw new IllegalArgumentException("Invalid arguments"); } // read arguments - String arg_document = args[0]; - String arg_redactor = args[1]; - int repetitions = Integer.parseInt(args[2]); - int workload_millis = Integer.parseInt(args[3]); + final String arg_document = args[0]; + final String arg_redactor = args[1]; + final int repetitions = Integer.parseInt(args[2]); + final int workload_millis = Integer.parseInt(args[3]); + + String arg_format = "sysout"; + if(args.length == 5) { + arg_format = args[4]; + } // create redactor based on Phileas PII engine - Redactor redactor = new Redactor(arg_redactor); + final Redactor redactor = new Redactor(arg_redactor); + + final List results = new LinkedList<>(); // repeatedly redact documents and print results - List documents = "all".equals(arg_document) ? Documents.keys : List.of(arg_document); - int[] value_lengths = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 3072, 4096}; + final List documents = "all".equals(arg_document) ? 
Documents.keys : List.of(arg_document); + final int[] value_lengths = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 3072, 4096}; + for (int i = 0; i < repetitions; i++) { - for (String document : documents) { - try { + + for (final String document : documents) { + + if (!arg_format.equals("json")) { System.out.println("\n------------------------------------------------------------------------------------------"); System.out.println("Using document: " + document); System.out.println("Using redactor: " + arg_redactor); System.out.println("Using workload_millis: " + workload_millis); System.out.println("\nstring_length,calls_per_sec"); - for (int value_length : value_lengths) run_workload(workload_millis, redactor, Documents.get(document).substring(0, value_length)); - } catch (StringIndexOutOfBoundsException e) { - // do nothing, ignore } + + final Map calls = new HashMap<>(); + + for (int value_length : value_lengths) { + + if(Documents.get(document).length() >= value_length) { + + final String value = Documents.get(document).substring(0, value_length); + final long calls_per_sec = run_workload(workload_millis, redactor, value); + + if (!arg_format.equals("json")) { + System.out.println(value.length() + "," + calls_per_sec); + } + + calls.put(value_length, calls_per_sec); + + } else { + break; + } + + } + + final Result result = new Result(); + result.setWorkloadMillis(workload_millis); + result.setRedactor(arg_redactor); + result.setDocument(document); + result.setCallsPerSecond(calls); + + results.add(result); + } + } + + if(arg_format.equals("json")) { + final Gson gson = new Gson(); + System.out.println(gson.toJson(results)); + } + } - private static void run_workload(int millis, Redactor redactor, String value) throws Exception { - long start = System.currentTimeMillis(); + private static long run_workload(int millis, Redactor redactor, String value) throws Exception { + + final long start = System.currentTimeMillis(); long calls 
= -1; while ((++calls % 100 != 0) || (System.currentTimeMillis() - start < millis)) redactor.filter(value); - long calls_per_sec = calls * 1000 / (System.currentTimeMillis() - start); - System.out.println(value.length() + "," + calls_per_sec); + + return calls * 1000 / (System.currentTimeMillis() - start); + } } diff --git a/src/main/java/ai/philterd/phileas/benchmark/Redactor.java b/src/main/java/ai/philterd/phileas/benchmark/Redactor.java index bec1343..e4dee2f 100644 --- a/src/main/java/ai/philterd/phileas/benchmark/Redactor.java +++ b/src/main/java/ai/philterd/phileas/benchmark/Redactor.java @@ -19,11 +19,32 @@ import ai.philterd.phileas.model.configuration.PhileasConfiguration; import ai.philterd.phileas.model.enums.MimeType; import ai.philterd.phileas.model.policy.Identifiers; -import ai.philterd.phileas.model.policy.IgnoredPattern; import ai.philterd.phileas.model.policy.Policy; -import ai.philterd.phileas.model.policy.filters.*; +import ai.philterd.phileas.model.policy.filters.BankRoutingNumber; +import ai.philterd.phileas.model.policy.filters.BitcoinAddress; +import ai.philterd.phileas.model.policy.filters.CreditCard; +import ai.philterd.phileas.model.policy.filters.DriversLicense; +import ai.philterd.phileas.model.policy.filters.EmailAddress; +import ai.philterd.phileas.model.policy.filters.IbanCode; +import ai.philterd.phileas.model.policy.filters.IpAddress; +import ai.philterd.phileas.model.policy.filters.PassportNumber; +import ai.philterd.phileas.model.policy.filters.PhoneNumber; +import ai.philterd.phileas.model.policy.filters.Ssn; +import ai.philterd.phileas.model.policy.filters.TrackingNumber; +import ai.philterd.phileas.model.policy.filters.Vin; import ai.philterd.phileas.model.policy.filters.strategies.AbstractFilterStrategy; -import ai.philterd.phileas.model.policy.filters.strategies.rules.*; +import ai.philterd.phileas.model.policy.filters.strategies.rules.BankRoutingNumberFilterStrategy; +import 
ai.philterd.phileas.model.policy.filters.strategies.rules.BitcoinAddressFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.CreditCardFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.DriversLicenseFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.EmailAddressFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.IbanCodeFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.IpAddressFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.PassportNumberFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.PhoneNumberFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.SsnFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.TrackingNumberFilterStrategy; +import ai.philterd.phileas.model.policy.filters.strategies.rules.VinFilterStrategy; import ai.philterd.phileas.model.responses.FilterResponse; import ai.philterd.phileas.services.PhileasFilterService; diff --git a/src/main/java/ai/philterd/phileas/benchmark/Result.java b/src/main/java/ai/philterd/phileas/benchmark/Result.java new file mode 100644 index 0000000..2d91842 --- /dev/null +++ b/src/main/java/ai/philterd/phileas/benchmark/Result.java @@ -0,0 +1,71 @@ +/* + * Copyright 2024 Philterd, LLC @ https://www.philterd.ai + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.philterd.phileas.benchmark; + +import com.google.gson.annotations.SerializedName; + +import java.util.HashMap; +import java.util.Map; + +public class Result { + + private String document; + private String redactor; + + @SerializedName("workload_millis") + private long workloadMillis; + + @SerializedName("calls_per_sec") + private Map callsPerSecond; + + public Result() { + this.callsPerSecond = new HashMap<>(); + } + + public String getDocument() { + return document; + } + + public void setDocument(String document) { + this.document = document; + } + + public String getRedactor() { + return redactor; + } + + public void setRedactor(String redactor) { + this.redactor = redactor; + } + + public long getWorkloadMillis() { + return workloadMillis; + } + + public void setWorkloadMillis(long workloadMillis) { + this.workloadMillis = workloadMillis; + } + + public Map getCallsPerSecond() { + return callsPerSecond; + } + + public void setCallsPerSecond(Map callsPerSecond) { + this.callsPerSecond = callsPerSecond; + } + +} diff --git a/src/main/resources/log4j2.properties b/src/main/resources/log4j2.properties new file mode 100644 index 0000000..329ba7a --- /dev/null +++ b/src/main/resources/log4j2.properties @@ -0,0 +1,5 @@ +rootLogger=OFF, STDOUT +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %c{1} - %msg%n