Skip to content

Commit

Permalink
Merge branch 'main' into adrianl/quant-tool-handle-prequantized-weights
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianlizarraga committed Nov 14, 2024
2 parents eb8981e + 09c9843 commit 7adcbd7
Show file tree
Hide file tree
Showing 30 changed files with 1,310 additions and 223 deletions.
53 changes: 48 additions & 5 deletions .config/1espt/PipelineAutobaseliningConfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ pipelines:
retail:
source:
credscan:
lastModifiedDate: 2024-10-24
lastModifiedDate: 2024-10-25
policheck:
lastModifiedDate: 2024-10-24
lastModifiedDate: 2024-10-25
eslint:
lastModifiedDate: 2024-10-24
lastModifiedDate: 2024-10-25
psscriptanalyzer:
lastModifiedDate: 2024-10-24
lastModifiedDate: 2024-10-25
armory:
lastModifiedDate: 2024-10-24
lastModifiedDate: 2024-10-25
usedNonDefaultBranch: true
1299:
retail:
source:
Expand All @@ -25,10 +26,52 @@ pipelines:
lastModifiedDate: 2024-10-25
armory:
lastModifiedDate: 2024-10-25
policheck:
lastModifiedDate: 2024-10-29
binary:
credscan:
lastModifiedDate: 2024-10-25
binskim:
lastModifiedDate: 2024-10-25
spotbugs:
lastModifiedDate: 2024-10-25
1625:
retail:
source:
credscan:
lastModifiedDate: 2024-11-05
policheck:
lastModifiedDate: 2024-11-05
eslint:
lastModifiedDate: 2024-11-05
psscriptanalyzer:
lastModifiedDate: 2024-11-05
armory:
lastModifiedDate: 2024-11-05
binary:
credscan:
lastModifiedDate: 2024-11-13
binskim:
lastModifiedDate: 2024-11-13
spotbugs:
lastModifiedDate: 2024-11-13
1626:
retail:
source:
credscan:
lastModifiedDate: 2024-11-13
policheck:
lastModifiedDate: 2024-11-13
eslint:
lastModifiedDate: 2024-11-13
psscriptanalyzer:
lastModifiedDate: 2024-11-13
armory:
lastModifiedDate: 2024-11-13
binary:
credscan:
lastModifiedDate: 2024-11-13
binskim:
lastModifiedDate: 2024-11-13
spotbugs:
lastModifiedDate: 2024-11-13
43 changes: 43 additions & 0 deletions .config/guardian/.gdnbaselines
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"properties": {
"helpUri": "https://eng.ms/docs/microsoft-security/security/azure-security/cloudai-security-fundamentals-engineering/security-integration/guardian-wiki/microsoft-guardian/general/baselines"
},
"version": "1.0.0",
"baselines": {
"default": {
"name": "default",
"createdDate": "2024-11-13 00:40:35Z",
"lastUpdatedDate": "2024-11-13 00:40:35Z"
}
},
"results": {
"48f03e2797fc40ecea50f878a0268947c7e13db1b2fa51aa3981246844fc4c68": {
"signature": "48f03e2797fc40ecea50f878a0268947c7e13db1b2fa51aa3981246844fc4c68",
"alternativeSignatures": [],
"target": "ScanTelemetry_20241113003616898.json",
"line": 1,
"memberOf": [
"default"
],
"tool": "credscan",
"ruleId": "CSCAN-AZURE0130",
"createdDate": "2024-11-13 00:40:35Z",
"expirationDate": "2025-05-02 01:29:47Z",
"justification": "This error is baselined with an expiration date of 180 days from 2024-11-13 01:29:47Z"
},
"9cb6eddb3f3e886ad06cae65f5886412ff0c5fb0b96d4e943e4efa237be617b1": {
"signature": "9cb6eddb3f3e886ad06cae65f5886412ff0c5fb0b96d4e943e4efa237be617b1",
"alternativeSignatures": [],
"target": "ScanTelemetry_20241113111547065.json",
"line": 1,
"memberOf": [
"default"
],
"tool": "credscan",
"ruleId": "CSCAN-AZURE0130",
"createdDate": "2024-11-13 11:20:17Z",
"expirationDate": "2025-05-02 11:55:15Z",
"justification": "This error is baselined with an expiration date of 180 days from 2024-11-13 11:55:15Z"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,23 @@ struct OrtTensorRTProviderOptionsV2 {
* directory by means of the "trt_onnx_model_folder_path" option.
*
*/
int trt_dump_ep_context_model{0}; // Dump EP context node model
const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path.
int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
int trt_weight_stripped_engine_enable{0}; // Enable weight-stripped engine build. Default 0 = false,
// nonzero = true
const char* trt_onnx_model_folder_path{nullptr}; // Folder path relative to the current working directory for
// the ONNX model containing the weights (applicable only when
// the "trt_weight_stripped_engine_enable" option is enabled)
const void* trt_onnx_bytestream{nullptr}; // The byte stream of the original ONNX model containing the weights
// (applicable only when the "trt_weight_stripped_engine_enable"
// option is enabled)
// can be updated using: UpdateTensorRTProviderOptionsWithValue
size_t trt_onnx_bytestream_size{0}; // size of the byte stream provided as "trt_onnx_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue

const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
int trt_dump_ep_context_model{0}; // Dump EP context node model
const char* trt_ep_context_file_path{nullptr}; // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path.
int trt_ep_context_embed_mode{0}; // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data
int trt_weight_stripped_engine_enable{0}; // Enable weight-stripped engine build. Default 0 = false,
// nonzero = true
const char* trt_onnx_model_folder_path{nullptr}; // Folder path relative to the current working directory for
// the ONNX model containing the weights (applicable only when
// the "trt_weight_stripped_engine_enable" option is enabled)
const void* trt_onnx_bytestream{nullptr}; // The byte stream of the original ONNX model containing the weights
// (applicable only when the "trt_weight_stripped_engine_enable"
// option is enabled)
// can be updated using: UpdateTensorRTProviderOptionsWithValue
size_t trt_onnx_bytestream_size{0}; // size of the byte stream provided as "trt_onnx_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue
const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{"NonMaxSuppression,NonZero,RoiAlign"}; // Exclude specific ops from running on TRT.
// There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) from TRT versions 10.0 to 10.7.
// TRT EP excludes DDS ops from running on TRT by default; users can override the default value with an empty string to include all ops.
};
1 change: 1 addition & 0 deletions js/web/docs/webgpu-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Do not modify directly.*
| Reshape | ai.onnx(5-12,13,14-18,19-20,21+) | no GPU kernel |
| Resize | ai.onnx(10,11-12,13-17,18,19+); com.ms.internal.nhwc(10,11-12,13-17,18,19+) | CoordinateTransformMode align_corners is not supported with downsampling |
| RotaryEmbedding | com.microsoft(1+) | |
| ScatterND | ai.onnx(11-12,13-15,16-17,18+) | |
| Shape | ai.onnx(1-12,13-14,15-18,19-20,21+) | no GPU kernel; an ORT warning is generated - need to fix |
| Sigmoid | ai.onnx(6-12,13+) | |
| SimplifiedLayerNormalization | ai.onnx(1+) | |
Expand Down
2 changes: 2 additions & 0 deletions js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { pad } from './ops/pad';
import * as pool from './ops/pool';
import { dequantizeLinear, parseDequantizeLinearAttributes } from './ops/quantize-linear';
import { range } from './ops/range';
import { scatterND, parseScatterNDAttributes } from './ops/scatter-nd';
import {
reduceL1,
reduceL2,
Expand Down Expand Up @@ -140,6 +141,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
['Relu', [unaryOps.relu]],
['Resize', [resize, parseResizeAttributes]],
['RotaryEmbedding', [rotaryEmbedding]],
['ScatterND', [scatterND, parseScatterNDAttributes]],
['Sigmoid', [unaryOps.sigmoid]],
['Sin', [unaryOps.sin]],
['Sinh', [unaryOps.sinh]],
Expand Down
19 changes: 16 additions & 3 deletions js/web/lib/wasm/jsep/webgpu/ops/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ export interface IndicesHelper {
/**
* whether the helper is for an input, an output or an internal variable.
*/
readonly usage: 'input' | 'output' | 'internal';
readonly usage: 'input' | 'output' | 'atomicOutput' | 'internal';

/**
* the rank of the input or output.
Expand Down Expand Up @@ -733,6 +733,20 @@ export const outputVariable = (
components: 1 | 2 | 3 | 4 = 1,
): IndicesHelper => createIndicesHelper(name, type, shapeOrRank, 'output', components);

/**
 * Build an IndicesHelper describing an atomic output variable.
 *
 * The helper is created with usage `'atomicOutput'` and always uses a single
 * component per element, so unlike `outputVariable` there is no `components`
 * parameter.
 *
 * @param name - the name of the output variable.
 * @param type - the tensor data type of the output.
 * @param shapeOrRank - either the full tensor shape or just the rank of the output.
 * @returns an IndicesHelper whose `usage` is `'atomicOutput'`.
 */
export const atomicOutputVariable = (
  name: string,
  type: number,
  shapeOrRank: number | readonly number[],
): IndicesHelper => {
  // Atomic outputs are always scalar (one component per element).
  const components = 1;
  return createIndicesHelper(name, type, shapeOrRank, 'atomicOutput', components);
};

/**
* Create an IndicesHelper for an internal variable.
*
Expand Down Expand Up @@ -905,9 +919,8 @@ class ShaderHelperImpl implements ShaderHelper {
}
this.variables.push(variable);
this.appendVariableUniforms(variable);

const access = variable.usage === 'input' ? 'read' : 'read_write';
const storageType = variable.type.storage;
const storageType = variable.usage === 'atomicOutput' ? `atomic<i32>` : variable.type.storage;
return `@group(0) @binding(${bindingIndex}) var<storage, ${access}> ${variable.name}: array<${storageType}>;`;
}

Expand Down
Loading

0 comments on commit 7adcbd7

Please sign in to comment.