diff --git a/docs/source/debugging-optimizing/numactl-utility.md b/docs/source/debugging-optimizing/numactl-utility.md
index 86bf11ef32..2c0e1a4b00 100644
--- a/docs/source/debugging-optimizing/numactl-utility.md
+++ b/docs/source/debugging-optimizing/numactl-utility.md
@@ -52,7 +52,7 @@ Given the architecture above, to run the DeepSparse Engine on the first four CPU
 
 Appending `--preferred 1` is needed here since the DeepSparse Engine is being bound to CPUs on the second socket.
 
-Note that using more than two sockets may not offer improvements over two sockets; if you have options, try different scenarios to see which setup is ideal for your use case. For batch size considerations, use an amount that is evenly divisible by the number of sockets you intend to use.
+Note: When running on multiple sockets, using a batch size that is evenly divisible by the number of sockets will yield the best performance.
 
 ## DeepSparse Engine and Thread Pinning
 
diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index 1335299fb7..ff146bd13f 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -122,9 +122,6 @@ class Engine(object):
     Note 1: Engines are compiled for a specific batch size and for a specific
     number of CPU cores.
 
-    Note 2: multi socket support is not yet built in to the Engine,
-    all execution assumes single socket
-
     | Example:
     |    # create an engine for batch size 1 on all available cores
     |    engine = Engine("path/to/onnx", batch_size=1, num_cores=None)
@@ -224,8 +221,7 @@ def num_cores(self) -> int:
 
     @property
     def num_sockets(self) -> int:
        """
-        :return: The number of sockets the engine is compiled to run on;
-            only current support is 1
+        :return: The number of sockets the engine is compiled to run on
        """
        return self._num_sockets
@@ -501,8 +497,8 @@ def compile_model(
    """
    Convenience function to compile a model in the DeepSparse Engine
    from an ONNX file for inference.
-    Gives defaults of batch_size == 1 and num_cores == None
-    (will use all physical cores available on a single socket).
+    Gives defaults of batch_size == 1, num_cores == None, and num_sockets == None
+    (will use all physical cores available on all sockets).
 
    :param model: Either a path to the model's onnx file, a SparseZoo model stub
        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
@@ -537,6 +533,8 @@ def benchmark_model(
    Gives defaults of batch_size == 1 and num_cores == None
    (will use all physical cores available on a single socket).
 
+    Note: Benchmarking is currently only supported on a single socket.
+
    :param model: Either a path to the model's onnx file, a SparseZoo model stub
        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
        object that defines the neural network
@@ -592,6 +590,8 @@ def analyze_model(
    Gives defaults of batch_size == 1 and num_cores == None
    (will use all physical cores available on a single socket).
 
+    Note: Analysis is currently only supported on a single socket.
+
    :param model: Either a path to the model's onnx file, a SparseZoo model stub
        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
        object that defines the neural network graph definition to analyze
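
As a quick illustration of the new multi-socket defaults, here is a minimal usage sketch. This is an assumption-laden example, not part of the diff: it assumes a deepsparse build containing this change, a placeholder `model.onnx` on disk, an illustrative input shape, and a two-socket machine (hence `batch_size=2`, evenly divisible by the socket count per the doc note above).

```python
import numpy

from deepsparse import compile_model

# num_sockets=None follows the updated default in this diff: use all
# available sockets, mirroring how num_cores=None uses all physical cores.
engine = compile_model(
    "model.onnx",  # placeholder path to an ONNX model
    batch_size=2,  # keep batch_size evenly divisible by the socket count
    num_cores=None,
    num_sockets=None,
)
print(engine.num_sockets)  # sockets the engine was compiled to run on

# DeepSparse engines take a list of numpy arrays as input;
# the shape here is illustrative only.
inputs = [numpy.random.rand(2, 3, 224, 224).astype(numpy.float32)]
outputs = engine(inputs)
```

Per the notes added in this diff, `benchmark_model` and `analyze_model` remain single-socket; only compilation and inference pick up the multi-socket defaults.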