From ba45c3def54e69ab5bd1647dd8e4c6232ff5c1ef Mon Sep 17 00:00:00 2001
From: bnellnm <49004751+bnellnm@users.noreply.github.com>
Date: Mon, 22 Mar 2021 16:14:08 -0400
Subject: [PATCH] Fix documentation related to number of sockets (#63)

* Fix documentation related to number of sockets

* Update src/deepsparse/engine.py

Co-authored-by: Michael Goin <michael@neuralmagic.com>

* Update src/deepsparse/engine.py

Co-authored-by: Michael Goin <michael@neuralmagic.com>

Co-authored-by: Michael Goin <michael@neuralmagic.com>
---
 .../source/debugging-optimizing/numactl-utility.md |  2 +-
 src/deepsparse/engine.py                           | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/source/debugging-optimizing/numactl-utility.md b/docs/source/debugging-optimizing/numactl-utility.md
index 86bf11ef32..2c0e1a4b00 100644
--- a/docs/source/debugging-optimizing/numactl-utility.md
+++ b/docs/source/debugging-optimizing/numactl-utility.md
@@ -52,7 +52,7 @@ Given the architecture above, to run the DeepSparse Engine on the first four CPU
 
 Appending `--preferred 1` is needed here since the DeepSparse Engine is being bound to CPUs on the second socket.
 
-Note that using more than two sockets may not offer improvements over two sockets; if you have options, try different scenarios to see which setup is ideal for your use case. For batch size considerations, use an amount that is evenly divisible by the number of sockets you intend to use.
+Note: When running on multiple sockets, using a batch size that is evenly divisible by the number of sockets will yield the best performance.
 
 
 ## DeepSparse Engine and Thread Pinning
diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index 1335299fb7..ff146bd13f 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -122,9 +122,6 @@ class Engine(object):
     Note 1: Engines are compiled for a specific batch size and
     for a specific number of CPU cores.
 
-    Note 2: multi socket support is not yet built in to the Engine,
-    all execution assumes single socket
-
     | Example:
     |    # create an engine for batch size 1 on all available cores
     |    engine = Engine("path/to/onnx", batch_size=1, num_cores=None)
@@ -224,8 +221,7 @@ def num_cores(self) -> int:
     @property
     def num_sockets(self) -> int:
         """
-        :return: The number of sockets the engine is compiled to run on;
-            only current support is 1
+        :return: The number of sockets the engine is compiled to run on
         """
         return self._num_sockets
 
@@ -501,8 +497,8 @@ def compile_model(
     """
     Convenience function to compile a model in the DeepSparse Engine
     from an ONNX file for inference.
-    Gives defaults of batch_size == 1 and num_cores == None
-    (will use all physical cores available on a single socket).
+    Gives defaults of batch_size == 1, num_cores == None and num_sockets == None
+    (will use all physical cores available on all available sockets).
 
     :param model: Either a path to the model's onnx file, a SparseZoo model stub
         prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
@@ -537,6 +533,8 @@ def benchmark_model(
     Gives defaults of batch_size == 1 and num_cores == None
     (will use all physical cores available on a single socket).
 
+    Note 1: Benchmarking is currently only supported on a single socket.
+
     :param model: Either a path to the model's onnx file, a SparseZoo model stub
         prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
         object that defines the neural network
@@ -592,6 +590,8 @@ def analyze_model(
     Gives defaults of batch_size == 1 and num_cores == None
     (will use all physical cores available on a single socket).
 
+    Note 1: Analysis is currently only supported on a single socket.
+
     :param model: Either a path to the model's onnx file, a SparseZoo model stub
         prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
         object that defines the neural network graph definition to analyze