Skip to content

Commit

Permalink
publish from 8594a983 (gopub)
Browse files Browse the repository at this point in the history
  • Loading branch information
sysxpum committed Mar 6, 2024
1 parent 1db7ace commit fdcb817
Show file tree
Hide file tree
Showing 33 changed files with 464 additions and 210 deletions.
12 changes: 12 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Security Policy

## Report a Vulnerability

Please report security issues or vulnerabilities to the [Intel Security Center].

For more information on how Intel works to resolve security issues, see
[Vulnerability Handling Guidelines].

[Intel Security Center]:https://www.intel.com/security

[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.30
1.2.31
140 changes: 134 additions & 6 deletions cli/src/comlet_dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "xpum_structs.h"
#include "utility.h"
#include "exit_code.h"
#include <chrono>

using xpum::dump::engineNameMap;

Expand Down Expand Up @@ -255,6 +256,51 @@ void ComletDump::setupOptions() {
auto dumpTimesOpt = addOption("-n", this->opts->dumpTimes, "Number of the device statistics dump to screen. The dump will never be ended if this parameter is not specified.\n");
dumpTimesOpt->check(CLI::Range(1, std::numeric_limits<int>::max()));

#ifdef DAEMONLESS
std::string msTimeHelp = "";
auto dumpFileFlag = addOption("--file", this->opts->dumpFilePath, "Dump the raw statistics to the file.");
auto msTimeIntervalOpt = addOption("--ims", this->opts->msTimeInterval, msTimeHelp +
"The interval (in milliseconds) to dump the device statistics to file for high-frequency monitorming.\n" +
"The recommended metrics types for high-frequency sampling: GPU power, GPU frequency, GPU utilization,\n"+
"GPU temperature, GPU memory read/write/bandwidth, GPU PCIe read/write, GPU engine utilizations, Xe Link throughput.");
msTimeIntervalOpt->check(
[](const std::string &str) {
std::string errStr = "Value should be integer larger than or equal to 10 and less than or equal 1000";
if (!isNumber(str))
return errStr;
int value;
try {
value = std::stoi(str);
} catch (const std::out_of_range &oor) {
return errStr;
}
if (value < 10 || value > 1000)
return errStr;
return std::string();
});
msTimeIntervalOpt->excludes(timeIntervalOpt);
msTimeIntervalOpt->excludes(dumpTimesOpt);
msTimeIntervalOpt->needs(dumpFileFlag);

auto dumpTotalTimeFlag = addOption("--time", this->opts->dumpTotalTime, "Dump total time in seconds.");
dumpTotalTimeFlag->check(
[](const std::string &str) {
std::string errStr = "Value should be integer larger than or equal to 0 and less than or equal 100000000";
if (!isNumber(str))
return errStr;
int value;
try {
value = std::stoi(str);
} catch (const std::out_of_range &oor) {
return errStr;
}
if (value < 0 || value > 100000000)
return errStr;
return std::string();
});
dumpTotalTimeFlag->needs(msTimeIntervalOpt);
#endif

#ifndef DAEMONLESS
auto dumpRawDataFlag = addFlag("--rawdata", this->opts->rawData, "Dump the required raw statistics to a file in background.");
auto startDumpFlag = addFlag("--start", this->opts->startDumpTask, "Start a new background task to dump the raw statistics to a file. The task ID and the generated file path are returned.");
Expand Down Expand Up @@ -294,6 +340,28 @@ std::unique_ptr<nlohmann::json> ComletDump::run() {
return json;
}

// In this case the ims is set
if(this->opts->msTimeInterval != 0){
int64_t interval = this->opts->msTimeInterval / 2;
// monitor freq set is {5, 10, 20, 50, 100, 200, 500, 1000}
if (interval >= 500){
interval = 500;
} else if (interval >= 200) {
interval = 200;
} else if (interval >= 100) {
interval = 100;
} else if (interval >= 50) {
interval = 50;
} else if (interval >= 20) {
interval = 20;
} else if (interval >= 10) {
interval = 10;
} else {
interval = 5;
}
this->coreStub->setAgentConfig("XPUM_AGENT_CONFIG_SAMPLE_INTERVAL", &interval);
}

if (this->opts->rawData) {
if (this->opts->startDumpTask && !this->opts->deviceIds.empty()) {
if (this->opts->deviceIds.size() > 1) {
Expand Down Expand Up @@ -445,12 +513,41 @@ void ComletDump::dumpRawDataToFile(std::ostream &out) {
}
}

/**
 * Block on keyboard input until dumping should stop.
 *
 * Announces the dump file path, then polls getChar() one key at a time.
 * The loop ends, clearing keepDumping, when either ESC is pressed or
 * getChar() reports a failure (-1); a failure is also reported on stderr.
 */
void ComletDump::waitForEsc() {
    constexpr int kEscKey = 27;  // ASCII code of the ESC key
    constexpr int kGetCharFailed = -1;

    std::cout << "Dump data to file " << this->opts->dumpFilePath << ". Press the key ESC to stop dumping." << std::endl;

    for (;;) {
        const int pressed = getChar();
        // Any key other than ESC is ignored; keep waiting.
        if (pressed != kGetCharFailed && pressed != kEscKey) {
            continue;
        }
        if (pressed == kGetCharFailed) {
            std::cerr << "Something wrong in getChar." << std::endl;
        }
        keepDumping = false;
        return;
    }
}

void ComletDump::getTableResult(std::ostream &out) {
if (this->opts->rawData) {
dumpRawDataToFile(out);
} else {
#ifdef DAEMONLESS
if (gpu_id_to_bdfs.size() > 0 && this->opts->metricsIdList.size() == 1 && this->opts->metricsIdList[0] == XPUM_DUMP_POWER) {
keepDumping = true;
if (!this->opts->dumpFilePath.empty()){
dumpFile.open(this->opts->dumpFilePath);
if (!dumpFile) {
std::cout << "Error: "
<< "open file failed" << std::endl;
return;
}
std::thread([this] { this->waitForEsc(); }).detach();
printByLine(dumpFile);
dumpFile.close();
std::cout << "Dumping is stopped." << std::endl;
} else if (gpu_id_to_bdfs.size() > 0 && this->opts->metricsIdList.size() == 1 && this->opts->metricsIdList[0] == XPUM_DUMP_POWER) {
printByLineWithoutInitializeCore(out);
} else {
printByLine(out);
Expand Down Expand Up @@ -837,7 +934,14 @@ void ComletDump::printByLine(std::ostream &out) {
// timestamp column
columnSchemaList.push_back({"Timestamp",
[]() {
return CoreStub::isotimestamp(time(nullptr) * 1000, true);
long ms; // Milliseconds
time_t s; // Seconds
struct timespec spec;
clock_gettime(CLOCK_REALTIME, &spec);

s = spec.tv_sec;
ms = spec.tv_nsec / 1000000; // Convert nanoseconds to milliseconds
return CoreStub::isotimestamp(s * 1000 + ms, true);
}});

// device id column
Expand Down Expand Up @@ -1093,9 +1197,31 @@ void ComletDump::printByLine(std::ostream &out) {
out << std::endl;

int iter = 0;
u_int64_t index = 0;
uint64_t sleepMilliseconds = (this->opts->msTimeInterval == 0) ? (1000 * this->opts->timeInterval) : this->opts->msTimeInterval;

while (true) {
std::this_thread::sleep_for(std::chrono::milliseconds(this->opts->timeInterval * 1000));
std::chrono::system_clock::time_point begin = std::chrono::system_clock::now();
while (keepDumping) {
if ((this->opts->dumpTotalTime != -1 && (uint64_t)(this->opts->dumpTotalTime) * 1000 <= sleepMilliseconds * index)) {
keepDumping = false;
break;
}

++index;

// for big interval
if (sleepMilliseconds > 1000){
auto leftTime = sleepMilliseconds;
while (leftTime > 1000 && keepDumping){
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
leftTime -= 1000;
}
if(!keepDumping){
break;
}
}

std::this_thread::sleep_until(begin + std::chrono::milliseconds(sleepMilliseconds * index));
res = run();
if (res->contains("error")) {
out << "Error: " << (*res)["error"].get<std::string>() << std::endl;
Expand Down Expand Up @@ -1147,9 +1273,11 @@ void ComletDump::printByLine(std::ostream &out) {
out << std::endl;
}
}
if (this->opts->dumpTimes != -1 && ++iter >= this->opts->dumpTimes) {
break;
if ((this->opts->dumpTimes != -1 && ++iter >= this->opts->dumpTimes)) {
keepDumping = false;
}

}
}

} // end namespace xpum::cli
9 changes: 9 additions & 0 deletions cli/src/comlet_dump.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#pragma once

#include <atomic>
#include <map>
#include <memory>
#include <nlohmann/json.hpp>
Expand All @@ -27,6 +28,9 @@ struct ComletDumpOptions {
std::vector<std::string> deviceTileIds = {"-1"};
std::vector<int> metricsIdList;
uint32_t timeInterval = 1;
uint64_t msTimeInterval = 0;
std::string dumpFilePath;
int64_t dumpTotalTime = -1;
int dumpTimes = -1;
// for dump raw data to file
bool rawData;
Expand All @@ -47,6 +51,9 @@ class ComletDump : public ComletBase {
std::string curDeviceId;
std::string curTileId;

std::atomic<bool> keepDumping;
std::ofstream dumpFile;

std::string metricsHelpStr = "Metrics type to collect raw data, options. Separated by the comma.\n";
std::set<std::string> sumMetricsList{"XPUM_STATS_MEMORY_READ", "XPUM_STATS_MEMORY_WRITE", "XPUM_STATS_MEMORY_READ_THROUGHPUT", "XPUM_STATS_MEMORY_WRITE_THROUGHPUT", "XPUM_STATS_MEMORY_USED", "XPUM_STATS_PCIE_READ_THROUGHPUT", "XPUM_STATS_PCIE_WRITE_THROUGHPUT", "XPUM_STATS_RAS_ERROR_CAT_RESET", "XPUM_STATS_RAS_ERROR_CAT_PROGRAMMING_ERRORS", "XPUM_STATS_RAS_ERROR_CAT_DRIVER_ERRORS", "XPUM_STATS_RAS_ERROR_CAT_CACHE_ERRORS_CORRECTABLE", "XPUM_STATS_RAS_ERROR_CAT_CACHE_ERRORS_UNCORRECTABLE", "XPUM_STATS_RAS_ERROR_CAT_DISPLAY_ERRORS_CORRECTABLE", "XPUM_STATS_RAS_ERROR_CAT_DISPLAY_ERRORS_UNCORRECTABLE", "XPUM_STATS_RAS_ERROR_CAT_NON_COMPUTE_ERRORS_CORRECTABLE", "XPUM_STATS_RAS_ERROR_CAT_NON_COMPUTE_ERRORS_UNCORRECTABLE"};

Expand All @@ -70,6 +77,8 @@ class ComletDump : public ComletBase {

virtual void getTableResult(std::ostream &out) override;

void waitForEsc();

void printByLine(std::ostream &out);

void printByLineWithoutInitializeCore(std::ostream &out);
Expand Down
36 changes: 35 additions & 1 deletion cli/src/core_stub/agentset_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,43 @@ struct AgentConfigType {
// Table of the agent configuration entries known to this stub.
// agentConfigStrToKey() searches it by each entry's keyStr and returns the entry's key.
// NOTE(review): the AgentConfigType definition is not visible here; the initializer
// presumably lists key, keyStr, value type and JSON field name in that order - confirm
// against the struct declaration.
static AgentConfigType agentConfigTypeList[]{
{XPUM_AGENT_CONFIG_SAMPLE_INTERVAL, "XPUM_AGENT_CONFIG_SAMPLE_INTERVAL", VALUE_TYPE_INT64, "sampling_interval"}};


/**
 * Translate an agent configuration name into its numeric key.
 *
 * Scans agentConfigTypeList for an entry whose keyStr is exactly equal
 * to the requested name.
 *
 * @param keyStr configuration name to look up
 * @return the key of the matching entry, or -1 when no entry matches
 */
static int agentConfigStrToKey(const std::string &keyStr) {
    // Iterate by const reference: iterating by value would copy every
    // table entry (including its keyStr) on each step of the search.
    for (const auto &config : agentConfigTypeList) {
        if (config.keyStr == keyStr) {
            return config.key;
        }
    }
    return -1;
}
std::unique_ptr<nlohmann::json> LibCoreStub::setAgentConfig(std::string jsonName, void* pValue) {
auto json = std::unique_ptr<nlohmann::json>(new nlohmann::json());

auto key = agentConfigStrToKey(jsonName);
if(key == -1){
(*json)["error"] = "Config Name is not found";
}
auto res = xpumSetAgentConfig((xpum_agent_config_t)key, pValue);

switch (res)
{
case XPUM_LEVEL_ZERO_INITIALIZATION_ERROR:
(*json)["error"] = "Level Zero Initialization Error";
break;
case XPUM_NOT_INITIALIZED:
(*json)["error"] = "XPUM is not initializaed";
break;
case XPUM_RESULT_UNKNOWN_AGENT_CONFIG_KEY:
(*json)["error"] = "Unknow Agent Config Key";
break;
case XPUM_RESULT_AGENT_SET_INVALID_VALUE:
(*json)["error"] = "Invalid Agent Set Value";
break;

default:
(*json)["error"] = "Error";
break;
}

return json;
}

Expand Down
28 changes: 28 additions & 0 deletions cli/src/utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <stdarg.h>
#include <syslog.h>
#include <unistd.h>
#include <termios.h>

#include <cstdio>
#include <string>
Expand Down Expand Up @@ -176,4 +177,31 @@ std::string getKeyStringValue(std::string key, const nlohmann::json &item) {
return "";
}

/**
 * Read a single key press from standard input.
 *
 * Canonical mode and echo are switched off for the duration of the read so
 * one byte is delivered as soon as it is typed, without being echoed. The
 * terminal settings captured on entry are put back afterwards.
 *
 * @return the byte read as a value in 0..255, or -1 when the terminal
 *         settings could not be read or changed, the read failed, or no
 *         byte was available (end of input)
 */
int getChar() {
    struct termios savedTermios = {};
    if (tcgetattr(STDIN_FILENO, &savedTermios) < 0) {
        perror("error in tcgetattr()");
        return -1;
    }

    // Work on a copy so the settings captured above can be restored exactly,
    // instead of forcing ICANON/ECHO on regardless of their previous state.
    struct termios rawTermios = savedTermios;
    rawTermios.c_lflag &= ~(ICANON | ECHO);
    rawTermios.c_cc[VMIN] = 1;   // return as soon as one byte is available
    rawTermios.c_cc[VTIME] = 0;  // no inter-byte timeout
    if (tcsetattr(STDIN_FILENO, TCSANOW, &rawTermios) < 0) {
        perror("error in tcsetattr()");
        return -1;
    }

    // unsigned char keeps byte 0xFF from sign-extending into the -1 sentinel.
    unsigned char ch = 0;
    const ssize_t bytesRead = read(STDIN_FILENO, &ch, 1);
    if (bytesRead < 0) {
        perror("error in read()");
    }

    if (tcsetattr(STDIN_FILENO, TCSADRAIN, &savedTermios) < 0) {
        perror("error in tcsetattr()");
    }

    // A failed or empty read must not look like a key press: a caller that
    // polls in a loop would otherwise spin forever on a 0 result.
    return bytesRead == 1 ? static_cast<int>(ch) : -1;
}

}// end namespace xpum::cli
2 changes: 2 additions & 0 deletions cli/src/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,6 @@ std::string getKeyNumberValue(std::string key, const nlohmann::json &item);

std::string getKeyStringValue(std::string key, const nlohmann::json &item);

int getChar();

} // end namespace xpum::cli
7 changes: 7 additions & 0 deletions core/resources/config/diagnostics.conf
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ SINGLE_PRECISION_MIN_GFLOPS = 8000 # GFLOPS
POWER_MIN_STRESS_WATT = 80 # W
MEMORY_BANDWIDTH_MIN_GBPS = 320 # GBPS

# ATS-M1C 512EU 16GB
NAME = Intel(R) Graphics [0x56c2]
PCIE_BANDWIDTH_MIN_GBPS = 11 # GBPS
SINGLE_PRECISION_MIN_GFLOPS = 6400 # GFLOPS
POWER_MIN_STRESS_WATT = 80 # W
MEMORY_BANDWIDTH_MIN_GBPS = 260 # GBPS

# PVC 2T 1024EU 128GB
NAME = Intel(R) Graphics [0x0bd4]
PCIE_BANDWIDTH_MIN_GBPS = 22 # GBPS
Expand Down
Loading

0 comments on commit fdcb817

Please sign in to comment.