Commit

Working, needs refactor + doc string

boazhaim committed Nov 11, 2024
1 parent f5c1d79 commit 2bd0a85

Showing 3 changed files with 111 additions and 28 deletions.
23 changes: 22 additions & 1 deletion plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
@@ -333,7 +333,7 @@ def create_analyzer(parsed_args, full_extracted_logs_list,
 ibdianget_2_ports_primary_analyzer = partial_create_analyzer(log_name="ufm_logs_ibdiagnet2_port_counters.log",
                                                               analyzer_clc=Ibdiagnet2PortCountersAnalyzer)
 
-ibdianget_2_ports_secondary_analyzer = partial_create_analyzer(log_name="ufm_logs_ibdiagnet2_port_counters.log",
+ibdianget_2_ports_secondary_analyzer = partial_create_analyzer(log_name="secondary_telemetry_ibdiagnet2_port_counters.log",
                                                                 analyzer_clc=Ibdiagnet2PortCountersAnalyzer)
 second_telemetry_samples = get_files_in_dest_by_type(args.destination,
                                                      "secondary_",
@@ -387,6 +387,27 @@ def create_analyzer(parsed_args, full_extracted_logs_list,
 text_to_show_in_pdf += os.linesep + os.linesep + "More than 5 events burst over a minute:" \
                        + os.linesep + critical_events_text
 
+# Adding telemetry stats to the PDF
+for cur_telemetry in [ibdianget_2_ports_primary_analyzer, ibdianget_2_ports_secondary_analyzer]:
+    txt = f"{cur_telemetry.telemetry_type} info: {os.linesep}"
+    txt += f"Found the following collectx version(s):{os.linesep}"
+    for collectx_version in cur_telemetry.get_collectx_versions():
+        txt += f"{collectx_version}, "
+    txt += os.linesep
+    txt += f"Found {cur_telemetry.get_number_of_core_dumps()} core dumps{os.linesep}"
+    txt += str(cur_telemetry.get_number_of_switches_and_ports())
+    iteration_stats = cur_telemetry.get_last_iterations_time_stats()
+    if iteration_stats is None:
+        cur_telemetry.analyze_iteration_time()
+        iteration_stats = cur_telemetry.get_last_iterations_time_stats()
+    txt += f"Iteration time stats:{os.linesep}"
+    txt += str(iteration_stats)
+    text_to_show_in_pdf += os.linesep + os.linesep + txt
+
+    # Also print the stats to the screen
+    print(f"stats for {cur_telemetry.telemetry_type}:")
+    print(cur_telemetry.get_last_iterations_time_stats())
+    print(cur_telemetry.get_number_of_switches_and_ports())
 # PDF creator gets all the images to add to the report
 pdf = PDFCreator(pdf_path, pdf_header, png_images, text_to_show_in_pdf)
 pdf.created_pdf()
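
Side note on the pattern above: `partial_create_analyzer` is presumably built with `functools.partial`, so the arguments shared by every analyzer are pinned once and each call only names the log and the analyzer class. A minimal standalone sketch — the pinned values below are made-up stand-ins, not the module's real ones:

from functools import partial

def create_analyzer(parsed_args, full_extracted_logs_list, log_name, analyzer_clc):
    # Sketch: return something analyzer-like only when that log was actually extracted.
    if log_name in full_extracted_logs_list:
        return (analyzer_clc, log_name)
    return None

# Pin the two shared arguments once; later calls supply only the per-log kwargs,
# exactly as in the diff above.
partial_create_analyzer = partial(create_analyzer,
                                  parsed_args={"hours": 24},
                                  full_extracted_logs_list=["ufm_logs_ibdiagnet2_port_counters.log"])
analyzer = partial_create_analyzer(log_name="ufm_logs_ibdiagnet2_port_counters.log",
                                   analyzer_clc="Ibdiagnet2PortCountersAnalyzer")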
@@ -47,7 +47,7 @@ def __init__(self, dest_image_path):
         self._funcs_for_analysis = set()
 
     def _save_data_based_on_timestamp(
-        self, data_to_plot, x_label, y_label, title
+        self, data_to_plot, x_label, y_label, title, large_sample=False
     ):
         with plt.ion():
             log.LOGGER.debug(f"saving {title}")
@@ -61,7 +61,10 @@ def __init__(self, dest_image_path):
             # Set the locator to show ticks every hour and the formatter to
             # include both date and time
             ax = plt.gca()
-            ax.xaxis.set_major_locator(mdates.HourLocator())
+            if large_sample:
+                ax.xaxis.set_major_locator(mdates.HourLocator(interval=24))  # Show labels every 24 hours
+            else:
+                ax.xaxis.set_major_locator(mdates.HourLocator())
             ax.xaxis.set_minor_locator(
                 mdates.MinuteLocator(interval=15)
             )  # Add minor ticks every 15 minutes
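
To see what the new `large_sample` switch buys, here is a self-contained sketch (synthetic data, made-up file name) of the same locator logic: with days of hourly samples an every-hour major tick floods the axis, while `HourLocator(interval=24)` leaves one label per day:

import matplotlib
matplotlib.use("Agg")  # render off-screen; no display needed
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

timestamps = pd.date_range("2024-11-01", periods=7 * 24, freq="h")  # one week of hourly samples
values = range(len(timestamps))
large_sample = True

plt.plot(timestamps, values)
ax = plt.gca()
if large_sample:
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=24))  # one major tick per day
else:
    ax.xaxis.set_major_locator(mdates.HourLocator())  # one major tick per hour
ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d %H:%M"))
plt.gcf().autofmt_xdate()
plt.savefig("locator_demo.png")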
@@ -11,52 +11,50 @@
 #
 
 from typing import List
 
+import warnings
 import pandas as pd
 from loganalyze.log_analyzers.base_analyzer import BaseAnalyzer
 import loganalyze.logger as log
-from datetime import timedelta
 
 
 class Ibdiagnet2PortCountersAnalyzer(BaseAnalyzer):
     def __init__(self, logs_csvs: List[str], hours: int, dest_image_path: str, sort_timestamp=False):
         super().__init__(logs_csvs, hours, dest_image_path, sort_timestamp)
 
-        # This will make all the extra colum are int
-        # Convert the 'extra' columns to integers if possible
+        self._iteration_time_data = None
+        self._iteration_time_stats = None
+        # This will make sure all the extra columns are int
         extra_columns = ['extra1', 'extra2', 'extra3', 'extra4', 'extra5']
 
         for col in extra_columns:
             self._log_data_sorted[col] = pd.to_numeric(
                 self._log_data_sorted[col],
                 errors='coerce'
-            ).astype('Int64')
-
-        # self._log_data_sorted['extra'] = (
-        #     self._log_data_sorted['extra']
-        #     .fillna(0)  # Replace NaN with 0
-        #     .astype(int)  # Convert to integer
-        # )
+            ).astype('Int64')
+        self._funcs_for_analysis = {self.plot_iteration_time_over_time}
+        # Based on the log path, decide if this is primary or secondary
+        if "ufm_logs" in logs_csvs[0]:
+            self.telemetry_type = "primary"
+        elif "secondary_telemetry" in logs_csvs[0]:
+            self.telemetry_type = "secondary"
+        else:
+            self.telemetry_type = "Unknown_telemetry_type"
 
     def get_collectx_versions(self):
         unique_collectx_versions = self._log_data_sorted[self._log_data_sorted['type'] == 'collectx_version']['data'].unique()
         return unique_collectx_versions
 
-
     def get_number_of_switches_and_ports(self):
         """
         Generate summary statistics for 'total_devices_ports' data.
-        This function calculates the average, maximum, minimum, and non-zero counts
+        This function calculates the average, maximum, and minimum
         for switches, CAs, routers, and ports.
         """
-        # Step 1: Filter data for 'total_devices_ports'
         filtered_data = self._log_data_sorted[self._log_data_sorted['type'] == 'total_devices_ports']
 
-        # Step 2: Create a combined column for 'extra1', 'extra3', and 'extra5'
-        combined_columns = ['extra1', 'extra3', 'extra5']
+        ports_numbers_columns = ['extra1', 'extra3', 'extra5']
         filtered_data['extra135'] = pd.to_numeric(
-            filtered_data[combined_columns].stack(), errors='coerce'
+            filtered_data[ports_numbers_columns].stack(), errors='coerce'
         ).groupby(level=0).sum(min_count=1)
 
-        # Define columns of interest and their mapping to meaningful names
         columns_of_interest = ['data', 'extra2', 'extra4', 'extra135']
         column_mapping = {
             'data': 'Number of Switches',
@@ -65,16 +63,13 @@ def get_number_of_switches_and_ports(self):
             'extra135': 'Ports'
         }
 
-        # Step 3: Initialize a list to store the summary statistics
         summary_stats = []
 
-        # Step 4: Calculate statistics for each column
         for col in columns_of_interest:
             numeric_col = pd.to_numeric(filtered_data[col], errors='coerce')
             non_zero_col = numeric_col[numeric_col != 0]
 
-            # Determine stats, defaulting to 0 if the column has no non-zero values
-            avg = int(round(non_zero_col.mean())) if not non_zero_col.empty else 0
+            avg = round(non_zero_col.mean()) if not non_zero_col.empty else 0
             max_val = int(non_zero_col.max()) if not non_zero_col.empty else 0
             min_val = int(non_zero_col.min()) if not non_zero_col.empty else 0
             count = int(non_zero_col.count())
@@ -87,7 +82,71 @@ def get_number_of_switches_and_ports(self):
                 'Total Rows (Non-Zero)': count
             })
 
-        # Step 5: Convert the summary stats list into a DataFrame
         summary_df = pd.DataFrame(summary_stats)
 
         return summary_df
+
+    def analyze_iteration_time(self, threshold=0.15):
+        """
+        Analyze rows where 'type' is 'iteration_time'.
+        Keep only 'type', 'timestamp', and 'data' columns.
+        Calculate statistics for the 'data' column, including timestamps for max and min,
+        and allow filtering out values below a threshold.
+        Parameters:
+        - threshold (float): Minimum value to consider for analysis. Default is 0.15 seconds.
+        """
+        filtered_data = self._log_data_sorted[self._log_data_sorted['type'] == 'iteration_time']
+        filtered_data = filtered_data[['type', 'timestamp', 'data']]
+        filtered_data['data'] = pd.to_numeric(filtered_data['data'], errors='coerce')
+
+        filtered_data = filtered_data[filtered_data['data'] >= threshold]
+        filtered_data['timestamp'] = pd.to_datetime(filtered_data['timestamp'], errors='coerce')
+        filtered_data = filtered_data.dropna(subset=['timestamp'])
+
+        if not filtered_data['data'].empty:
+            average = filtered_data['data'].mean()
+            max_value = filtered_data['data'].max()
+            min_value = filtered_data['data'].min()
+
+            max_timestamp = filtered_data.loc[filtered_data['data'] == max_value, 'timestamp'].iloc[0]
+            min_timestamp = filtered_data.loc[filtered_data['data'] == min_value, 'timestamp'].iloc[0]
+        else:
+            average = max_value = min_value = 0.0
+            max_timestamp = min_timestamp = None
+
+        stats = {
+            'Average': average,
+            'Maximum': max_value,
+            'Max Timestamp': max_timestamp,
+            'Minimum': min_value,
+            'Min Timestamp': min_timestamp,
+            'Total Rows': filtered_data['data'].count()
+        }
+        stats_df = pd.DataFrame([stats])
+        self._iteration_time_data = filtered_data
+        self._iteration_time_stats = stats_df
+        return stats_df
+
+    def get_last_iterations_time_stats(self):
+        return self._iteration_time_stats
+
+    def plot_iteration_time_over_time(self):
+        if self._iteration_time_data is None:
+            self.analyze_iteration_time()
+
+        self._iteration_time_data.set_index('timestamp', inplace=True)
+
+        # Plot the data using the existing method
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", ".*Locator attempting to generate.*")
+            self._save_data_based_on_timestamp(
+                data_to_plot=self._iteration_time_data['data'],
+                x_label='Timestamp',
+                y_label='Iteration Time (s)',
+                title=f'{self.telemetry_type} Iteration Time',
+                large_sample=True)
+
+    def get_number_of_core_dumps(self):
+        core_dumps = self._log_data_sorted[self._log_data_sorted['type'] == 'timeout_dump_core']
+        return len(core_dumps)
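
For reference, a hedged usage sketch of the analyzer added in this commit — the module path, CSV path, and argument values are assumptions (only the constructor signature and method names come from the diff above):

# All paths below are illustrative placeholders.
from loganalyze.log_analyzers.ibdiagnet2_port_counters_analyzer import Ibdiagnet2PortCountersAnalyzer

analyzer = Ibdiagnet2PortCountersAnalyzer(
    logs_csvs=["ufm_logs/ibdiagnet2_port_counters.log.csv"],  # "ufm_logs" in the path => telemetry_type == "primary"
    hours=24,
    dest_image_path="/tmp/plots",
)

print(analyzer.get_collectx_versions())
print(analyzer.get_number_of_switches_and_ports())
print(analyzer.get_number_of_core_dumps())

stats = analyzer.analyze_iteration_time(threshold=0.15)  # returns a one-row stats DataFrame
print(analyzer.get_last_iterations_time_stats())          # same DataFrame, cached on the instance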
