diff --git a/.zenodo.json b/.zenodo.json
index b6d79af4..3ff0915e 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -5,33 +5,24 @@
       "affiliation": "CERN"
     },
     {
-      "name": "Lukáš Malina",
+      "name": "Felix Simon Carlier",
       "affiliation": "CERN",
-      "orcid": "0000-0002-4673-6035"
+      "orcid": "0000-0002-7173-8833"
     },
     {
-      "name": "Joschua Dilly",
+      "name": "Jaime Maria Coello De Portugal - Martinez Vazquez",
       "affiliation": "CERN",
-      "orcid": "0000-0001-7864-5448"
+      "orcid": "0000-0002-6899-3809"
     },
     {
-      "name": "Michael Hofer",
+      "name": "Joschua Dilly",
       "affiliation": "CERN",
-      "orcid": "0000-0001-6173-0232"
-    },
-    {
-      "name": "Felix Soubelet",
-      "affiliation": "University of Liverpool & CERN",
-      "orcid": "0000-0001-8012-1440"
-    },
-    {
-      "name": "Andreas Wegscheider",
-      "affiliation": "CERN"
+      "orcid": "0000-0001-7864-5448"
     },
     {
-      "name": "Jaime Maria Coello De Portugal - Martinez Vazquez",
+      "name": "Hector Garcia Morales",
       "affiliation": "CERN",
-      "orcid": "0000-0002-6899-3809"
+      "orcid": "0000-0001-5100-8975"
     },
     {
       "name": "Maël Le Garrec",
@@ -39,23 +30,41 @@
       "orcid": "0000-0002-8146-2340"
     },
     {
-      "name": "Tobias Persson",
+      "name": "Joshua Mark Gray",
       "affiliation": "CERN"
     },
+    {
+      "name": "Michael Hofer",
+      "affiliation": "CERN",
+      "orcid": "0000-0001-6173-0232"
+    },
     {
       "name": "Jacqueline Keintzel",
       "affiliation": "CERN",
       "orcid": "0000-0003-1396-8478"
     },
     {
-      "name": "Hector Garcia Morales",
+      "name": "Lukáš Malina",
       "affiliation": "CERN",
-      "orcid": "0000-0001-5100-8975"
+      "orcid": "0000-0002-4673-6035"
+    },
+    {
+      "name": "Tobias Persson",
+      "affiliation": "CERN"
+    },
+    {
+      "name": "Felix Soubelet",
+      "affiliation": "University of Liverpool & CERN",
+      "orcid": "0000-0001-8012-1440"
     },
     {
       "name": "Rogelio Tomas Garcia",
       "affiliation": "CERN",
       "orcid": "0000-0002-9857-1703"
+    },
+    {
+      "name": "Andreas Wegscheider",
+      "affiliation": "CERN"
     }
   ],
   "title": "OMC3",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61622d2e..d2d001fb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # OMC3 Changelog
 
+#### 2024-11-14 - v0.20.0 - _jdilly_, _awegsche_
+
+- Added:
+  - `bad_bpms_summary` script: Collect and summarize the bad BPMs from GUI runs.
+
 #### 2024-11-13 - v0.19.0 - _fscarlier_, _jdilly_
 
 - Added K-Modulation tools:
diff --git a/README.md b/README.md
index 14d25df5..59d2a474 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,7 @@ Other general utility scripts are in [`/omc3/scripts`](omc3/scripts):
 - `kmod_average.py` to calculate the average of multiple K-modulation measurements.
 - `kmod_import.py` to import a K-modulation measurement into an optics-measurement directory.
 - `kmod_lumi_imbalace.py` to calculate the luminosity imbalance between two IPs from averaged K-modulation files.
+- `bad_bpms_summary.py` to collect and summarize the bad BPMs from GUI runs.
 
 Example use for these scripts can be found in the [`tests`](tests) files.
 Documentation including relevant flags and parameters can be found at .
diff --git a/doc/entrypoints/scripts.rst b/doc/entrypoints/scripts.rst
index 26bfe996..597c9c3a 100644
--- a/doc/entrypoints/scripts.rst
+++ b/doc/entrypoints/scripts.rst
@@ -40,3 +40,7 @@ Scripts
     :members:
     :noindex:
 
+
+.. automodule:: omc3.scripts.bad_bpms_summary
+    :members:
+    :noindex:
diff --git a/omc3/__init__.py b/omc3/__init__.py
index 963141e4..292c8d94 100644
--- a/omc3/__init__.py
+++ b/omc3/__init__.py
@@ -11,7 +11,7 @@
 __title__ = "omc3"
 __description__ = "An accelerator physics tools package for the OMC team at CERN."
 __url__ = "https://github.com/pylhc/omc3"
-__version__ = "0.19.0"
+__version__ = "0.20.0"
 __author__ = "pylhc"
 __author_email__ = "pylhc@github.com"
 __license__ = "MIT"
diff --git a/omc3/optics_measurements/iforest.py b/omc3/optics_measurements/iforest.py
index 44dd5cc9..4d3a0ab9 100644
--- a/omc3/optics_measurements/iforest.py
+++ b/omc3/optics_measurements/iforest.py
@@ -5,9 +5,11 @@
 This module contains the isolation forest functionality of ``optics_measurements``.
 It provides functions to detect and exclude BPMs with anomalies.
 """
+from pathlib import Path
 import numpy as np
 import pandas as pd
 from sklearn.ensemble import IsolationForest
+import tfs
 
 from omc3.definitions.constants import PLANE_TO_NUM
 from omc3.utils import logging_tools
@@ -21,8 +23,8 @@
 def clean_with_isolation_forest(input_files, meas_input, plane):
     bad_bpms = identify_bad_bpms(meas_input, input_files, plane)
     input_files = remove_bad_bpms(input_files, list(set(bad_bpms.NAME)), plane)
     LOGGER.info(str(list(set(bad_bpms.NAME))))
-    # TODO potentially write output files ... currently not unique indices!
-    # tfs.write(os.path.join(meas_input.outputdir, f"bad_bpms_iforest_{plane.lower()}.tfs"), bad_bpms)
+    if meas_input.outputdir is not None:
+        tfs.write(Path(meas_input.outputdir) / f"bad_bpms_iforest_{plane.lower()}.tfs", bad_bpms)
     return input_files
 
diff --git a/omc3/scripts/bad_bpms_summary.py b/omc3/scripts/bad_bpms_summary.py
new file mode 100644
index 00000000..aac18980
--- /dev/null
+++ b/omc3/scripts/bad_bpms_summary.py
@@ -0,0 +1,414 @@
+"""
+Bad BPMs Summary
+----------------
+
+Scans all measurements in a list of given GUI output folders and compiles a list of bad BPMs with
+their number of appearances after 'harpy' and 'isolation forest' cleaning.
+
+
+.. admonition:: Usage
+
+    Get bad BPMs for LHC Beam 1 from September 2024 and 2024-10-03:
+
+    .. code-block:: none
+
+        python -m omc3.scripts.bad_bpms_summary --dates 2024-09-* 2024-10-03 --accel_glob LHCB1 --outfile bad_bpms_sep_2024.txt --print_percentage 50
+
+
+*--Required--*
+
+- **dates** *(str)*:
+
+    Dates to include in the analysis. These should be either subfolders in
+    `root` or glob-patterns for those.
+
+
+*--Optional--*
+
+- **accel_glob** *(str)*:
+
+    Accelerator name (glob for the sub-directories).
+
+    default: ``LHCB*``
+
+
+- **outfile** *(PathOrStr)*:
+
+    Path to the file to write out.
+
+
+- **print_percentage** *(float)*:
+
+    Print out BPMs that appear in more than this percentage of
+    measurements.
+
+
+- **root** *(PathOrStr)*:
+
+    Path to the root directory, containing the dates.
+
+    default: ``/user/slops/data/LHC_DATA/OP_DATA/Betabeat``
+
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+import tfs
+from generic_parser import EntryPointParameters, entrypoint
+
+from omc3.utils import logging_tools
+from omc3.utils.iotools import PathOrStr, OptionalFloat
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from generic_parser import DotDict
+
+LOG = logging_tools.get_logger(__name__)
+
+# Constants ---
+ROOT = Path("/user/slops/data/LHC_DATA/OP_DATA/Betabeat")
+IFOREST = "IFOREST"
+HARPY = "HARPY"
+
+# Columns ---
+NAME = "NAME"
+ACCEL = "ACCELERATOR"
+PLANE = "PLANE"
+SOURCE = "SOURCE"
+REASON = "REASON"
+COUNT = "COUNT"
+FILE = "FILE"
+FILE_COUNT = "FILE_COUNT"
+PERCENTAGE = "PERCENTAGE"
+
+# Files ---
+MEASUREMENTS_DIR = "Measurements"
+RESULTS_DIR = "Results"
+BAD_BPMS_HARPY = "*.bad_bpms_*"
+BAD_BPMS_IFOREST = "bad_bpms_iforest_*.tfs"
+
+
+def get_params():
+    params = EntryPointParameters()
+    params.add_parameter(
+        name="dates",
+        type=str,
+        nargs="+",
+        required=True,
+        help=("Dates to include in the analysis. "
+              "These should be either subfolders in `root` or glob-patterns for those."
+        )
+    )
+    params.add_parameter(
+        name="root",
+        type=PathOrStr,
+        default=ROOT,
+        help="Path to the root directory, containing the dates."
+    )
+    params.add_parameter(
+        name="outfile",
+        type=PathOrStr,
+        help="Path to the file to write out."
+    )
+    params.add_parameter(
+        name="print_percentage",
+        type=OptionalFloat,
+        help="Print out BPMs that appear in more than this percentage of measurements."
+    )
+    params.add_parameter(
+        name="accel_glob",
+        type=str,
+        default="LHCB*",
+        help="Accelerator name (glob for the sub-directories)."
+    )
+    return params
+
+
+@entrypoint(get_params(), strict=True)
+def bad_bpms_summary(opt: DotDict) -> tfs.TfsDataFrame:
+    outfile = None
+    if opt.outfile is not None:
+        outfile = Path(opt.outfile)
+        outfile.parent.mkdir(parents=True, exist_ok=True)
+
+    df_collection = collect_bad_bpms(Path(opt.root), opt.dates, opt.accel_glob)
+    if outfile is not None:
+        tfs.write(outfile.with_stem(f"{outfile.stem}_collected"), df_collection)
+
+    df_evaluated = evaluate(df_collection)
+    if outfile is not None:
+        tfs.write(outfile, df_evaluated)
+
+    if opt.print_percentage is not None:
+        print_results(df_evaluated, opt.print_percentage)
+
+    return df_evaluated
+
+
+# Collection of Data ---
+
+def get_empty_df() -> tfs.TfsDataFrame:
+    """ Create an empty TfsDataFrame with the correct column names. """
+    return tfs.TfsDataFrame(columns=[NAME, ACCEL, PLANE, SOURCE, FILE])
+
+
+def collect_bad_bpms(root: Path, dates: Sequence[Path | str], accel_glob: str) -> tfs.TfsDataFrame:
+    """ Create a TfsDataFrame with all bad-bpms within selected dates.
+
+    Args:
+        root (Path): Root path to the GUI output folder.
+        dates (Sequence[Path | str]): List of dates or glob patterns to collect bad-bpms from.
+        accel_glob (str): Accelerator name (glob for the sub-directories).
+
+    Returns:
+        tfs.TfsDataFrame: TfsDataFrame with all bad-bpms within selected dates.
+
+    """
+    dfs = []
+
+    def collect_and_append(date_dir: Path):
+        """ Helper to collect for date_dir and append to dfs if not None. """
""" + df_new = collect_date(date_dir, accel_glob) + if df_new is not None: + dfs.append(df_new) + + # Loop over dates --- + for date in dates: + date_dir = root / date + if date_dir.is_dir(): + collect_and_append(date_dir) + + else: + for date_dir in root.glob(date): + collect_and_append(date_dir) + + # Check and return --- + if not len(dfs): + LOG.warning("No bad-bpms found! Resulting TfsDataFrame will be empty.") + return get_empty_df() + + return tfs.concat(dfs, axis="index", ignore_index=True) + + +def collect_date(date_dir: Path, accel_glob: str) -> tfs.TfsDataFrame | None: + """ Collect bad-bpms for a single date, by checking the sub-directories + which conform to the `accel_glob` pattern. + + In each accel directory, check for sub-directories named `Measurements` and `Results`, + which in turn contain which in turn have entries containing the bad-bpms files. + + Args: + date_dir (Path): Path to the date directory. + accel_glob (str): Accelerator name (glob for the sub-directories). + + Returns: + tfs.TfsDataFrame: TfsDataFrame with all bad-bpms for the date. + """ + dfs: list[tfs.TfsDataFrame] = [] + + for accel_dir in date_dir.glob(accel_glob): + for subdir_name in (MEASUREMENTS_DIR, RESULTS_DIR): + analysis_stage_dir = accel_dir / subdir_name + if not analysis_stage_dir.is_dir(): + continue + + for data_dir in analysis_stage_dir.iterdir(): + if not data_dir.is_dir(): + continue + + df_collected = collect_bad_bpm_files_in_dir(data_dir) + if df_collected is not None: + df_collected.loc[:, ACCEL] = accel_dir.name + dfs.append(df_collected) + + if not len(dfs): + return None + + return tfs.concat(dfs, axis="index", ignore_index=True) + + +def collect_bad_bpm_files_in_dir(directory: Path) -> tfs.TfsDataFrame | None: + """ Collect bad-bpms for a single measurement directory. + + Args: + directory (Path): Path to the directory possibly containing bad-bpm files of type `file_types`. + + Returns: + tfs.TfsDataFrame: TfsDataFrame with all bad-bpms from the given directory. + + """ + readers_map = { + BAD_BPMS_HARPY: read_harpy_bad_bpms_file, + BAD_BPMS_IFOREST: read_iforest_bad_bpms_file + } + + dfs: list[tfs.TfsDataFrame] = [] + + for glob_pattern, reader in readers_map.items(): + for bad_bpms_file in directory.glob(glob_pattern): + new_df = reader(bad_bpms_file) + if new_df is not None: + dfs.append(new_df) + + if not len(dfs): + return None + + return tfs.concat(dfs, axis="index", ignore_index=True) + + +# File Readers -- + +def read_harpy_bad_bpms_file(svd_file: Path) -> tfs.TfsDataFrame: + """ Reads a harpy bad-bpm file and returns a TfsDataFrame with all unique bad-bpms. + + Args: + svd_file (Path): Path to the bad-bpm file. + + Returns: + tfs.TfsDataFrame: TfsDataFrame with all unique bad-bpms. 
+ + """ + TO_IGNORE = ("not found in model",) + TO_MARK = ("known bad bpm",) + COMMENT = "#" + + plane = svd_file.name[-1] + + # Read and parse file + lines = svd_file.read_text().splitlines() + lines = [line.strip().split(maxsplit=1) for line in lines] + lines = [(line[0].strip(), line[1].lower().strip()) for line in lines] + + lines = [line for line in lines if not line[0].startswith(COMMENT) and line[1] not in TO_IGNORE] + bpms = set(f"[{line[0]}]" if line[1] in TO_MARK else line[0] for line in lines) + + # Create DataFrame + df = get_empty_df() + df.loc[:, NAME] = list(bpms) + df.loc[:, PLANE] = plane.upper() + df.loc[:, SOURCE] = HARPY + df.loc[:, FILE] = str(svd_file) + return df + + +def read_iforest_bad_bpms_file(iforest_file: Path) -> tfs.TfsDataFrame: + """ Reads an iforest bad-bpm file and returns a TfsDataFrame with all unique bad-bpms. + + Args: + iforest_file (Path): Path to the bad-bpm file. + + Returns: + tfs.TfsDataFrame: TfsDataFrame with all unique bad-bpms. + + """ + df_iforest = tfs.read(iforest_file) + plane = iforest_file.stem[-1] + + df = get_empty_df() + df.loc[:, NAME] = list(set(df_iforest[NAME])) # hint: be sure to ignore index + df.loc[:, PLANE] = plane.upper() + df.loc[:, SOURCE] = IFOREST + df.loc[:, FILE] = str(iforest_file) + return df + + +# Evaluaion ---- + + +def evaluate(df: tfs.TfsDataFrame) -> tfs.TfsDataFrame: + """ Evaluates the gathered bad-bpms and returns a TfsDataFrame with the results. + + The evaluation is based on the following criteria: + - Count how often a BPM is bad + - Count the total number of (unique) files for each combination of accelerator, source and plane + + From this information the percentage of how often a BPM is deemed bad is calculated. + + Args: + df (tfs.TfsDataFrame): TfsDataFrame with all bad-bpms. + + Returns: + tfs.TfsDataFrame: TfsDataFrame with the evaluated results. + """ + # Count how often a BPM is bad + df_counted = df.groupby([NAME, ACCEL, SOURCE, PLANE]).size().reset_index(name=COUNT) + + # Count the total number of (unique) files for each combination of accelerator, source and plane + file_count = df.groupby([ACCEL, SOURCE, PLANE])[FILE].nunique().reset_index(name=FILE_COUNT) + df_counted = df_counted.merge(file_count, how="left", on=[ACCEL, SOURCE, PLANE]) + + df_counted.loc[:, PERCENTAGE] = round( + (df_counted[COUNT] / df_counted[FILE_COUNT]) * 100, 2 + ) + + df_counted = tfs.TfsDataFrame(df_counted.sort_values(PERCENTAGE, ascending=False), headers=df.headers) + return df_counted + + +def print_results(df_counted: tfs.TfsDataFrame, print_percentage: float): + """ Log the results to console (INFO level if logger is setup, print otherwise). + + Args: + df_counted (tfs.TfsDataFrame): TfsDataFrame with the evaluated results. + print_percentage (float): Print out BPMs that appear in more than this percentage of measurements. + """ + percentage_mask = df_counted[PERCENTAGE] >= print_percentage + printer = print + if LOG.hasHandlers(): + printer = LOG.info + + planes = df_counted[PLANE].unique() + + printer("Bad BPMs Summary. 
+    for accel in sorted(df_counted[ACCEL].unique()):
+        accel_mask = df_counted[ACCEL] == accel
+        for source in sorted(df_counted[SOURCE].unique()):
+            source_mask = df_counted[SOURCE] == source
+
+            df_filtered = df_counted.loc[source_mask & accel_mask, :]
+            if len(planes) == 2:
+                # Merge X and Y for nicer output ---
+                df_x = df_filtered.loc[df_filtered[PLANE] == "X", :].set_index(NAME)
+                df_y = df_filtered.loc[df_filtered[PLANE] == "Y", :].set_index(NAME)
+
+                df_merged = pd.merge(df_x, df_y, how="outer", left_index=True, right_index=True, suffixes=("X", "Y"))
+                df_merged['max_pct'] = df_merged[[f"{PERCENTAGE}X", f"{PERCENTAGE}Y"]].max(axis=1)
+                df_merged = df_merged.sort_values(by='max_pct', ascending=False)
+                df_merged = df_merged.loc[df_merged['max_pct'] >= print_percentage, :]
+
+                # Print Table ---
+                header = f"{'BPM':>20s} {'X':^18s} {'Y':^18s}\n"
+                msg = header + "\n".join(
+                    f"{name:>20s} " +
+                    " ".join(
+                        (
+                            "{:^18s}".format("-") if np.isnan(row[f'{FILE_COUNT}{plane}']) else
+                            f"{row[f'{PERCENTAGE}{plane}']:5.1f}% "
+                            "{:<11s}".format(f"({int(row[f'{COUNT}{plane}']):d}/{int(row[f'{FILE_COUNT}{plane}']):d})")
+                            for plane in ('X', 'Y')
+                        )
+                    )
+                    for name, row in df_merged.iterrows()
+                )
+
+            else:
+                # Print a list ---
+                df_filtered = df_counted.loc[percentage_mask & source_mask & accel_mask, :]
+                msg = "\n".join(
+                    f"{row[NAME]:>20s} {row[PLANE]}: {row[PERCENTAGE]:5.1f}% ({row[COUNT]}/{row[FILE_COUNT]})"
+                    for _, row in df_filtered.iterrows()
+                )
+            printer(f"Highest bad BPMs of {accel} from {source}:\n{msg}")
+
+
+# Script Mode ------------------------------------------------------------------
+
+if __name__ == "__main__":
+    bad_bpms_summary()
diff --git a/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_x.tfs b/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_x.tfs
new file mode 100644
index 00000000..9d7df2d1
--- /dev/null
+++ b/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_x.tfs
@@ -0,0 +1,9 @@
+* NAME          FEATURE       VALUE               AVG                 SCORE
+$ %s            %s            %le                 %le                 %le
+  BPM.20R5.B1   AMPX          0.2739793273915     0.1983870023371     -0.08321628370052
+  BPM.22R5.B1   AMPX          0.1645414911027     0.1983870023371     -0.1192688174725
+  BPM.34R7.B1   AMPX          0.1608806519601     0.1983870023371     -0.1121605330153
+  BPM.27R8.B1   AMPX          0.1164325835062     0.1983870023371     -0.08355632185117
+  BPMS.2L5.B1   NOISE_SCALED  0.0006165859078803  0.0009016042719894  -0.1390448828672
+  BPMW.5L7.B1   AMPX          0.1645414911027     0.2062029479469     -0.2122963397363
+  BPMSW.1L8.B1  NOISE_SCALED  0.001344342784781   0.0009016042719894  -0.1248972948542
\ No newline at end of file
diff --git a/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_y.tfs b/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_y.tfs
new file mode 100644
index 00000000..0dc3f718
--- /dev/null
+++ b/tests/inputs/bad_bpms/LHCB1/Measurements/example/bad_bpms_iforest_y.tfs
@@ -0,0 +1,9 @@
+* NAME          FEATURE       VALUE               AVG                 SCORE
+$ %s            %s            %le                 %le                 %le
+  BPM.34R2.B1   AMPY          0.1684687445507     0.2816166477169     -0.07696353786083
+  BPM.28R3.B1   AMPY          0.2274634866394     0.2816166477169     -0.07844693271801
+  BPM.15R7.B1   AMPY          0.1987839789693     0.2816166477169     -0.08603128649899
+  BPM.19L8.B1   NOISE_SCALED  0.000701003563438   0.0006483937620229  -0.120787110444
+  BPMWI.4L2.B1  NOISE_SCALED  0.001066794573165   0.000649595233316   -0.1719495412989
+  BPMS.2R5.B1   NOISE_SCALED  0.0007888325396217  0.000649595233316   -0.1308678268808
+  BPMS.2R1.B1   TUNEY         0.3219851293843     0.3219858241878     -0.13606497737
\ No newline at end of file
diff --git a/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_x b/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_x
new file mode 100644
index 00000000..d712fde0
--- /dev/null
+++ b/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_x
@@ -0,0 +1,4 @@
+BPMSI.A4R6.B1 not found in model
+BPMSI.B4R6.B1 not found in model
+BPMSE.4L6.B1 Spiky BPM, found spike higher than 20.0
+BPMSX.4L8.B1 Detected from SVD, single peak value is greater then 0.925
diff --git a/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_y b/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_y
new file mode 100644
index 00000000..93b229c6
--- /dev/null
+++ b/tests/inputs/bad_bpms/LHCB1/Measurements/example/example.sdds.bad_bpms_y
@@ -0,0 +1,8 @@
+BPMSI.A4R6.B1 not found in model
+BPMSI.B4R6.B1 not found in model
+BPM.31L5.B1 Flat BPM, the difference between min/max is smaller than 1e-05
+BPMSX.A4L6.B1 Flat BPM, the difference between min/max is smaller than 1e-05
+BPM.31L5.B1 Found an exact zero
+BPMSX.A4L6.B1 Found an exact zero
+BPMSE.4L6.B1 Detected from SVD, single peak value is greater then 0.925
+BPMWI.A5L4.B1 Detected from SVD, single peak value is greater then 0.925
diff --git a/tests/unit/test_bad_bpms_summary.py b/tests/unit/test_bad_bpms_summary.py
new file mode 100644
index 00000000..6ce7fbcb
--- /dev/null
+++ b/tests/unit/test_bad_bpms_summary.py
@@ -0,0 +1,41 @@
+import logging
+
+import pytest
+import tfs
+
+from omc3.scripts.bad_bpms_summary import HARPY, IFOREST, NAME, SOURCE, bad_bpms_summary
+from tests.conftest import INPUTS, assert_tfsdataframe_equal
+
+
+@pytest.mark.extended
+def test_bad_bpms_summary(tmp_path, caplog):
+    outfile = tmp_path / "bad_bpms_summary.tfs"
+    with caplog.at_level(logging.INFO):
+        df_eval = bad_bpms_summary(
+            root=INPUTS,
+            outfile=outfile,
+            dates=["bad_bpms"],
+            accel_glob="LHCB1",
+            print_percentage=50,
+        )
+
+    # Test data has been written
+    assert df_eval is not None
+    assert_tfsdataframe_equal(df_eval.reset_index(drop=True), tfs.read(outfile))
+
+    # Test some random BPMs
+    not_in_model = ["BPMSI.A4R6.B1"]
+    for bpm in not_in_model:
+        assert bpm not in df_eval[NAME].tolist()
+
+    iforest_bpms = ["BPM.27R8.B1", "BPMS.2R1.B1"]
+    eval_iforest_bpms = df_eval.loc[df_eval[SOURCE] == IFOREST, NAME].tolist()
+    for bpm in iforest_bpms:
+        assert bpm in eval_iforest_bpms
+        assert bpm in caplog.text
+
+    harpy_bpms = ["BPMSE.4L6.B1", "BPM.31L5.B1"]
+    eval_harpy_bpms = df_eval.loc[df_eval[SOURCE] == HARPY, NAME].tolist()
+    for bpm in harpy_bpms:
+        assert bpm in eval_harpy_bpms
+        assert bpm in caplog.text
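
Usage sketch (not part of the diff; it mirrors the call in `tests/unit/test_bad_bpms_summary.py`,
but the root path, dates and output name below are illustrative assumptions):

.. code-block:: python

    from omc3.scripts.bad_bpms_summary import bad_bpms_summary

    # Hypothetical example values: any GUI output root with date subfolders works.
    df_eval = bad_bpms_summary(
        root="/user/slops/data/LHC_DATA/OP_DATA/Betabeat",  # module default
        dates=["2024-09-*", "2024-10-03"],   # subfolder names or glob patterns
        accel_glob="LHCB1",                  # glob for the accelerator sub-directories
        outfile="bad_bpms_sep_2024.tfs",     # also writes bad_bpms_sep_2024_collected.tfs
        print_percentage=50,                 # log BPMs flagged bad in >= 50% of files
    )
    # df_eval columns: NAME, ACCELERATOR, SOURCE, PLANE, COUNT, FILE_COUNT, PERCENTAGE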