data. DefaultCPUAllocator: not enough memory: you tried to allocate 51906150400 bytes. #1201
-
normal_dir: 31681 files

dataset:
  name: SLJ
  format: folder
  path: E:/2023_ShuJu_WEB/anomalib_ShuJu/SLJ/
  normal_dir: OK_256 # name of the folder containing normal images.
  abnormal_dir: NG_512 # name of the folder containing abnormal images.
  normal_test_dir: CS # name of the folder containing normal test images.
  task: classification # classification or segmentation
  mask: null # optional
  mask_dir: null # optional
  extensions: null
  split_ratio: 0.2 # normal images ratio to create a test split
  seed: 0
  image_size: 256
  train_batch_size: 32
  eval_batch_size: 32
  num_workers: 8
  normalization: imagenet # data distribution to which the images will be normalized
  test_split_mode: from_dir # options [from_dir, synthetic]
  val_split_ratio: 0.5 # fraction of train/test images held out for validation (usage depends on val_split_mode)
  transform_config:
    train: null
    eval: null
  val_split_mode: from_test # determines how the validation set is created, options [same_as_test, from_test]
  tiling:
    apply: false
    tile_size: null
    stride: null
    remove_border_count: 0
    use_random_tiling: False
    random_tile_count: 16
model:
  name: padim
  backbone: resnet18
  pre_trained: true
  layers:
    - layer1
    - layer2
    - layer3
  normalization_method: min_max # options: [none, min_max, cdf]

# model:
#   name: padim
#   backbone: wide_resnet50_2
#   pre_trained: true
#   layers:
#     - layer1
#     - layer2
#     - layer3
#   normalization_method: min_max # options: [none, min_max, cdf]
metrics:
  image:
    - F1Score
    - AUROC
  pixel:
    - F1Score
    - AUROC
  threshold:
    method: adaptive # options: [adaptive, manual]
    manual_image: null
    manual_pixel: null
visualization:
  show_images: False # show images on the screen
  save_images: True # save images to the file system
  log_images: True # log images to the available loggers (if any)
  image_save_path: null # path to which images will be saved
  mode: full # options: ["full", "simple"]

project:
  seed: 42
  path: E:/2023_ShuJu_WEB/anomaly_RUN/results_CS

logging:
  logger: [] # options: [comet, tensorboard, wandb, csv] or combinations.
  log_graph: false # Logs the model graph to respective logger.

optimization:
  export_mode: torch # options: torch, onnx, openvino
# PL Trainer Args. Don't add extra parameter here.
trainer:
  enable_checkpointing: true
  default_root_dir: null
  gradient_clip_val: 0
  gradient_clip_algorithm: norm
  num_nodes: 1
  devices: 1
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1 # Don't validate before extracting features.
  fast_dev_run: false
  accumulate_grad_batches: 1
  max_epochs: 1
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  limit_test_batches: 1.0
  limit_predict_batches: 1.0
  val_check_interval: 1.0 # Don't validate before extracting features.
  log_every_n_steps: 50
  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
  strategy: null
  sync_batchnorm: false
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 0
  profiler: null
  benchmark: false
  deterministic: false
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: true
  plugins: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle

2023-07-14 15:08:48,278 - pytorch_lightning.utilities.rank_zero - INFO - You are using a CUDA device ('NVIDIA GeForce RTX 3080 Ti Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2023-07-14 15:08:53,742 - anomalib.utils.callbacks.metrics_configuration - WARNING - Cannot perform pixel-level evaluation when task type is classification. Ignoring the following pixel-level metrics: ['F1Score', 'AUROC']
E:\anaconda3\envs\anomalib_env\lib\site-packages\torchmetrics\utilities\prints.py:36: UserWarning: Metric `ROC` will save all targets and predictions in buffer. For large datasets this may lead to large memory footprint.
warnings.warn(*args, **kwargs)
2023-07-14 15:08:53,828 - pytorch_lightning.accelerators.cuda - INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
E:\anaconda3\envs\anomalib_env\lib\site-packages\pytorch_lightning\core\optimizer.py:183: UserWarning: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer
  rank_zero_warn(
RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 51906150400 bytes.
Epoch 0: 98%|█████████▊| 991/1009 [02:01<00:02, 8.15it/s, loss=nan]
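As an aside, the Tensor Core hint in the Lightning log above can be followed with a single call before training; it only trades float32 matmul precision for GPU speed and has no bearing on the CPU allocation failure (a minimal sketch using the documented PyTorch API):

```python
import torch

# Trade float32 matmul precision for speed on Tensor Core GPUs, as suggested
# by the pytorch_lightning INFO message above. Unrelated to the CPU OOM error.
torch.set_float32_matmul_precision("medium")  # or "high"
```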
Replies: 4 comments
-
Hello. Your dataset is quite big, and during training the embeddings that are produced are moved to the CPU. I believe this eventually causes your main memory to fill up.
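For a rough sense of scale, the failed allocation matches the size of the embedding tensor PaDiM stacks on the CPU (a back-of-envelope sketch; the feature-map resolution and the default `n_features` value for resnet18 are assumptions based on typical PaDiM settings):

```python
# Estimate of the CPU tensor PaDiM tries to allocate when stacking embeddings.
# Assumed: resnet18 backbone, 256x256 input, embeddings kept at layer1 resolution
# (64x64), and the commonly reported default of n_features=100 for resnet18.
num_images = 31681      # files in normal_dir, from the post above
n_features = 100        # assumed default for resnet18
height, width = 64, 64  # assumed embedding map size for a 256x256 input
bytes_per_float32 = 4

total_bytes = num_images * n_features * height * width * bytes_per_float32
print(total_bytes)            # 51906150400
print(total_bytes / 1024**3)  # ~48.3 GiB
```

Under these assumptions the estimate comes out to exactly the 51906150400 bytes reported in the traceback, which is consistent with the whole embedding stack, not a single batch, being held in main memory.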
-
Hello. How can I avoid such errors by adjusting the parameters, such as the batch size?
-
Batch size would not change this: PaDiM fits the entire dataset into memory before computing the statistics. What you could do is reduce the `n_features` parameter in the `Padim` model section to reduce the number of extracted features. You can check the parameter here: anomalib/src/anomalib/models/padim/lightning_model.py, line 44 (commit 9323985).
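A minimal sketch of what lowering `n_features` might look like through the Python API (assuming the anomalib 0.x `Padim` constructor; with the YAML workflow the equivalent would be an `n_features` entry under the `model:` section, and the value 50 here is purely illustrative):

```python
from anomalib.models import Padim

# Keep fewer randomly selected embedding channels so the stacked embedding tensor
# (num_images x n_features x H x W) shrinks proportionally.
model = Padim(
    input_size=(256, 256),
    backbone="resnet18",
    layers=["layer1", "layer2", "layer3"],
    pre_trained=True,
    n_features=50,  # illustrative value; halving the feature count roughly halves the CPU memory needed
)
```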
-
If you have any other questions, feel free to ask here.