Skip to content

Commit

Permalink
Merge pull request #39 from dule1322/CTX-4370
Browse files Browse the repository at this point in the history
CTX-4370: Perform hard linking of sample file to datasets instead of …
  • Loading branch information
igorperic17 authored Jul 27, 2023
2 parents 7c7568d + a8351bc commit d4e674b
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 68 deletions.
2 changes: 1 addition & 1 deletion coretex/coretex/conversion/base_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def convert(self) -> ImageDatasetType:
MultithreadedDataProcessor(
self._dataSource(),
self._extractSingleAnnotation,
title = "Converting dataset"
title = "Converting dataset..."
).process()

return self._dataset
11 changes: 5 additions & 6 deletions coretex/coretex/dataset/network_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,24 +164,23 @@ def download(self, ignoreCache: bool = False) -> None:
\b
>>> dummyDataset = NetworkDataset.fetchById(1023)
>>> dummyDataset.download()
>> [Coretex] Downloading dataset: [==>...........................] - 10%
"""

self.path.mkdir(exist_ok = True)

def sampleDownloader(sample: SampleType) -> None:
downloadSuccess = sample.download(ignoreCache)
if not downloadSuccess:
return
raise RuntimeError(f">> [Coretex] Failed to download sample \"{sample.name}\"")

symlinkPath = self.path / f"{sample.id}.zip"
if not symlinkPath.exists():
os.symlink(sample.zipPath, symlinkPath)
sampleHardLinkPath = self.path / sample.zipPath.name
if not sampleHardLinkPath.exists():
os.link(sample.zipPath, sampleHardLinkPath)

processor = MultithreadedDataProcessor(
self.samples,
sampleDownloader,
title = "Downloading dataset"
title = f"Downloading dataset \"{self.name}\"..."
)

processor.process()
Expand Down
12 changes: 12 additions & 0 deletions coretex/coretex/sample/network_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from typing_extensions import Self
from pathlib import Path

import os

from .sample import Sample
from ..space import SpaceTask
from ... import folder_manager
Expand Down Expand Up @@ -125,6 +127,16 @@ def download(self, ignoreCache: bool = False) -> bool:
ignoreCache = ignoreCache
)

# If the sample was downloaded successfully, re-create its hard link in every dataset folder that already contains one
if not response.hasFailed():
for datasetPath in folder_manager.datasetsFolder.iterdir():
sampleHardLinkPath = datasetPath / self.zipPath.name
if not sampleHardLinkPath.exists():
continue

sampleHardLinkPath.unlink()
os.link(self.zipPath, sampleHardLinkPath)

return not response.hasFailed()

def load(self) -> SampleDataType:
Expand Down
10 changes: 5 additions & 5 deletions coretex/threading/threaded_data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import Any, Callable, Final, Optional, List
from concurrent.futures import ThreadPoolExecutor, Future

from ..utils import ConsoleProgressBar
import logging


class MultithreadedDataProcessor:
Expand Down Expand Up @@ -69,7 +69,7 @@ def __init__(self, data: List[Any], singleElementProcessor: Callable[[Any], None
self.__data: Final = data
self.__singleElementProcessor: Final = singleElementProcessor
self.__threadCount: Final = threadCount
self.__progressBar: Final = ConsoleProgressBar(len(data), "" if title is None else title)
self.__title = title

def process(self) -> None:
"""
Expand All @@ -80,16 +80,16 @@ def process(self) -> None:
Any unhandled exception which happened during the processing
"""

if self.__title is not None:
logging.getLogger("coretexpylib").info(f">> [Coretex] {self.__title}")

futures: List[Future] = []

with ThreadPoolExecutor(max_workers = self.__threadCount) as pool:
for element in self.__data:
future = pool.submit(self.__singleElementProcessor, element)
future.add_done_callback(lambda _: self.__progressBar.update())
futures.append(future)

self.__progressBar.finish()

for future in futures:
exception = future.exception()
if exception is not None:
Expand Down
1 change: 0 additions & 1 deletion coretex/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .console_progress_bar import ConsoleProgressBar
from .number import mathematicalRound
from .file import guessMimeType, InvalidFileExtension
from .date import DATE_FORMAT
55 changes: 0 additions & 55 deletions coretex/utils/console_progress_bar.py

This file was deleted.

0 comments on commit d4e674b

Please sign in to comment.