From ddec1b145666598e2969f273e634882ca486d549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Morav=C4=8D=C3=ADk?= Date: Mon, 23 Jan 2023 13:35:54 +0100 Subject: [PATCH] feat: Add documentation for `StorageManager` and `StorageClientManager`, open_* methods in `Actor` --- docs/docs.md | 55 +++++++++++++++++++++++++-- src/apify/actor.py | 50 ++++++++++++++++++++++-- src/apify/storage_client_manager.py | 16 ++++++-- src/apify/storages/storage_manager.py | 34 +++++++++++++---- 4 files changed, 137 insertions(+), 18 deletions(-) diff --git a/docs/docs.md b/docs/docs.md index 780c8057..6d4d7e3d 100644 --- a/docs/docs.md +++ b/docs/docs.md @@ -146,7 +146,23 @@ That’s useful if you want to use the client as a different Apify user than the #### async classmethod open_dataset(dataset_id_or_name=None, \*, force_cloud=False) -TODO: docs. +Open a dataset. + +Datasets are used to store structured data where each object stored has the same attributes, +such as online store products or real estate offers. +The actual data is stored either on the local filesystem or in the Apify cloud. + +* **Parameters** + + * **dataset_id_or_name** (`str`, *optional*) – ID or name of the dataset to be opened. + If not provided, the method returns the default dataset associated with the actor run. + + * **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + +* **Returns** + + An instance of the Dataset class for the given ID or name. * **Return type** @@ -156,7 +172,23 @@ TODO: docs. #### async classmethod open_key_value_store(key_value_store_id_or_name=None, \*, force_cloud=False) -TODO: docs. +Open a key-value store. + +Key-value stores are used to store records or files, along with their MIME content type. +The records are stored and retrieved using a unique key. +The actual data is stored either on a local filesystem or in the Apify cloud. 
+ +* **Parameters** + + * **key_value_store_id_or_name** (`str`, *optional*) – ID or name of the key-value store to be opened. + If not provided, the method returns the default key-value store associated with the actor run. + + * **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + +* **Returns** + + An instance of the KeyValueStore class for the given ID or name. * **Return type** @@ -166,7 +198,24 @@ TODO: docs. #### async classmethod open_request_queue(request_queue_id_or_name=None, \*, force_cloud=False) -TODO: docs. +Open a request queue. + +A request queue represents a queue of URLs to crawl, which is stored either on the local filesystem or in the Apify cloud. +The queue is used for deep crawling of websites, where you start with several URLs and then +recursively follow links to other pages. The data structure supports both breadth-first +and depth-first crawling orders. + +* **Parameters** + + * **request_queue_id_or_name** (`str`, *optional*) – ID or name of the request queue to be opened. + If not provided, the method returns the default request queue associated with the actor run. + + * **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + +* **Returns** + + An instance of the RequestQueue class for the given ID or name. * **Return type** diff --git a/src/apify/actor.py b/src/apify/actor.py index 74a54f63..7f8f1879 100644 --- a/src/apify/actor.py +++ b/src/apify/actor.py @@ -478,7 +478,22 @@ def _get_storage_client(self, force_cloud: bool) -> Optional[ApifyClientAsync]: @classmethod async def open_dataset(cls, dataset_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> Dataset: - """TODO: docs.""" + """Open a dataset. 
+ + Datasets are used to store structured data where each object stored has the same attributes, + such as online store products or real estate offers. + The actual data is stored either on the local filesystem or in the Apify cloud. + + Args: + dataset_id_or_name (str, optional): ID or name of the dataset to be opened. + If not provided, the method returns the default dataset associated with the actor run. + force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + + Returns: + Dataset: An instance of the `Dataset` class for the given ID or name. + + """ return await cls._get_default_instance().open_dataset(dataset_id_or_name=dataset_id_or_name, force_cloud=force_cloud) async def _open_dataset_internal(self, dataset_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> Dataset: @@ -488,7 +503,21 @@ async def _open_dataset_internal(self, dataset_id_or_name: Optional[str] = None, @classmethod async def open_key_value_store(cls, key_value_store_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> KeyValueStore: - """TODO: docs.""" + """Open a key-value store. + + Key-value stores are used to store records or files, along with their MIME content type. + The records are stored and retrieved using a unique key. + The actual data is stored either on a local filesystem or in the Apify cloud. + + Args: + key_value_store_id_or_name (str, optional): ID or name of the key-value store to be opened. + If not provided, the method returns the default key-value store associated with the actor run. + force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + + Returns: + KeyValueStore: An instance of the `KeyValueStore` class for the given ID or name. 
+ """ return await cls._get_default_instance().open_key_value_store(key_value_store_id_or_name=key_value_store_id_or_name, force_cloud=force_cloud) async def _open_key_value_store_internal(self, key_value_store_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> KeyValueStore: @@ -498,7 +527,22 @@ async def _open_key_value_store_internal(self, key_value_store_id_or_name: Optio @classmethod async def open_request_queue(cls, request_queue_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> RequestQueue: - """TODO: docs.""" + """Open a request queue. + + A request queue represents a queue of URLs to crawl, which is stored either on the local filesystem or in the Apify cloud. + The queue is used for deep crawling of websites, where you start with several URLs and then + recursively follow links to other pages. The data structure supports both breadth-first + and depth-first crawling orders. + + Args: + request_queue_id_or_name (str, optional): ID or name of the request queue to be opened. + If not provided, the method returns the default request queue associated with the actor run. + force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used. + This way it is possible to combine local and cloud storage. + + Returns: + RequestQueue: An instance of the `RequestQueue` class for the given ID or name. 
+ """ return await cls._get_default_instance().open_request_queue(request_queue_id_or_name=request_queue_id_or_name, force_cloud=force_cloud) async def _open_request_queue_internal( diff --git a/src/apify/storage_client_manager.py b/src/apify/storage_client_manager.py index d60376bd..bd4fef5e 100644 --- a/src/apify/storage_client_manager.py +++ b/src/apify/storage_client_manager.py @@ -7,7 +7,7 @@ class StorageClientManager: - """TODO: docs.""" + """A class for managing storage clients.""" _config: Configuration @@ -16,18 +16,26 @@ class StorageClientManager: _default_instance: Optional['StorageClientManager'] = None def __init__(self) -> None: - """TODO: docs.""" + """Create a `StorageClientManager` instance.""" self._config = Configuration.get_global_configuration() self._client = MemoryStorage(persist_storage=self._config.persist_storage) @classmethod def get_storage_client(cls) -> Union[ApifyClientAsync, MemoryStorage]: - """TODO: docs.""" + """Get the current storage client instance. + + Returns: + ApifyClientAsync or MemoryStorage: The current storage client instance. + """ return cls._get_default_instance()._client @classmethod def set_storage_client(cls, client: Union[ApifyClientAsync, MemoryStorage]) -> None: - """TODO: docs.""" + """Set the storage client. + + Args: + client (ApifyClientAsync or MemoryStorage): The instance of a storage client. 
+ """ cls._get_default_instance()._client = client @classmethod diff --git a/src/apify/storages/storage_manager.py b/src/apify/storages/storage_manager.py index 82e8c8e4..3570c449 100644 --- a/src/apify/storages/storage_manager.py +++ b/src/apify/storages/storage_manager.py @@ -15,7 +15,7 @@ class Storage(Protocol[T]): - """TODO: Docs.""" + """A protocol defining a common interface for storage classes.""" @classmethod def _create_instance(cls, storage_id_or_name: str, client: Union[ApifyClientAsync, MemoryStorage]) -> T: # noqa: U100 @@ -33,16 +33,14 @@ async def _purge_default_storages(client: Union[ApifyClientAsync, MemoryStorage] class StorageManager: - """TODO: docs.""" + """A class for managing storages.""" _default_instance: Optional['StorageManager'] = None _cache: Dict[Type[Storage], Dict[str, Storage]] - _config: Configuration def __init__(self) -> None: - """TODO: docs.""" + """Create a `StorageManager` instance.""" self._cache = {} - self._config = Configuration.get_global_configuration() @classmethod def _get_default_instance(cls) -> 'StorageManager': @@ -59,9 +57,23 @@ async def open_storage( client: Optional[Union[ApifyClientAsync, MemoryStorage]] = None, config: Optional[Configuration] = None, ) -> T: - """TODO: docs.""" + """Open a storage of the given class, or return a cached storage object if it was opened before. + + Opens a new storage (`Dataset`, `KeyValueStore`, or `RequestQueue`) with the given ID or name. + Returns the cached storage object if the storage was opened before. + + Args: + storage_class (Type[Dataset] or Type[KeyValueStore] or Type[RequestQueue]): Class of the storage to be opened. + storage_id_or_name (str, optional): ID or name of the storage to be opened. If omitted, an unnamed storage will be opened. + client (ApifyClientAsync or MemoryStorage, optional): The storage client which should be used in the storage. + If omitted, the default client will be used. 
+ config (Configuration, optional): The actor configuration to be used in this call. If omitted, the global configuration will be used. + + Returns: + An instance of the storage given by `storage_class`. + """ storage_manager = StorageManager._get_default_instance() - used_config = config or storage_manager._config + used_config = config or Configuration.get_global_configuration() used_client = client or StorageClientManager.get_storage_client() # Create cache for the given storage class if missing @@ -93,7 +105,13 @@ async def open_storage( @classmethod async def close_storage(cls, storage_class: Type[Storage], id: str, name: Optional[str]) -> None: - """TODO: docs.""" + """Close the given storage by removing it from the cache. + + Args: + storage_class (Type[Dataset] or Type[KeyValueStore] or Type[RequestQueue]): Class of the storage to be closed. + id (str): ID of the storage to be closed. + name (str, optional): Name of the storage to be closed. + """ storage_manager = StorageManager._get_default_instance() del storage_manager._cache[storage_class][id] if name is not None: