Skip to content

Commit

Permalink
feat: add download_image function
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Jul 1, 2024
1 parent d6aff4b commit e58c46b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 7 deletions.
57 changes: 55 additions & 2 deletions openfoodfacts/images.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import re
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple, Union
from urllib.parse import urlparse

import requests
from PIL import Image

from openfoodfacts.types import Environment, Flavor
from openfoodfacts.utils import URLBuilder
from openfoodfacts.utils import URLBuilder, get_image_from_url

# Matches a string of at least 9 characters, capturing three 3-character
# groups followed by the remainder (which may be empty).
# NOTE(review): presumably used to turn a barcode into image path components
# -- confirm against split_barcode.
BARCODE_PATH_REGEX = re.compile(r"^(...)(...)(...)(.*)$")
# Base URL of the public Open Food Facts S3 bucket
AWS_S3_BASE_URL = "https://openfoodfacts-images.s3.eu-west-3.amazonaws.com/data"


def split_barcode(barcode: str) -> List[str]:
Expand Down Expand Up @@ -156,3 +161,51 @@ def extract_source_from_url(url: str) -> str:
url_path = str(Path(url_path).with_suffix(".jpg"))

return url_path


def download_image(
    image: Union[str, Tuple[str, str]],
    use_cache: bool = True,
    error_raise: bool = True,
    session: Optional[requests.Session] = None,
    return_bytes: bool = False,
) -> Union[None, Image.Image, Tuple[Optional[Image.Image], bytes]]:
    """Download an Open Food Facts image.

    :param image: the image URL or a tuple containing the barcode and the
        image ID
    :param use_cache: whether to use the S3 dataset cache, defaults to True.
        For a string input, the cache is only used when the string starts
        with "http"; any other string is fetched as-is.
    :param error_raise: whether to raise an error if the download fails,
        defaults to True
    :param session: the requests session to use, defaults to None
    :param return_bytes: if True, return the image bytes as well, defaults to
        False.
    :raises TypeError: if `image` is neither a string nor a tuple
    :return: the loaded image or None if an error occurred. If `return_bytes`
        is True, a tuple with the image and the image bytes is returned.

    >>> download_image("https://images.openfoodfacts.org/images/products/324/227/210/2359/4.jpg")
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1244x1500>

    >>> download_image(("3242272102359", "4"))
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1244x1500>
    """
    if isinstance(image, str):
        if use_cache and image.startswith("http"):
            # Rewrite the URL so that the image is fetched from the S3 bucket
            image_path = extract_source_from_url(image)
            image_url = f"{AWS_S3_BASE_URL}{image_path}"
        else:
            image_url = image
    elif isinstance(image, tuple):
        if use_cache:
            image_path = generate_image_path(*image)
            image_url = f"{AWS_S3_BASE_URL}{image_path}"
        else:
            image_url = generate_image_url(*image)
    else:
        # Fail early with an explicit message instead of hitting an
        # UnboundLocalError on `image_url` below.
        raise TypeError(
            "image must be a URL (str) or a (barcode, image_id) tuple, "
            f"got {type(image).__name__}"
        )

    return get_image_from_url(
        image_url,
        error_raise=error_raise,
        session=session,
        return_bytes=return_bytes,
    )
19 changes: 14 additions & 5 deletions openfoodfacts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import time
from io import BytesIO
from pathlib import Path
from typing import Callable, Dict, Iterable, List, Optional, Union
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union

import requests
import tqdm
Expand Down Expand Up @@ -318,16 +318,22 @@ def get_asset_from_url(


def get_image_from_url(
image_url: str, error_raise: bool = True, session: Optional[requests.Session] = None
) -> Optional["Image.Image"]:
image_url: str,
error_raise: bool = True,
session: Optional[requests.Session] = None,
return_bytes: bool = False,
) -> Union[None, "Image.Image", Tuple[Optional["Image.Image"], bytes]]:
"""Fetch an image from `image_url` and load it.
:param image_url: URL of the image to load
:param error_raise: if True, raises a `AssetLoadingException` if an error
occured, defaults to False. If False, None is returned if an error
occured.
:param session: requests Session to use, by default no session is used.
:return: the Pillow Image or None.
:param return_bytes: if True, return the image bytes as well, defaults to
False.
:return: the loaded image or None if an error occured. If `return_bytes`
is True, a tuple with the image and the image bytes is returned.
"""
if not _pillow_available:
raise ImportError("Pillow is required to load images")
Expand All @@ -338,7 +344,10 @@ def get_image_from_url(
content_bytes = r.content

try:
return Image.open(BytesIO(content_bytes))
image = Image.open(BytesIO(content_bytes))
if return_bytes:
return image, content_bytes
return image
except PIL.UnidentifiedImageError:
error_message = f"Cannot identify image {image_url}"
if error_raise:
Expand Down

0 comments on commit e58c46b

Please sign in to comment.