Preprocess the image input. Args: image (`PipelineImageInput`): The image input, accepted formats are PIL images, NumPy arrays, PyTorch tensors; Also accept list of supported formats. height (`int`, *optional*):
(
self,
image: PipelineImageInput,
height: int | None = None,
width: int | None = None,
resize_mode: str = "default", # "default", "fill", "crop"
crops_coords: tuple[int, int, int, int] | None = None,
)
| 605 | return height, width |
| 606 | |
| 607 | def preprocess( |
| 608 | self, |
| 609 | image: PipelineImageInput, |
| 610 | height: int | None = None, |
| 611 | width: int | None = None, |
| 612 | resize_mode: str = "default", # "default", "fill", "crop" |
| 613 | crops_coords: tuple[int, int, int, int] | None = None, |
| 614 | ) -> torch.Tensor: |
| 615 | """ |
| 616 | Preprocess the image input. |
| 617 | |
| 618 | Args: |
| 619 | image (`PipelineImageInput`): |
| 620 | The image input, accepted formats are PIL images, NumPy arrays, PyTorch tensors; Also accept list of |
| 621 | supported formats. |
| 622 | height (`int`, *optional*): |
| 623 | The height in preprocessed image. If `None`, will use the `get_default_height_width()` to get default |
| 624 | height. |
| 625 | width (`int`, *optional*): |
| 626 | The width in preprocessed image. If `None`, will use the `get_default_height_width()` to get the default width. |
| 627 | resize_mode (`str`, *optional*, defaults to `default`): |
| 628 | The resize mode, can be one of `default`, `fill` or `crop`. If `default`, will resize the image to fit within |
| 629 | the specified width and height, and it may not maintain the original aspect ratio. If `fill`, will |
| 630 | resize the image to fit within the specified width and height, maintaining the aspect ratio, and then |
| 631 | center the image within the dimensions, filling the empty space with data from the image. If `crop`, will resize the |
| 632 | image to fit within the specified width and height, maintaining the aspect ratio, and then center the |
| 633 | image within the dimensions, cropping the excess. Note that resize_mode `fill` and `crop` are only |
| 634 | supported for PIL image input. |
| 635 | crops_coords (`tuple[int, int, int, int]`, *optional*, defaults to `None`): |
| 636 | The crop coordinates for each image in the batch. If `None`, will not crop the image. |
| 637 | |
| 638 | Returns: |
| 639 | `torch.Tensor`: |
| 640 | The preprocessed image. |
| 641 | """ |
| 642 | supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) |
| 643 | |
| 644 | # Expand the missing dimension for 3-dimensional pytorch tensor or numpy array that represents grayscale image |
| 645 | if self.config.do_convert_grayscale and isinstance(image, (torch.Tensor, np.ndarray)) and image.ndim == 3: |
| 646 | if isinstance(image, torch.Tensor): |
| 647 | # if image is a pytorch tensor could have 2 possible shapes: |
| 648 | # 1. batch x height x width: we should insert the channel dimension at position 1 |
| 649 | # 2. channel x height x width: we should insert batch dimension at position 0, |
| 650 | # however, since both channel and batch dimension has same size 1, it is same to insert at position 1 |
| 651 | # for simplicity, we insert a dimension of size 1 at position 1 for both cases |
| 652 | image = image.unsqueeze(1) |
| 653 | else: |
| 654 | # if it is a numpy array, it could have 2 possible shapes: |
| 655 | # 1. batch x height x width: insert channel dimension on last position |
| 656 | # 2. height x width x channel: insert batch dimension on first position |
| 657 | if image.shape[-1] == 1: |
| 658 | image = np.expand_dims(image, axis=0) |
| 659 | else: |
| 660 | image = np.expand_dims(image, axis=-1) |
| 661 | |
| 662 | if isinstance(image, list) and isinstance(image[0], np.ndarray) and image[0].ndim == 4: |
| 663 | warnings.warn( |
| 664 | "Passing `image` as a list of 4d np.ndarray is deprecated." |