Skip to content

Videolab #243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@ coverage.xml
# Misc
results/
image_files*
data/
frames

# datasets
cifar*
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -46,11 +46,11 @@ pytorch = ["torchvision>=0.12.0"]
azure = ["adlfs>=2022.2.0"] # latest compatible with Python 3.7
gcs = ["gcsfs>=2022.1.0"] # latest compatible with Python 3.7
s3 = ["s3fs>=2023.1.0"] # latest compatible with Python 3.7
video = ["av>=10.0.0"]

all = ["cleanvision[huggingface,pytorch,azure,gcs,s3]"]
all = ["cleanvision[huggingface,pytorch,azure,gcs,s3,video]"]

[project.urls]
"Source" = "https://github.com/cleanlab/cleanvision"
"Bug Tracker" = "https://github.com/cleanlab/cleanvision/issues"
"Documentation" = "https://cleanvision.readthedocs.io/"

3 changes: 3 additions & 0 deletions src/cleanvision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import sys

from cleanvision.imagelab import Imagelab as _Imagelab
from cleanvision.videolab import Videolab as _Videolab

PYTHON_VERSION_INFO = sys.version_info

@@ -21,3 +23,4 @@ def get_version() -> str:
pass

Imagelab = _Imagelab
Videolab = _Videolab
59 changes: 59 additions & 0 deletions src/cleanvision/dataset/video_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from cleanvision.dataset.base_dataset import Dataset
from pathlib import Path
from typing import Generator, Iterator, List, Optional, Union
from cleanvision.utils.constants import VIDEO_FILE_EXTENSIONS


class VideoDataset(Dataset):
    """Wrapper class to handle video datasets.

    The dataset is an ordered list of video file paths. The paths come either
    from a recursive scan of ``data_folder`` for the glob patterns in
    ``VIDEO_FILE_EXTENSIONS`` or from a user-supplied ``filepaths`` list.
    ``frames_dir`` defaults to ``<current working directory>/frames`` and can
    be changed with :py:meth:`set_frames_dir`.
    """

    def __init__(
        self,
        data_folder: Optional[str] = None,
        filepaths: Optional[List[str]] = None,
    ) -> None:
        """Determine video dataset source and populate index.

        Parameters
        ----------
        data_folder:
            Directory scanned recursively for video files. Takes precedence
            over ``filepaths`` when it is a non-empty value.
        filepaths:
            Explicit list of video file paths, used when ``data_folder`` is
            not given.

        Raises
        ------
        ValueError
            If neither ``data_folder`` nor ``filepaths`` is provided.
        """
        if data_folder:
            # get filepaths from video dataset directory
            self._filepaths = [
                str(path) for path in self.__get_filepaths(Path(data_folder))
            ]
        elif filepaths is not None:
            # store user supplied video file paths
            self._filepaths = filepaths
        else:
            # An explicit exception instead of `assert`: asserts are removed
            # under `python -O`, which would leave `_filepaths` set to None.
            raise ValueError(
                "Either `data_folder` or `filepaths` must be provided "
                "to create a VideoDataset."
            )

        # create index
        self._set_index()
        self.frames_dir = Path.cwd() / "frames"

    def __len__(self) -> int:
        """Get video dataset file count."""
        return len(self.index)

    def __iter__(self) -> Iterator[Union[int, str]]:
        """Iterate over the indexed video file paths."""
        return iter(self.index)

    def _set_index(self) -> None:
        """Create internal storage for filepaths (a copy of ``_filepaths``)."""
        self.index = list(self._filepaths)

    def __get_filepaths(self, dataset_path: Path) -> Generator[Path, None, None]:
        """Scan file system for video files and yield their file paths."""
        # notify user
        print(f"Reading videos from {dataset_path}")

        # one recursive glob per supported extension pattern
        for ext in VIDEO_FILE_EXTENSIONS:
            yield from dataset_path.glob(f"**/{ext}")

    def __getitem__(self, item: int) -> str:
        """Return the video file path stored at position ``item``."""
        return self.index[item]

    def set_frames_dir(self, frames_dir: Path) -> None:
        """Override the directory stored in ``frames_dir``."""
        self.frames_dir = frames_dir
4 changes: 2 additions & 2 deletions src/cleanvision/imagelab.py
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@
)
from cleanvision.utils.base_issue_manager import IssueManager
from cleanvision.utils.constants import (
DEFAULT_ISSUE_TYPES,
DEFAULT_ISSUE_TYPES_IMAGELAB,
DUPLICATE,
DUPLICATE_ISSUE_TYPES_LIST,
IMAGE_PROPERTY,
@@ -166,7 +166,7 @@ def _set_default_config(self) -> Dict[str, Any]:
@staticmethod
def list_default_issue_types() -> List[str]:
"""Returns a list of the issue types that are run by default in :py:meth:`Imagelab.find_issues`"""
return DEFAULT_ISSUE_TYPES
return DEFAULT_ISSUE_TYPES_IMAGELAB

@staticmethod
def list_possible_issue_types() -> List[str]:
14 changes: 13 additions & 1 deletion src/cleanvision/utils/constants.py
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@
"*.WEBP",
] # filetypes supported by PIL

DEFAULT_ISSUE_TYPES = [
DEFAULT_ISSUE_TYPES_IMAGELAB = [
"dark",
"light",
"odd_aspect_ratio",
@@ -48,3 +48,15 @@
"grayscale",
"odd_size",
]

# Issue type names associated with Videolab.
# NOTE(review): presumably the checks Videolab runs by default, mirroring
# DEFAULT_ISSUE_TYPES_IMAGELAB — confirm against videolab.py.
DEFAULT_ISSUE_TYPES_VIDEOLAB = [
    "dark",
    "light",
    "odd_aspect_ratio",
    "low_information",
    "blurry",
    "grayscale",
    "odd_size",
]

# Glob patterns VideoDataset uses to discover video files under a data folder.
# Only lowercase extensions are listed.
VIDEO_FILE_EXTENSIONS: List[str] = ["*.mp4", "*.avi", "*.mkv", "*.mov", "*.webm"]
39 changes: 39 additions & 0 deletions src/cleanvision/utils/frame_sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from importlib import import_module
from pathlib import Path


class FrameSampler:
    """Simplest frame sampling strategy: keep every k-th decoded frame."""

    def __init__(self, k: int) -> None:
        """Store frame sample interval k and import PyAV.

        Parameters
        ----------
        k:
            Sampling interval; frames whose index is a multiple of ``k`` are
            saved. Must be at least 1.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 (``k == 0`` would otherwise fail with
            ``ZeroDivisionError`` on the first decoded frame).
        ImportError
            If the optional ``av`` package is not installed.
        """
        # validate before importing so a bad interval is reported immediately
        if k < 1:
            raise ValueError(
                f"Frame sampling interval `k` must be a positive integer, got {k}."
            )

        # storing frame sampling interval
        self.k = k

        # PyAV is an optional dependency, so it is imported lazily here
        try:
            self.av = import_module("av")
        except ImportError as error:
            raise ImportError(
                "Cannot import package `av`. "
                "Please install it via `pip install av` and then try again."
            ) from error

    def sample(self, video_path: str, output_dir: Path) -> None:
        """Loop through frames and store every k-th frame as a JPEG.

        Parameters
        ----------
        video_path:
            Path of the video file passed to ``av.open``.
        output_dir:
            Directory receiving the sampled frames; created if missing.
        """
        # make sure the destination exists before any frame is written
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        with self.av.open(video_path) as container:
            # get the first video stream
            stream = container.streams.video[0]

            # iterate frames
            for frame_indx, frame in enumerate(container.decode(stream)):
                # skip everything except every k-th frame
                if frame_indx % self.k:
                    continue

                # use frame timestamp as image file name; fall back to the
                # frame index when the frame carries no timestamp, otherwise
                # every such frame would overwrite the same "None.jpg"
                frame_time = frame.time
                if frame_time is None:
                    image_file_name = f"frame_{frame_indx}.jpg"
                else:
                    image_file_name = str(frame_time) + ".jpg"

                # convert to a PIL image and save to output dir
                frame.to_image().save(output_dir / image_file_name)
Loading