Skip to content

Commit

Permalink
Add studio flag for rm-dataset and edit-dataset (#572)
Browse files Browse the repository at this point in the history
  • Loading branch information
amritghimire authored Nov 27, 2024
1 parent 6244161 commit 2135270
Show file tree
Hide file tree
Showing 4 changed files with 296 additions and 16 deletions.
116 changes: 108 additions & 8 deletions src/datachain/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
from datachain.config import Config
from datachain.error import DataChainError
from datachain.lib.dc import DataChain
from datachain.studio import list_datasets, process_studio_cli_args
from datachain.studio import (
edit_studio_dataset,
list_datasets,
process_studio_cli_args,
remove_studio_dataset,
)
from datachain.telemetry import telemetry

if TYPE_CHECKING:
Expand Down Expand Up @@ -403,21 +408,44 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
parse_edit_dataset.add_argument(
"--new-name",
action="store",
default="",
help="Dataset new name",
)
parse_edit_dataset.add_argument(
"--description",
action="store",
default="",
help="Dataset description",
)
parse_edit_dataset.add_argument(
"--labels",
default=[],
nargs="+",
help="Dataset labels",
)
parse_edit_dataset.add_argument(
"--studio",
action="store_true",
default=False,
help="Edit dataset from Studio",
)
parse_edit_dataset.add_argument(
"-L",
"--local",
action="store_true",
default=False,
help="Edit local dataset only",
)
parse_edit_dataset.add_argument(
"-a",
"--all",
action="store_true",
default=True,
help="Edit both datasets from studio and local",
)
parse_edit_dataset.add_argument(
"--team",
action="store",
default=None,
help="The team to edit a dataset. By default, it will use team from config.",
)

datasets_parser = subp.add_parser(
"datasets", parents=[parent_parser], description="List datasets"
Expand Down Expand Up @@ -466,6 +494,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
action=BooleanOptionalAction,
help="Force delete registered dataset with all of it's versions",
)
rm_dataset_parser.add_argument(
"--studio",
action="store_true",
default=False,
help="Remove dataset from Studio",
)
rm_dataset_parser.add_argument(
"-L",
"--local",
action="store_true",
default=False,
help="Remove local datasets only",
)
rm_dataset_parser.add_argument(
"-a",
"--all",
action="store_true",
default=True,
help="Remove both local and studio",
)
rm_dataset_parser.add_argument(
"--team",
action="store",
default=None,
help="The team to delete a dataset. By default, it will use team from config.",
)

dataset_stats_parser = subp.add_parser(
"dataset-stats",
Expand Down Expand Up @@ -909,8 +963,40 @@ def rm_dataset(
name: str,
version: Optional[int] = None,
force: Optional[bool] = False,
studio: bool = False,
local: bool = False,
all: bool = True,
team: Optional[str] = None,
):
token = Config().read().get("studio", {}).get("token")
all, local, studio = _determine_flavors(studio, local, all, token)

if all or local:
catalog.remove_dataset(name, version=version, force=force)

if (all or studio) and token:
remove_studio_dataset(team, name, version, force)


def edit_dataset(
    catalog: "Catalog",
    name: str,
    new_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[list[str]] = None,
    studio: bool = False,
    local: bool = False,
    all: bool = True,
    team: Optional[str] = None,
):
    """Edit a dataset's metadata locally, in Studio, or both.

    Args:
        catalog: Catalog whose ``edit_dataset`` performs the local edit.
        name: Current name of the dataset to edit.
        new_name: New dataset name, forwarded unchanged to both backends.
        description: New dataset description, forwarded unchanged.
        labels: New dataset labels, forwarded unchanged.
        studio: Request the Studio-side edit.
        local: Request the local edit.
        all: Request both edits.
        team: Studio team passed to ``edit_studio_dataset``.

    The three scope flags are normalised by ``_determine_flavors`` together
    with the configured Studio token before they are acted upon.
    NOTE(review): the exact precedence between the flags is decided inside
    ``_determine_flavors``, which is not visible here -- confirm there.
    """
    # The Studio token lives under the "studio" section of the config; it is
    # None when the user has not logged in to Studio.
    token = Config().read().get("studio", {}).get("token")
    all, local, studio = _determine_flavors(studio, local, all, token)

    # This function must only edit: a stray remove_dataset() call here would
    # reference names that are not parameters (version/force) and would
    # delete the dataset before it could be edited.
    if all or local:
        catalog.edit_dataset(name, new_name, description, labels)

    # The Studio edit is silently skipped when no token is configured.
    if (all or studio) and token:
        edit_studio_dataset(team, name, new_name, description, labels)


def dataset_stats(
Expand Down Expand Up @@ -1127,11 +1213,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
edatachain_file=args.edatachain_file,
)
elif args.command == "edit-dataset":
catalog.edit_dataset(
edit_dataset(
catalog,
args.name,
description=args.description,
new_name=args.new_name,
description=args.description,
labels=args.labels,
studio=args.studio,
local=args.local,
all=args.all,
team=args.team,
)
elif args.command == "ls":
ls(
Expand Down Expand Up @@ -1164,7 +1255,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
schema=args.schema,
)
elif args.command == "rm-dataset":
rm_dataset(catalog, args.name, version=args.version, force=args.force)
rm_dataset(
catalog,
args.name,
version=args.version,
force=args.force,
studio=args.studio,
local=args.local,
all=args.all,
team=args.team,
)
elif args.command == "dataset-stats":
dataset_stats(
catalog,
Expand Down
48 changes: 40 additions & 8 deletions src/datachain/remote/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,17 +178,9 @@ def _send_request(self, route: str, data: dict[str, Any]) -> Response[Any]:
data = {}

if not ok:
logger.error(
"Got bad response from Studio, content is %s",
response.content.decode("utf-8"),
)
if response.status_code == 403:
message = f"Not authorized for the team {self.team}"
else:
logger.error(
"Got bad response from Studio, content is %s",
response.content.decode("utf-8"),
)
message = data.get("message", "")
else:
message = ""
Expand Down Expand Up @@ -230,6 +222,46 @@ def ls(self, paths: Iterable[str]) -> Iterator[tuple[str, Response[LsData]]]:
def ls_datasets(self) -> Response[LsData]:
return self._send_request("datachain/ls-datasets", {})

def edit_dataset(
    self,
    name: str,
    new_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[list[str]] = None,
) -> Response[DatasetInfoData]:
    """Send an edit request for dataset ``name`` to the Studio API.

    Posts to the ``datachain/edit-dataset`` route. The request body always
    carries ``dataset_name``; ``new_name``, ``description`` and ``labels``
    are included only when the corresponding argument is not ``None``, so
    an omitted field is never sent.

    Returns:
        Whatever ``self._send_request`` returns for the route.
    """
    payload: dict[str, Any] = {"dataset_name": name}

    # Optional fields in the order they are added to the request body.
    optional_fields = (
        ("new_name", new_name),
        ("description", description),
        ("labels", labels),
    )
    payload.update(
        {key: value for key, value in optional_fields if value is not None}
    )

    return self._send_request("datachain/edit-dataset", payload)

def rm_dataset(
    self,
    name: str,
    version: Optional[int] = None,
    force: Optional[bool] = False,
) -> Response[DatasetInfoData]:
    """Send a removal request for dataset ``name`` to the Studio API.

    Posts to the ``datachain/rm-dataset`` route. ``version`` and ``force``
    are always included in the request body, even when left at their
    defaults (``None`` and ``False``).

    Returns:
        Whatever ``self._send_request`` returns for the route.
    """
    payload = {"dataset_name": name, "version": version, "force": force}
    return self._send_request("datachain/rm-dataset", payload)

def dataset_info(self, name: str) -> Response[DatasetInfoData]:
def _parse_dataset_info(dataset_info):
_parse_dates(dataset_info, ["created_at", "finished_at"])
Expand Down
29 changes: 29 additions & 0 deletions src/datachain/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,35 @@ def list_datasets(team: Optional[str] = None):
yield (name, version)


def edit_studio_dataset(
    team_name: Optional[str],
    name: str,
    new_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[list[str]] = None,
):
    """Edit dataset ``name`` in Studio and print a confirmation.

    Builds a ``StudioClient`` for ``team_name`` and forwards the edit to
    its ``edit_dataset`` method. A response that is not ``ok`` is handed
    to ``raise_remote_error`` with the response message.
    NOTE(review): ``raise_remote_error`` is defined elsewhere; presumably
    it raises, so the confirmation is printed only on success -- confirm.
    """
    result = StudioClient(team=team_name).edit_dataset(
        name, new_name, description, labels
    )
    if not result.ok:
        raise_remote_error(result.message)

    print(f"Dataset {name} updated")


def remove_studio_dataset(
    team_name: Optional[str],
    name: str,
    version: Optional[int] = None,
    force: Optional[bool] = False,
):
    """Remove dataset ``name`` from Studio and print a confirmation.

    Builds a ``StudioClient`` for ``team_name`` and forwards ``name``,
    ``version`` and ``force`` to its ``rm_dataset`` method. A response
    that is not ``ok`` is handed to ``raise_remote_error`` with the
    response message.
    NOTE(review): ``raise_remote_error`` is defined elsewhere; presumably
    it raises, so the confirmation is printed only on success -- confirm.
    """
    result = StudioClient(team=team_name).rm_dataset(name, version, force)
    if not result.ok:
        raise_remote_error(result.message)

    print(f"Dataset {name} removed")


def save_config(hostname, token):
config = Config(ConfigLevel.GLOBAL)
with config.edit() as conf:
Expand Down
Loading

0 comments on commit 2135270

Please sign in to comment.