Skip to content

Commit

Permalink
Merge pull request #11230 from GlobalDataverseCommunityConsortium/DAN…
Browse files Browse the repository at this point in the history
…S-bulk_file_delete

Delete Files From Dataset
  • Loading branch information
ofahimIQSS authored Feb 25, 2025
2 parents 30736a5 + af2bcc5 commit b0d136c
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/release-notes/11230-deleteFiles api call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A new /api/datasets/{id}/deleteFiles call has been added to the API, allowing deletion of multiple files from the latest version of a dataset in one operation.
33 changes: 33 additions & 0 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3553,6 +3553,39 @@ To update the blocks that are linked, send an array with those blocks.
To remove all links to blocks, send an empty array.
Delete Files from a Dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Delete files from a dataset. This API call allows you to delete multiple files from a dataset in a single operation.
.. code-block:: bash
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_IDENTIFIER=doi:10.5072/FK2ABCDEF
curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/datasets/:persistentId/deleteFiles?persistentId=$PERSISTENT_IDENTIFIER" \
-H "Content-Type: application/json" \
-d '[1, 2, 3]'
The fully expanded example above (without environment variables) looks like this:
.. code-block:: bash
curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/:persistentId/deleteFiles?persistentId=doi:10.5072/FK2ABCDEF" \
-H "Content-Type: application/json" \
-d '[1, 2, 3]'
The JSON payload should be an array of the IDs of the files that you want to delete from the dataset.
You must have the appropriate permissions to delete files from the dataset.
Upon success, the API will return a JSON response with a success message and the number of files deleted.
The API call will report a 400 (BAD REQUEST) error if any of the files specified do not exist or are not in the latest version of the specified dataset.
Files
-----
Expand Down
65 changes: 65 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,14 @@

import static edu.harvard.iq.dataverse.api.ApiConstants.*;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean;
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
import static jakarta.ws.rs.core.Response.Status.NOT_FOUND;
import static jakarta.ws.rs.core.Response.Status.FORBIDDEN;

@Path("datasets")
public class Datasets extends AbstractApiBean {
Expand Down Expand Up @@ -5368,4 +5370,67 @@ public Response updateDatasetTypeLinksWithMetadataBlocks(@Context ContainerReque
}
}

/**
 * Deletes several files from the edit version of a dataset in a single operation.
 *
 * <p>The request body is a JSON array of data file IDs. Every ID must refer to a file
 * that is present in the version returned by {@code dataset.getOrCreateEditVersion()};
 * otherwise nothing is deleted and a 400 (BAD REQUEST) is returned. IDs that appear more
 * than once in the array are treated as a single request for that file.
 *
 * <p>After the version update succeeds, physical storage is cleaned up for every removed
 * file that is not released ({@code dataFile.isReleased()} is false). A failure to remove
 * the physical file is logged and does not fail the request.
 *
 * @param crc     request context used to resolve the calling user
 * @param id      dataset identifier as accepted by {@code findDatasetOrDie}
 * @param fileIds JSON array of integral data file IDs to delete
 * @return 200 with a message containing the number of files deleted; 400 for a malformed
 *         payload, unknown files or a failed command; 403 when the command is rejected
 *         with a {@code PermissionException}; 500 on an {@code EJBException}
 */
@PUT
@AuthRequired
@Path("{id}/deleteFiles")
@Consumes(MediaType.APPLICATION_JSON)
public Response deleteDatasetFiles(@Context ContainerRequestContext crc, @PathParam("id") String id,
        JsonArray fileIds) {
    try {
        getRequestAuthenticatedUserOrDie(crc);
    } catch (WrappedResponse ex) {
        return ex.getResponse();
    }
    return response(req -> {
        // Resolve the dataset first so that an unknown dataset is reported as such
        // regardless of what the payload looks like.
        Dataset dataset = findDatasetOrDie(id);

        if (fileIds == null) {
            return badRequest("The request body must be a JSON array of file IDs.");
        }

        // Collect the requested IDs into a set: duplicates collapse to one entry and the
        // membership test in the filter below is constant time.
        // (Fully qualified to avoid depending on imports not visible from this method.)
        java.util.Set<Long> requestedIds = new java.util.HashSet<>();
        for (JsonValue value : fileIds) {
            // Reject anything that is not an integral JSON number instead of failing on a cast.
            if (!(value instanceof JsonNumber) || !((JsonNumber) value).isIntegral()) {
                return badRequest("File IDs must be integers, but found: " + value);
            }
            requestedIds.add(((JsonNumber) value).longValue());
        }

        // Find the files to be deleted
        List<FileMetadata> filesToDelete = dataset.getOrCreateEditVersion().getFileMetadatas().stream()
                .filter(fileMetadata -> requestedIds.contains(fileMetadata.getDataFile().getId()))
                .collect(Collectors.toList());

        if (filesToDelete.isEmpty()) {
            return badRequest("No files found with the provided IDs.");
        }

        // Compare against the de-duplicated request so a repeated ID is not misreported
        // as a file missing from the dataset version.
        if (filesToDelete.size() != requestedIds.size()) {
            return badRequest(
                    "Some files listed are not present in the latest dataset version and cannot be deleted.");
        }

        try {
            UpdateDatasetVersionCommand update_cmd = new UpdateDatasetVersionCommand(dataset, req, filesToDelete);
            commandEngine.submit(update_cmd);

            for (FileMetadata fm : filesToDelete) {
                DataFile dataFile = fm.getDataFile();
                // Only files that are not released have their physical file removed.
                boolean deletePhysicalFile = !dataFile.isReleased();
                if (deletePhysicalFile) {
                    try {
                        fileService.finalizeFileDelete(dataFile.getId(),
                                fileService.getPhysicalFileToDelete(dataFile));
                    } catch (IOException ioex) {
                        // Best effort: the version update already succeeded, so only log.
                        logger.warning("Failed to delete the physical file associated with the deleted datafile id="
                                + dataFile.getId() + ", storage location: "
                                + fileService.getPhysicalFileToDelete(dataFile));
                    }
                }
            }
        } catch (PermissionException ex) {
            return error(FORBIDDEN, "You do not have permission to delete files on this dataset.");
        } catch (CommandException ex) {
            return error(BAD_REQUEST,
                    "File deletes failed for dataset ID " + id + " (CommandException): " + ex.getMessage());
        } catch (EJBException ex) {
            return error(jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR,
                    "File deletes failed for dataset ID " + id + " (EJBException): " + ex.getMessage());
        }
        // Report the number of files actually removed.
        return ok(filesToDelete.size() + " files deleted successfully");

    }, getRequestUser(crc));
}
}
137 changes: 137 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import static java.lang.Thread.sleep;
import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.hasEntry;
import static org.junit.jupiter.api.Assertions.*;

public class DatasetsIT {
Expand Down Expand Up @@ -5589,4 +5590,140 @@ public void testRequireFilesToPublishDatasets() {
publishDatasetResponse.prettyPrint();
publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
}

/**
 * Exercises the bulk file delete endpoint end to end: deletes files from a draft dataset,
 * deletes files after the dataset has been published, and checks the responses for an
 * unknown file, an unknown dataset and a user without permission on the dataset.
 */
@Test
public void testDeleteFiles() {
    // Owner account, collection and dataset used throughout the test.
    Response ownerResponse = UtilIT.createRandomUser();
    String username = UtilIT.getUsernameFromResponse(ownerResponse);
    String apiToken = UtilIT.getApiTokenFromResponse(ownerResponse);

    Response collectionResponse = UtilIT.createRandomDataverse(apiToken);
    String dataverseAlias = UtilIT.getAliasFromResponse(collectionResponse);

    Response datasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
    Integer datasetId = UtilIT.getDatasetIdFromResponse(datasetResponse);
    String datasetIdText = datasetId.toString();

    // Upload five small files; ids[i] is the data file ID of the (i + 1)-th upload.
    String[] licensePaths = {
        "scripts/api/data/licenses/licenseCC0-1.0.json",
        "scripts/api/data/licenses/licenseCC-BY-4.0.json",
        "scripts/api/data/licenses/licenseCC-BY-NC-4.0.json",
        "scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json",
        "scripts/api/data/licenses/licenseCC-BY-ND-4.0.json"
    };
    Long[] ids = new Long[licensePaths.length];
    JsonObjectBuilder json = Json.createObjectBuilder();
    for (int i = 0; i < licensePaths.length; i++) {
        json.add("description", "File " + (i + 1));
        Response uploadResponse = UtilIT.uploadFileViaNative(datasetIdText, licensePaths[i], json.build(), apiToken);
        ids[i] = JsonPath.from(uploadResponse.body().asString()).getLong("data.files[0].dataFile.id");
    }

    // Draft dataset: remove the first two files.
    Response draftDeleteResponse = UtilIT.deleteDatasetFiles(datasetIdText,
            Json.createArrayBuilder().add(ids[0]).add(ids[1]).build(), apiToken);
    draftDeleteResponse.then().assertThat()
            .statusCode(OK.getStatusCode())
            .body("data.message", startsWith("2"));

    // Files 1 and 2 must be gone, files 3 to 5 must still be listed.
    Response afterDraftDelete = UtilIT.nativeGet(datasetId, apiToken);
    afterDraftDelete.then().assertThat().statusCode(OK.getStatusCode());
    for (int i = 0; i < ids.length; i++) {
        int expectedCount = (i < 2) ? 0 : 1;
        afterDraftDelete.then().assertThat()
                .body("data.latestVersion.files.findAll { it.dataFile.id == " + ids[i] + " }.size()",
                        equalTo(expectedCount));
    }

    // Publish the collection and then the dataset so the next delete hits a published dataset.
    Response publishCollectionResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
    publishCollectionResponse.then().assertThat().statusCode(OK.getStatusCode());

    Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
    publishDatasetResponse.then().assertThat()
            .statusCode(OK.getStatusCode());

    // Published dataset: remove files 3 and 4.
    Response publishedDeleteResponse = UtilIT.deleteDatasetFiles(datasetIdText,
            Json.createArrayBuilder().add(ids[2]).add(ids[3]).build(), apiToken);
    publishedDeleteResponse.then().assertThat()
            .statusCode(OK.getStatusCode())
            .body("data.message", startsWith("2"));

    // Files 3 and 4 must be gone from the latest version, file 5 must remain.
    Response afterPublishedDelete = UtilIT.nativeGet(datasetId, apiToken);
    afterPublishedDelete.then().assertThat().statusCode(OK.getStatusCode());
    for (int i = 2; i < ids.length; i++) {
        int expectedCount = (i < 4) ? 0 : 1;
        afterPublishedDelete.then().assertThat()
                .body("data.latestVersion.files.findAll { it.dataFile.id == " + ids[i] + " }.size()",
                        equalTo(expectedCount));
    }

    // Error case: a file ID that does not exist.
    JsonArrayBuilder unknownFileIds = Json.createArrayBuilder();
    unknownFileIds.add(999999L);

    Response unknownFileResponse = UtilIT.deleteDatasetFiles(datasetIdText, unknownFileIds.build(), apiToken);
    unknownFileResponse.then().assertThat()
            .statusCode(BAD_REQUEST.getStatusCode())
            .body("message", containsString("No files"));

    // Error case: a dataset ID that does not exist.
    // NOTE(review): this builds the same array builder a second time; confirm the JSON-P
    // implementation in use still produces the intended payload on a repeated build().
    Response unknownDatasetResponse = UtilIT.deleteDatasetFiles("999999", unknownFileIds.build(), apiToken);
    unknownDatasetResponse.then().assertThat()
            .statusCode(NOT_FOUND.getStatusCode());

    // Error case: a second user tries to delete a file that does exist (file 5).
    Response strangerResponse = UtilIT.createRandomUser();
    String unauthorizedUsername = UtilIT.getUsernameFromResponse(strangerResponse);
    String unauthorizedUserApiToken = UtilIT.getApiTokenFromResponse(strangerResponse);

    Response forbiddenResponse = UtilIT.deleteDatasetFiles(datasetIdText,
            Json.createArrayBuilder().add(ids[4]).build(), unauthorizedUserApiToken);
    forbiddenResponse.then().assertThat()
            .statusCode(FORBIDDEN.getStatusCode());

    // The owner is made a superuser before the dataset is destroyed.
    Response superuserResponse = UtilIT.setSuperuserStatus(username, true);
    superuserResponse.then().assertThat()
            .statusCode(OK.getStatusCode());

    // Clean up: dataset, collection, then both users.
    assertEquals(200, UtilIT.destroyDataset(datasetId, apiToken).getStatusCode());
    assertEquals(200, UtilIT.deleteDataverse(dataverseAlias, apiToken).getStatusCode());
    assertEquals(200, UtilIT.deleteUser(unauthorizedUsername).getStatusCode());
    assertEquals(200, UtilIT.deleteUser(username).getStatusCode());
}
}
10 changes: 10 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.*;
import java.util.logging.Logger;
import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObject;
Expand Down Expand Up @@ -4570,4 +4571,13 @@ static Response deleteDataverseFeaturedItems(String dataverseAlias, String apiTo
.header(API_TOKEN_HTTP_HEADER, apiToken)
.delete("/api/dataverses/" + dataverseAlias + "/featuredItems");
}

/**
 * Sends a PUT request to the dataset's {@code deleteFiles} endpoint with the given
 * JSON array of file IDs as the request body.
 *
 * @param datasetId identifier placed in the request path
 * @param fileIds   JSON array whose string form is sent as the JSON body
 * @param apiToken  API token sent in the {@code API_TOKEN_HTTP_HEADER} header
 * @return the raw response of the call
 */
public static Response deleteDatasetFiles(String datasetId, JsonArray fileIds, String apiToken) {
    return given()
            .header(API_TOKEN_HTTP_HEADER, apiToken)
            .contentType(ContentType.JSON)
            .body(fileIds.toString())
            .put("/api/datasets/" + datasetId + "/deleteFiles");
}
}

0 comments on commit b0d136c

Please sign in to comment.