-
Notifications
You must be signed in to change notification settings - Fork 9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HADOOP-19494: [ABFS] Fix Case Sensitivity Issue for hdi_isfolder metadata #7496
base: trunk
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1528,10 +1528,31 @@ public AbfsRestOperation deleteBlobPath(final Path blobPath, | |
*/ | ||
@Override | ||
public boolean checkIsDir(AbfsHttpOperation result) { | ||
String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER); | ||
String dirHeaderName = getHeaderNameIgnoreCase(result, X_MS_META_HDI_ISFOLDER); | ||
// If the header is not found, return false (not a directory) | ||
if (dirHeaderName == null) { | ||
return false; | ||
} | ||
|
||
String resourceType = result.getResponseHeader(dirHeaderName); | ||
return resourceType != null && resourceType.equals(TRUE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes since we already checked for null in the getHeaderNameIgnoreCase method, this repeated check may not be needed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since it was an existing line of code, I didn’t change it. However, it does make sense to remove this null check. |
||
} | ||
|
||
/** | ||
* Get the header name case-insensitively. | ||
* @param result executed rest operation containing response from server. | ||
* @param header The header to be checked. | ||
* @return Header if found, null otherwise. | ||
*/ | ||
private String getHeaderNameIgnoreCase(AbfsHttpOperation result, String header) { | ||
Map<String, List<String>> responseHeaders = result.getResponseHeaders(); | ||
// Search for the key case-insensitively in the headers | ||
return responseHeaders.keySet().stream() | ||
.filter(key -> key != null && key.equalsIgnoreCase(header)) | ||
.findFirst() | ||
.orElse(null); // Return null if no match is found | ||
} | ||
|
||
/** | ||
* Returns true if the status code lies in the range of user error. | ||
* In the case of HTTP_CONFLICT for PutBlockList we fall back to DFS and hence | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
import java.net.URL; | ||
import java.time.Duration; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -776,15 +777,16 @@ public AbfsHttpOperationWithFixedResultForGetFileStatus(final URL url, | |
@Override | ||
public String getResponseHeader(final String httpHeader) { | ||
// Directories on FNS-Blob are identified by a special metadata header. | ||
if (httpHeader.equals(X_MS_META_HDI_ISFOLDER)) { | ||
if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) { | ||
return TRUE; | ||
} | ||
return EMPTY_STRING; | ||
} | ||
|
||
@Override | ||
public Map<String, List<String>> getResponseHeaders() { | ||
return new HashMap<>(); | ||
return Collections.singletonMap(X_MS_META_HDI_ISFOLDER, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we adding this entry in the map for all getResponse header calls ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AbfsHttpOperationWithFixedResultForGetFileStatus is used in one place where we are checking for an implicit directory. In the case of an implicit directory, I am applying the same logic as in the method httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER), where we return true if the header is X_MS_META_HDI_ISFOLDER. |
||
Collections.singletonList(TRUE)); | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,11 @@ | |
|
||
package org.apache.hadoop.fs.azurebfs; | ||
|
||
import java.net.URL; | ||
import java.util.List; | ||
import java.util.UUID; | ||
|
||
import org.assertj.core.api.Assertions; | ||
import org.junit.Assume; | ||
import org.junit.Test; | ||
import org.mockito.Mockito; | ||
|
@@ -28,14 +31,25 @@ | |
import org.apache.hadoop.fs.FileAlreadyExistsException; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsClient; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; | ||
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType; | ||
|
||
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; | ||
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; | ||
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; | ||
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; | ||
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; | ||
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; | ||
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; | ||
import static org.apache.hadoop.test.LambdaTestUtils.intercept; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.ArgumentMatchers.anyList; | ||
import static org.mockito.ArgumentMatchers.eq; | ||
|
||
/** | ||
* Test mkdir operation. | ||
|
@@ -167,4 +181,107 @@ public void testMkdirWithExistingFilename() throws Exception { | |
intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath"))); | ||
intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath/newDir"))); | ||
} | ||
|
||
/** | ||
* Test mkdirs with HDI folder configuration, | ||
* verifying the correct header and directory state. | ||
*/ | ||
@Test | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add test for other variations such as ALL Caps, only one later caps, all small. Also one test can be when we set multiple metadata keys, the directory one is correctly identified. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, will add the test cases. |
||
public void testMkdirsWithDifferentCaseHDIConfig() throws Exception { | ||
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) { | ||
assumeBlobServiceType(); | ||
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs); | ||
String configName = X_MS_METADATA_PREFIX + "Hdi_isfolder"; | ||
// Mock the operation to modify the headers | ||
mockAbfsRestOperation(abfsBlobClient, configName); | ||
|
||
// Create the path and invoke mkdirs method | ||
Path path = new Path("/testPath"); | ||
fs.mkdirs(path); | ||
|
||
// Assert that the response header has the updated value | ||
AbfsHttpOperation op = abfsBlobClient.getPathStatus(path.toUri().getPath(), | ||
true, getTestTracingContext(fs, true), | ||
null).getResult(); | ||
|
||
// Verify the header and directory state | ||
Assertions.assertThat(op.getResponseHeader(configName)) | ||
.describedAs("Header should be set to true") | ||
.isEqualTo(TRUE); | ||
Assertions.assertThat(abfsBlobClient.checkIsDir(op)) | ||
.describedAs("Directory should be marked as true") | ||
.isTrue(); | ||
} | ||
} | ||
|
||
/** | ||
* Test mkdirs with wrong HDI folder configuration, | ||
* verifying the correct header and directory state. | ||
*/ | ||
@Test | ||
public void testMkdirsWithWrongHDIConfig() throws Exception { | ||
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) { | ||
assumeBlobServiceType(); | ||
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs); | ||
String configName = X_MS_METADATA_PREFIX + "Hdi_isfolder1"; | ||
|
||
// Mock the operation to modify the headers | ||
mockAbfsRestOperation(abfsBlobClient, configName); | ||
|
||
// Create the path and invoke mkdirs method | ||
Path path = new Path("/testPath"); | ||
fs.mkdirs(path); | ||
|
||
// Assert the header and directory state | ||
AbfsHttpOperation op = abfsBlobClient.getPathStatus(path.toUri().getPath(), | ||
true, getTestTracingContext(fs, true), | ||
null).getResult(); | ||
|
||
// Verify the header and directory state | ||
Assertions.assertThat(op.getResponseHeader(configName)) | ||
.describedAs("Header should be set to TRUE") | ||
.isEqualTo(TRUE); | ||
Assertions.assertThat(abfsBlobClient.checkIsDir(op)) | ||
.describedAs("No Directory config set, should be marked as false") | ||
.isFalse(); | ||
} | ||
} | ||
|
||
/** | ||
* Helper method to mock the AbfsRestOperation and modify the request headers. | ||
* | ||
* @param abfsBlobClient the mocked AbfsBlobClient | ||
* @param newHeader the header to add in place of the old one | ||
*/ | ||
private void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, String newHeader) { | ||
Mockito.doAnswer(invocation -> { | ||
List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3); | ||
|
||
// Remove the actual HDI config header and add the new one | ||
requestHeaders.removeIf(header -> | ||
HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName())); | ||
requestHeaders.add(new AbfsHttpHeader(newHeader, TRUE)); | ||
|
||
// Call the real method | ||
return invocation.callRealMethod(); | ||
}).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob), | ||
eq(HTTP_METHOD_PUT), any(URL.class), anyList()); | ||
} | ||
|
||
/** | ||
* Helper method to mock the AbfsBlobClient and set up the client handler. | ||
* | ||
* @param fs the AzureBlobFileSystem instance | ||
* @return the mocked AbfsBlobClient | ||
*/ | ||
private AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem fs) { | ||
AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); | ||
AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); | ||
AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy( | ||
clientHandler.getClient()); | ||
fs.getAbfsStore().setClient(abfsBlobClient); | ||
fs.getAbfsStore().setClientHandler(clientHandler); | ||
Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient(); | ||
return abfsBlobClient; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since getResponseHeader method returns either empty string or true- should we check for resourceType != EMPTY_STRING instead of null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the case of an empty string, resourceType.equals(TRUE) will return false, so there’s no need for an explicit check for an empty string.
The null case occurs when the header we are looking for does not exist, which is why this check is needed.