Skip to content

Commit 4d78253

Browse files
committed
HADOOP-19291. RawLocalFileSystem to allow overlapping ranges (#7101)
ChecksumFileSystem creates the chunked ranges based on the checksum chunk size and then calls readVectored on Raw Local which may lead to overlapping ranges in some cases. Contributed by: Mukund Thakur
1 parent 21f1789 commit 4d78253

File tree

5 files changed

+46
-15
lines changed

5 files changed

+46
-15
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
import org.apache.hadoop.util.Shell;
6969
import org.apache.hadoop.util.StringUtils;
7070

71-
import static org.apache.hadoop.fs.VectoredReadUtils.validateAndSortRanges;
71+
import static org.apache.hadoop.fs.VectoredReadUtils.sortRangeList;
72+
import static org.apache.hadoop.fs.VectoredReadUtils.validateRangeRequest;
7273
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
7374
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_BYTES;
7475
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_EXCEPTIONS;
@@ -320,10 +321,10 @@ public void readVectored(List<? extends FileRange> ranges,
320321
IntFunction<ByteBuffer> allocate) throws IOException {
321322

322323
// Validate, but do not pass in a file length as it may change.
323-
List<? extends FileRange> sortedRanges = validateAndSortRanges(ranges,
324-
Optional.empty());
324+
List<? extends FileRange> sortedRanges = sortRangeList(ranges);
325325
// Set up all of the futures, so that we can use them if things fail
326326
for(FileRange range: sortedRanges) {
327+
validateRangeRequest(range);
327328
range.setData(new CompletableFuture<>());
328329
}
329330
try {

hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md

+7-2
Original file line numberDiff line numberDiff line change
@@ -623,8 +623,13 @@ support -and fallback everywhere else.
623623

624624
The restriction "no overlapping ranges" was only initially enforced in
625625
the S3A connector, which would raise `UnsupportedOperationException`.
626-
Adding the range check as a precondition for all implementations guarantees
627-
consistent behavior everywhere.
626+
Adding the range check as a precondition for all implementations (Raw Local
627+
being an exception) guarantees consistent behavior everywhere.
628+
The reason Raw Local doesn't have this precondition is that ChecksumFileSystem
629+
creates the chunked ranges based on the checksum chunk size and then calls
630+
readVectored on Raw Local, which may lead to overlapping ranges in some cases.
631+
For details, see [HADOOP-19291](https://issues.apache.org/jira/browse/HADOOP-19291).
632+
628633
For reliable use with older hadoop releases with the API: sort the list of ranges
629634
and check for overlaps before calling `readVectored()`.
630635

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java

+28-10
Original file line numberDiff line numberDiff line change
@@ -270,23 +270,42 @@ public void testSomeRangesMergedSomeUnmerged() throws Exception {
270270
}
271271

272272
/**
273-
* Vectored IO doesn't support overlapping ranges.
273+
* Most file systems won't support overlapping ranges.
274+
* Currently, only Raw Local supports them.
274275
*/
275276
@Test
276277
public void testOverlappingRanges() throws Exception {
277-
verifyExceptionalVectoredRead(
278-
getSampleOverlappingRanges(),
279-
IllegalArgumentException.class);
278+
if (!isSupported(VECTOR_IO_OVERLAPPING_RANGES)) {
279+
verifyExceptionalVectoredRead(
280+
getSampleOverlappingRanges(),
281+
IllegalArgumentException.class);
282+
} else {
283+
try (FSDataInputStream in = openVectorFile()) {
284+
List<FileRange> fileRanges = getSampleOverlappingRanges();
285+
in.readVectored(fileRanges, allocate);
286+
validateVectoredReadResult(fileRanges, DATASET, 0);
287+
returnBuffersToPoolPostRead(fileRanges, pool);
288+
}
289+
}
280290
}
281291

282292
/**
283293
* Same ranges are special case of overlapping.
284294
*/
285295
@Test
286296
public void testSameRanges() throws Exception {
287-
verifyExceptionalVectoredRead(
288-
getSampleSameRanges(),
289-
IllegalArgumentException.class);
297+
if (!isSupported(VECTOR_IO_OVERLAPPING_RANGES)) {
298+
verifyExceptionalVectoredRead(
299+
getSampleSameRanges(),
300+
IllegalArgumentException.class);
301+
} else {
302+
try (FSDataInputStream in = openVectorFile()) {
303+
List<FileRange> fileRanges = getSampleSameRanges();
304+
in.readVectored(fileRanges, allocate);
305+
validateVectoredReadResult(fileRanges, DATASET, 0);
306+
returnBuffersToPoolPostRead(fileRanges, pool);
307+
}
308+
}
290309
}
291310

292311
/**
@@ -329,10 +348,9 @@ public void testSomeRandomNonOverlappingRanges() throws Exception {
329348
public void testConsecutiveRanges() throws Exception {
330349
List<FileRange> fileRanges = new ArrayList<>();
331350
final int offset = 500;
332-
final int length = 100;
351+
final int length = 2011;
333352
range(fileRanges, offset, length);
334-
range(fileRanges, 600, 200);
335-
range(fileRanges, 800, 100);
353+
range(fileRanges, offset + length, length);
336354
try (FSDataInputStream in = openVectorFile()) {
337355
in.readVectored(fileRanges, allocate);
338356
validateVectoredReadResult(fileRanges, DATASET, 0);

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java

+2
Original file line numberDiff line numberDiff line change
@@ -261,4 +261,6 @@ public interface ContractOptions {
261261
* Does vector read check file length on open rather than in the read call?
262262
*/
263263
String VECTOR_IO_EARLY_EOF_CHECK = "vector-io-early-eof-check";
264+
265+
String VECTOR_IO_OVERLAPPING_RANGES = "vector-io-overlapping-ranges";
264266
}

hadoop-common-project/hadoop-common/src/test/resources/contract/rawlocal.xml

+5
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,9 @@
142142
<value>true</value>
143143
</property>
144144

145+
<property>
146+
<name>fs.contract.vector-io-overlapping-ranges</name>
147+
<value>true</value>
148+
</property>
149+
145150
</configuration>

0 commit comments

Comments
 (0)