From f4b6267465d139bfdaf75e25761672eaf61d8a11 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Tue, 12 Sep 2017 17:35:30 -0700 Subject: [PATCH] HDFS-12222. Document and test BlockLocation for erasure-coded files. Contributed by Huafeng Wang. --- .../apache/hadoop/fs/AbstractFileSystem.java | 5 + .../org/apache/hadoop/fs/BlockLocation.java | 28 +++ .../org/apache/hadoop/fs/FileContext.java | 33 +++- .../java/org/apache/hadoop/fs/FileSystem.java | 32 ++- .../apache/hadoop/fs/LocatedFileStatus.java | 7 + .../main/java/org/apache/hadoop/fs/Hdfs.java | 15 ++ .../org/apache/hadoop/hdfs/DFSClient.java | 5 + .../hadoop/hdfs/DistributedFileSystem.java | 14 ++ .../hdfs/protocol/HdfsLocatedFileStatus.java | 12 +- .../hdfs/TestDistributedFileSystem.java | 1 - .../TestDistributedFileSystemWithECFile.java | 186 ++++++++++++++++++ 11 files changed, 333 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index df14ee8762..cf484cae75 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -928,6 +928,11 @@ public FileStatus next() { * The specification of this method matches that of * {@link FileContext#listLocatedStatus(Path)} except that Path f * must be for this file system. + * + * In HDFS implementation, the BlockLocation of returned LocatedFileStatus + * will have different formats for replicated and erasure coded file. Please + * refer to {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} + * for more details. 
*/ public RemoteIterator listLocatedStatus(final Path f) throws AccessControlException, FileNotFoundException, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java index 591febf0fd..4dae2334d5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java @@ -28,6 +28,34 @@ * Represents the network location of a block, information about the hosts * that contain block replicas, and other block metadata (E.g. the file * offset associated with the block, length, whether it is corrupt, etc). + * + * For a single BlockLocation, it will have different meanings for replicated + * and erasure coded files. + * + * If the file is 3-replicated, offset and length of a BlockLocation represent + * the absolute value in the file and the hosts are the 3 datanodes that + * holding the replicas. Here is an example: + *
+ * BlockLocation(offset: 0, length: BLOCK_SIZE,
+ *   hosts: {"host1:9866", "host2:9866", "host3:9866"})
+ * 
+ * + * And if the file is erasure-coded, each BlockLocation represents a logical + * block group. Value offset is the offset of a block group in the file and + * value length is the total length of a block group. Hosts of a BlockLocation + * are the datanodes that hold all the data blocks and parity blocks of a + * block group. + * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + * A BlockLocation example will be like: + *
+ * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+ * 
+ * + * Please refer to + * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + * {@link FileContext#getFileBlockLocations(Path, long, long)} + * for more examples. */ @InterfaceAudience.Public @InterfaceStability.Stable diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index 21733b3e4c..a3cc550b48 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -1293,7 +1293,36 @@ public Path next(final AbstractFileSystem fs, final Path p) * * This call is most helpful with DFS, where it returns * hostnames of machines that contain the given file. - * + * + * In HDFS, if file is three-replicated, the returned array contains + * elements like: + *
+   * BlockLocation(offset: 0, length: BLOCK_SIZE,
+   *   hosts: {"host1:9866", "host2:9866", "host3:9866"})
+   * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+   *   hosts: {"host2:9866", "host3:9866", "host4:9866"})
+   * 
+ * + * And if a file is erasure-coded, the returned BlockLocation are logical + * block groups. + * + * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + * there will be one BlockLocation returned, with 0 offset, actual file size + * and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + * 2. If the file size is less than one group size but greater than one + * stripe size, then there will be one BlockLocation returned, with 0 offset, + * actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + * the actual blocks. + * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + * for example, then the result will be like: + *
+   * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+   *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+   * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+   *   "host4:9866", "host5:9866"})
+   * 
+ * * @param f - get blocklocations of this file * @param start position (byte offset) * @param len (in bytes) @@ -1527,7 +1556,7 @@ public RemoteIterator next(final AbstractFileSystem fs, * Return the file's status and block locations If the path is a file. * * If a returned status is a file, it contains the file's block locations. - * + * * @param f is the path * * @return an iterator that traverses statuses of the files/directories diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index fc7b9b2f50..d43e41d005 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -799,7 +799,36 @@ protected void checkPath(Path path) { * The default implementation returns an array containing one element: *
    * BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
-   * 
> + * + * + * In HDFS, if file is three-replicated, the returned array contains + * elements like: + *
+   * BlockLocation(offset: 0, length: BLOCK_SIZE,
+   *   hosts: {"host1:9866", "host2:9866", "host3:9866"})
+   * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+   *   hosts: {"host2:9866", "host3:9866", "host4:9866"})
+   * 
+ * + * And if a file is erasure-coded, the returned BlockLocation are logical + * block groups. + * + * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + * there will be one BlockLocation returned, with 0 offset, actual file size + * and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + * 2. If the file size is less than one group size but greater than one + * stripe size, then there will be one BlockLocation returned, with 0 offset, + * actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + * the actual blocks. + * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + * for example, then the result will be like: + *
+   * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+   *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+   * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+   *   "host4:9866", "host5:9866"})
+   * 
* * @param file FilesStatus to get data from * @param start offset into the given file @@ -2115,6 +2144,7 @@ public RemoteIterator listStatusIterator(final Path p) * List the statuses and block locations of the files in the given path. * Does not guarantee to return the iterator that traverses statuses * of the files in a sorted order. + * *
    * If the path is a directory,
    *   if recursive is false, returns files in the directory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
index dbb751dc44..29e19989ed 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
@@ -123,6 +123,13 @@ public LocatedFileStatus(long length, boolean isdir,
 
   /**
    * Get the file's block locations
+   *
+   * In HDFS, the returned BlockLocation will have different formats for
+   * replicated and erasure coded file.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
+   *
    * @return the file's block locations
    */
   public BlockLocation[] getBlockLocations() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
index cd870ca5ae..0138195345 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
@@ -115,6 +115,14 @@ public boolean delete(Path f, boolean recursive)
     return dfs.delete(getUriPath(f), recursive);
   }
 
+  /**
+   * The returned BlockLocation will have different formats for replicated
+   * and erasure coded file.
+   *
+   * Please refer to
+   * {@link FileContext#getFileBlockLocations(Path, long, long)}
+   * for more details.
+   */
   @Override
   public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
       throws IOException, UnresolvedLinkException {
@@ -165,6 +173,13 @@ public FsServerDefaults getServerDefaults(final Path f) throws IOException {
     return dfs.getServerDefaults();
   }
 
+  /**
+   * The BlockLocation of returned LocatedFileStatus will have different
+   * formats for replicated and erasure coded file.
+   * Please refer to
+   * {@link FileContext#getFileBlockLocations(Path, long, long)} for
+   * more details.
+   */
   @Override
   public RemoteIterator<LocatedFileStatus> listLocatedStatus(
       final Path p)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 9239df39c5..772049d35d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -72,6 +72,7 @@
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileEncryptionInfo;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.FsStatus;
@@ -866,6 +867,10 @@ boolean recoverLease(String src) throws IOException {
    * data-placement when performing operations.  For example, the
    * MapReduce system tries to schedule tasks on the same machines
    * as the data-block the task processes.
+   *
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
    */
   public BlockLocation[] getBlockLocations(String src, long start,
       long length) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 44caed60d6..f6331cf90d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -240,6 +240,13 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
     return getFileBlockLocations(file.getPath(), start, len);
   }
 
+  /**
+   * The returned BlockLocation will have different formats for replicated
+   * and erasure coded file.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
+   */
   @Override
   public BlockLocation[] getFileBlockLocations(Path p,
       final long start, final long len) throws IOException {
@@ -1040,6 +1047,13 @@ public FileStatus[] next(final FileSystem fs, final Path p)
     }.resolve(this, absF);
   }
 
+  /**
+   * The BlockLocation of returned LocatedFileStatus will have different
+   * formats for replicated and erasure coded file.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} for
+   * more details.
+   */
   @Override
   protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path p,
       final PathFilter filter)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
index b82a860cf4..193aae25a2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
@@ -78,6 +78,17 @@ public LocatedBlocks getBlockLocations() {
     return locations;
   }
 
+  /**
+   * This function is used to transform the underlying HDFS LocatedBlocks to
+   * BlockLocations.
+   *
+   * The returned BlockLocation will have different formats for replicated
+   * and erasure coded file.
+   * Please refer to
+   * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations
+   * (FileStatus, long, long)}
+   * for examples.
+   */
   public final LocatedFileStatus makeQualifiedLocated(URI defaultUri,
       Path path) {
     makeQualified(defaultUri, path);
@@ -96,5 +107,4 @@ public int hashCode() {
     // satisfy findbugs
     return super.hashCode();
   }
-
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
index 8e54e5f833..987992e5a3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
@@ -50,7 +50,6 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicReference;
 
-import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java
new file mode 100644
index 0000000000..a6a3a80ba3
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests FileSystem and FileContext block-location queries
+ * (getFileBlockLocations and located listings) for erasure coded files.
+ */
+public class TestDistributedFileSystemWithECFile {
+  private final ErasureCodingPolicy ecPolicy =
+      StripedFileTestUtil.getDefaultECPolicy();
+  private final int cellSize = ecPolicy.getCellSize();
+  private final short dataBlocks = (short) ecPolicy.getNumDataUnits();
+  private final short parityBlocks = (short) ecPolicy.getNumParityUnits();
+  private final int numDNs = dataBlocks + parityBlocks;
+  private final int stripesPerBlock = 4;
+  private final int blockSize = stripesPerBlock * cellSize;
+  private final int blockGroupSize = blockSize * dataBlocks;
+
+  private MiniDFSCluster cluster;
+  private FileContext fileContext;
+  private DistributedFileSystem fs;
+  private Configuration conf = new HdfsConfiguration();
+
+  @Before
+  public void setup() throws IOException {
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY,
+        false);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();
+    fileContext = FileContext.getFileContext(cluster.getURI(0), conf);
+    fs = cluster.getFileSystem();
+    fs.mkdirs(new Path("/ec"));
+    cluster.getFileSystem().getClient().setErasureCodingPolicy("/ec",
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private void createFile(String path, int size) throws Exception {
+    byte[] expected = StripedFileTestUtil.generateBytes(size);
+    Path src = new Path(path);
+    DFSTestUtil.writeFile(fs, src, new String(expected));
+    StripedFileTestUtil.waitBlockGroupsReported(fs, src.toString());
+    StripedFileTestUtil.verifyLength(fs, src, size);
+  }
+
+  @Test(timeout=60000)
+  public void testListECFilesSmallerThanOneCell() throws Exception {
+    createFile("/ec/smallcell", 1);
+    final List<LocatedFileStatus> retVal = new ArrayList<>();
+    final RemoteIterator<LocatedFileStatus> iter =
+        cluster.getFileSystem().listFiles(new Path("/ec"), true);
+    while (iter.hasNext()) {
+      retVal.add(iter.next());
+    }
+    assertTrue(retVal.size() == 1);
+    LocatedFileStatus fileStatus = retVal.get(0);
+    assertSmallerThanOneCell(fileStatus.getBlockLocations());
+
+    BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+        fileStatus, 0, fileStatus.getLen());
+    assertSmallerThanOneCell(locations);
+
+    // Repeat the same checks through the FileContext API.
+    fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next();
+    assertSmallerThanOneCell(fileStatus.getBlockLocations());
+    locations = fileContext.getFileBlockLocations(new Path("/ec/smallcell"),
+        0, fileStatus.getLen());
+    assertSmallerThanOneCell(locations);
+  }
+
+  private void assertSmallerThanOneCell(BlockLocation[] locations)
+      throws IOException {
+    assertTrue(locations.length == 1);
+    BlockLocation blockLocation = locations[0];
+    assertTrue(blockLocation.getOffset() == 0);
+    assertTrue(blockLocation.getLength() == 1);
+    assertTrue(blockLocation.getHosts().length == 1 + parityBlocks);
+  }
+
+  @Test(timeout=60000)
+  public void testListECFilesSmallerThanOneStripe() throws Exception {
+    int dataBlocksNum = 3;
+    createFile("/ec/smallstripe", cellSize * dataBlocksNum);
+    RemoteIterator<LocatedFileStatus> iter =
+        cluster.getFileSystem().listFiles(new Path("/ec"), true);
+    LocatedFileStatus fileStatus = iter.next();
+    assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum);
+
+    BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+        fileStatus, 0, fileStatus.getLen());
+    assertSmallerThanOneStripe(locations, dataBlocksNum);
+
+    // Repeat the same checks through the FileContext API.
+    fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next();
+    assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum);
+    locations = fileContext.getFileBlockLocations(new Path("/ec/smallstripe"),
+        0, fileStatus.getLen());
+    assertSmallerThanOneStripe(locations, dataBlocksNum);
+  }
+
+  private void assertSmallerThanOneStripe(BlockLocation[] locations,
+      int dataBlocksNum) throws IOException {
+    int expectedHostNum = dataBlocksNum + parityBlocks;
+    assertTrue(locations.length == 1);
+    BlockLocation blockLocation = locations[0];
+    assertTrue(blockLocation.getHosts().length == expectedHostNum);
+    assertTrue(blockLocation.getOffset() == 0);
+    assertTrue(blockLocation.getLength() == dataBlocksNum * cellSize);
+  }
+
+  @Test(timeout=60000)
+  public void testListECFilesMoreThanOneBlockGroup() throws Exception {
+    createFile("/ec/group", blockGroupSize + 123);
+    RemoteIterator<LocatedFileStatus> iter =
+        cluster.getFileSystem().listFiles(new Path("/ec"), true);
+    LocatedFileStatus fileStatus = iter.next();
+    assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123);
+
+    BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+        fileStatus, 0, fileStatus.getLen());
+    assertMoreThanOneBlockGroup(locations, 123);
+
+    // Repeat the same checks through the FileContext API.
+    iter = fileContext.listLocatedStatus(new Path("/ec"));
+    fileStatus = iter.next();
+    assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123);
+    locations = fileContext.getFileBlockLocations(new Path("/ec/group"),
+        0, fileStatus.getLen());
+    assertMoreThanOneBlockGroup(locations, 123);
+  }
+
+  private void assertMoreThanOneBlockGroup(BlockLocation[] locations,
+      int lastBlockSize) throws IOException {
+    assertTrue(locations.length == 2);
+    BlockLocation firstBlockGroup = locations[0];
+    assertTrue(firstBlockGroup.getHosts().length == numDNs);
+    assertTrue(firstBlockGroup.getOffset() == 0);
+    assertTrue(firstBlockGroup.getLength() == blockGroupSize);
+    BlockLocation lastBlock = locations[1];
+    assertTrue(lastBlock.getHosts().length == 1 + parityBlocks);
+    assertTrue(lastBlock.getOffset() == blockGroupSize);
+    assertTrue(lastBlock.getLength() == lastBlockSize);
+  }
+}