diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
index 85bb11d94b..3f4f14627a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
@@ -33,8 +33,18 @@ public enum Op implements HttpOpParam.Op {
     GETHOMEDIRECTORY(false, HttpURLConnection.HTTP_OK),
     GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true),
 
-    /** GET_BLOCK_LOCATIONS is a private unstable op. */
+    /**
+     * GET_BLOCK_LOCATIONS is a private/stable API op. It returns a
+     * {@link org.apache.hadoop.hdfs.protocol.LocatedBlocks}
+     * json object.
+     */
     GET_BLOCK_LOCATIONS(false, HttpURLConnection.HTTP_OK),
+    /**
+     * GETFILEBLOCKLOCATIONS is the public op that complies with
+     * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations}
+     * interface.
+     */
+    GETFILEBLOCKLOCATIONS(false, HttpURLConnection.HTTP_OK),
     GETACLSTATUS(false, HttpURLConnection.HTTP_OK),
     GETXATTRS(false, HttpURLConnection.HTTP_OK),
     GETTRASHROOT(false, HttpURLConnection.HTTP_OK),
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
index 619d439fba..d73fd45901 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
@@ -59,6 +59,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.fs.FileStatus;
@@ -72,6 +73,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.XAttrHelper;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
@@ -1075,6 +1077,18 @@ protected Response get(
           .build();
       }
     }
+    case GETFILEBLOCKLOCATIONS:
+    {
+      final long offsetValue = offset.getValue();
+      final Long lengthValue = length.getValue();
+      LocatedBlocks locatedBlocks = getRpcClientProtocol()
+          .getBlockLocations(fullpath, offsetValue, lengthValue != null ?
+              lengthValue : Long.MAX_VALUE);
+      BlockLocation[] locations =
+          DFSUtilClient.locatedBlocks2Locations(locatedBlocks);
+      final String js = JsonUtil.toJsonString(locations);
+      return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
+    }
     case GET_BLOCK_LOCATIONS:
     {
       final long offsetValue = offset.getValue();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
index 5c810bf0c2..f96664095a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
@@ -572,4 +572,36 @@ private static Object toJsonMap(
     m.put("dirStatus", toJsonMap(snapshottableDirectoryStatus.getDirStatus()));
     return m;
   }
+
+  private static Map<String, Object> toJsonMap(
+      final BlockLocation blockLocation) throws IOException {
+    if (blockLocation == null) {
+      return null;
+    }
+
+    final Map<String, Object> m = new HashMap<>();
+    m.put("length", blockLocation.getLength());
+    m.put("offset", blockLocation.getOffset());
+    m.put("corrupt", blockLocation.isCorrupt());
+    m.put("storageTypes", toJsonArray(blockLocation.getStorageTypes()));
+    m.put("cachedHosts", blockLocation.getCachedHosts());
+    m.put("hosts", blockLocation.getHosts());
+    m.put("names", blockLocation.getNames());
+    m.put("topologyPaths", blockLocation.getTopologyPaths());
+    return m;
+  }
+
+  public static String toJsonString(BlockLocation[] locations)
+      throws IOException {
+    if (locations == null) {
+      return null;
+    }
+    final Map<String, Object> m = new HashMap<>();
+    Object[] blockLocations = new Object[locations.length];
+    for (int i = 0; i < locations.length; i++) {
+      blockLocations[i] = toJsonMap(locations[i]);
+    }
+    m.put(BlockLocation.class.getSimpleName(), blockLocations);
+    return toJsonString("BlockLocations", m);
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
@@ ... @@
+### Get File Block Locations
+
+* Submit a HTTP GET request.
+
+        curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILEBLOCKLOCATIONS"
+
+    The client receives a response with a [`BlockLocations` JSON Object](#Block_Locations_JSON_Schema):
+
+        HTTP/1.1 200 OK
+        Content-Type: application/json
+        Transfer-Encoding: chunked
+
+        {
+          "BlockLocations" :
+          {
+            "BlockLocation":
+            [
+              {
+                "cachedHosts" : [],
+                "corrupt" : false,
+                "hosts" : ["host"],
+                "length" : 134217728,                   // length of this block
+                "names" : ["host:ip"],
+                "offset" : 0,                           // offset of the block in the file
+                "storageTypes" : ["DISK"],              // enum {RAM_DISK, SSD, DISK, ARCHIVE}
+                "topologyPaths" : ["/default-rack/hostname:ip"]
+              }, {
+                "cachedHosts" : [],
+                "corrupt" : false,
+                "hosts" : ["host"],
+                "length" : 62599364,
+                "names" : ["host:ip"],
+                "offset" : 134217728,
+                "storageTypes" : ["DISK"],
+                "topologyPaths" : ["/default-rack/hostname:ip"]
+              },
+              ...
+            ]
+          }
+        }
+
+See also: [`offset`](#Offset), [`length`](#Length), [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations
+
 Extended Attributes(XAttrs) Operations
 --------------------------------------
@@ -2227,6 +2271,135 @@ var snapshottableDirectoryStatus =
 }
 ```
 
+### BlockLocations JSON Schema
+
+A `BlockLocations` JSON object represents an array of `BlockLocation` JSON objects.
+
+```json
+{
+  "name"      : "BlockLocations",
+  "properties":
+  {
+    "BlockLocations":
+    {
+      "type"      : "object",
+      "properties":
+      {
+        "BlockLocation":
+        {
+          "description": "An array of BlockLocation",
+          "type"       : "array",
+          "items"      : blockLocationProperties      //See BlockLocation Properties
+        }
+      }
+    }
+  }
+}
+```
+
+See also [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html)
+
+### BlockLocation JSON Schema
+
+```json
+{
+  "name"      : "BlockLocation",
+  "properties":
+  {
+    "BlockLocation": blockLocationProperties      //See BlockLocation Properties
+  }
+}
+```
+
+See also [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html)
+
+#### BlockLocation Properties
+
+JavaScript syntax is used to define `blockLocationProperties` so that it can be referred to in both `BlockLocation` and `BlockLocations` JSON schemas.
+
+```javascript
+var blockLocationProperties =
+{
+  "type"      : "object",
+  "properties":
+  {
+    "cachedHosts":
+    {
+      "description": "Datanode hostnames with a cached replica",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "A datanode hostname",
+        "type"       : "string"
+      }
+    },
+    "corrupt":
+    {
+      "description": "True if the block is corrupted",
+      "type"       : "boolean",
+      "required"   : "true"
+    },
+    "hosts":
+    {
+      "description": "Datanode hostnames storing the block",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "A datanode hostname",
+        "type"       : "string"
+      }
+    },
+    "length":
+    {
+      "description": "Length of the block",
+      "type"       : "integer",
+      "required"   : "true"
+    },
+    "names":
+    {
+      "description": "Datanode IP:xferPort for accessing the block",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "DatanodeIP:xferPort",
+        "type"       : "string"
+      }
+    },
+    "offset":
+    {
+      "description": "Offset of the block in the file",
+      "type"       : "integer",
+      "required"   : "true"
+    },
+    "storageTypes":
+    {
+      "description": "Storage type of each replica",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "Storage type",
+        "enum"       : ["RAM_DISK", "SSD", "DISK", "ARCHIVE"]
+      }
+    },
+    "topologyPaths":
+    {
+      "description": "Datanode addresses in network topology",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "/rack/host:ip",
+        "type"       : "string"
+      }
+    }
+  }
+};
+```
+
 HTTP Query Parameter Dictionary
 -------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
index d7ed46b442..71948d6421 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
@@ -47,6 +47,7 @@
 import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
 import java.util.Arrays;
+import java.util.Map;
 import java.util.Random;
 
 import com.google.common.collect.ImmutableList;
@@ -118,6 +119,9 @@
 import org.junit.Test;
 import org.mockito.Mockito;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.type.MapType;
+
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyInt;
 import static org.mockito.Mockito.doReturn;
@@ -1136,6 +1140,150 @@ public void testWebHdfsGetBlockLocationsWithStorageType() throws Exception{
     }
   }
 
+  @Test
+  public void testWebHdfsGetBlockLocations() throws Exception {
+    MiniDFSCluster cluster = null;
+    final Configuration conf = WebHdfsTestUtil.createConf();
+    final int offset = 42;
+    final int length = 512;
+    final Path path = new Path("/foo");
+    byte[] contents = new byte[1024];
+    RANDOM.nextBytes(contents);
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      final WebHdfsFileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
+          WebHdfsConstants.WEBHDFS_SCHEME);
+      try (OutputStream os = fs.create(path)) {
+        os.write(contents);
+      }
+      BlockLocation[] locations = fs.getFileBlockLocations(path, offset,
+          length);
+
+      // Query webhdfs REST API to get block locations
+      InetSocketAddress addr = cluster.getNameNode().getHttpAddress();
+
+      // Case 1
+      // URL without length or offset parameters
+      URL url1 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS");
+
+      String response1 = getResponse(url1, "GET");
+      // Parse BlockLocation array from json output using object mapper
+      BlockLocation[] locationArray1 = toBlockLocationArray(response1);
+
+      // Verify the result from the REST call matches the file system API
+      verifyEquals(locations, locationArray1);
+
+      // Case 2
+      // URL contains length and offset parameters
+      URL url2 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" +
+              "&length=" + length + "&offset=" + offset);
+
+      String response2 = getResponse(url2, "GET");
+      BlockLocation[] locationArray2 = toBlockLocationArray(response2);
+
+      verifyEquals(locations, locationArray2);
+
+      // Case 3
+      // URL contains length parameter but without offset parameter
+      URL url3 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" +
+              "&length=" + length);
+
+      String response3 = getResponse(url3, "GET");
+      BlockLocation[] locationArray3 = toBlockLocationArray(response3);
+
+      verifyEquals(locations, locationArray3);
+
+      // Case 4
+      // URL contains offset parameter but without length parameter
+      URL url4 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" +
+              "&offset=" + offset);
+
+      String response4 = getResponse(url4, "GET");
+      BlockLocation[] locationArray4 = toBlockLocationArray(response4);
+
+      verifyEquals(locations, locationArray4);
+
+      // Case 5
+      // URL specifies an offset exceeding the file length
+      URL url5 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" +
+              "&offset=1200");
+
+      String response5 = getResponse(url5, "GET");
+      BlockLocation[] locationArray5 = toBlockLocationArray(response5);
+
+      // Expected an empty array of BlockLocation
+      verifyEquals(new BlockLocation[] {}, locationArray5);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  private BlockLocation[] toBlockLocationArray(String json)
+      throws IOException {
+    ObjectMapper mapper = new ObjectMapper();
+    MapType subType = mapper.getTypeFactory().constructMapType(
+        Map.class,
+        String.class,
+        BlockLocation[].class);
+    MapType rootType = mapper.getTypeFactory().constructMapType(
+        Map.class,
+        mapper.constructType(String.class),
+        mapper.constructType(subType));
+
+    Map<String, Map<String, BlockLocation[]>> jsonMap = mapper
+        .readValue(json, rootType);
+    Map<String, BlockLocation[]> locationMap = jsonMap
+        .get("BlockLocations");
+    BlockLocation[] locationArray = locationMap.get(
+        BlockLocation.class.getSimpleName());
+    return locationArray;
+  }
+
+  private void verifyEquals(BlockLocation[] locations1,
+      BlockLocation[] locations2) throws IOException {
+    for (int i = 0; i < locations1.length; i++) {
+      BlockLocation location1 = locations1[i];
+      BlockLocation location2 = locations2[i];
+      // Compare the fields that GETFILEBLOCKLOCATIONS serializes
+      Assert.assertEquals(location1.getLength(), location2.getLength());
+      Assert.assertEquals(location1.getOffset(), location2.getOffset());
+      Assert.assertArrayEquals(location1.getCachedHosts(),
+          location2.getCachedHosts());
+      Assert.assertArrayEquals(location1.getHosts(), location2.getHosts());
+      Assert.assertArrayEquals(location1.getNames(), location2.getNames());
+      Assert.assertArrayEquals(location1.getTopologyPaths(),
+          location2.getTopologyPaths());
+    }
+  }
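
For reference, the following standalone sketch exercises the new op end to end, in the spirit of the test above. It is illustrative only: the class name `GetFileBlockLocationsExample`, the NameNode HTTP address `namenode.example.com:9870`, and the file path `/foo` are placeholder assumptions, and the Jackson deserialization simply mirrors the `toBlockLocationArray` helper rather than any public WebHDFS client API.

```java
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Scanner;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import org.apache.hadoop.fs.BlockLocation;

public class GetFileBlockLocationsExample {
  public static void main(String[] args) throws IOException {
    // Placeholder NameNode HTTP address and file path. offset and length
    // are optional; per NamenodeWebHdfsMethods above, a missing length
    // defaults to Long.MAX_VALUE.
    URL url = new URL("http://namenode.example.com:9870/webhdfs/v1/foo"
        + "?op=GETFILEBLOCKLOCATIONS&offset=0&length=1024");

    // Issue the GET request and read the JSON response body.
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    String json;
    try {
      conn.setRequestMethod("GET");
      try (Scanner scanner = new Scanner(conn.getInputStream(),
          StandardCharsets.UTF_8.name()).useDelimiter("\\A")) {
        json = scanner.hasNext() ? scanner.next() : "";
      }
    } finally {
      conn.disconnect();
    }

    // Deserialize {"BlockLocations": {"BlockLocation": [...]}} into a
    // BlockLocation[], exactly as toBlockLocationArray does in the test.
    ObjectMapper mapper = new ObjectMapper();
    MapType subType = mapper.getTypeFactory().constructMapType(
        Map.class, String.class, BlockLocation[].class);
    MapType rootType = mapper.getTypeFactory().constructMapType(
        Map.class,
        mapper.constructType(String.class),
        mapper.constructType(subType));
    Map<String, Map<String, BlockLocation[]>> jsonMap =
        mapper.readValue(json, rootType);
    BlockLocation[] locations =
        jsonMap.get("BlockLocations").get(
            BlockLocation.class.getSimpleName());

    for (BlockLocation location : locations) {
      System.out.println(location); // offset, length, hosts, ...
    }
  }
}
```

As with any other WebHDFS op, an insecure cluster may additionally need a `user.name` query parameter, and a secure one SPNEGO authentication or a delegation token.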