From db6d243cf89d25fefbffd4c8721e14d9246b5a16 Mon Sep 17 00:00:00 2001
From: Andrew Wang
Date: Tue, 13 Sep 2016 11:02:36 -0700
Subject: [PATCH] HDFS-10837. Standardize serialization of WebHDFS
 DirectoryListing.

---
 .../hadoop/hdfs/web/JsonUtilClient.java           |  42 +++-
 .../hadoop/hdfs/web/WebHdfsFileSystem.java        |  19 +-
 .../org/apache/hadoop/hdfs/web/JsonUtil.java      |  34 ++--
 .../hadoop-hdfs/src/site/markdown/WebHDFS.md      | 183 ++++++++++++------
 4 files changed, 182 insertions(+), 96 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java
index 35f0f9ade7..4d1d4e8815 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java
@@ -143,23 +143,36 @@ static HdfsFileStatus toFileStatus(final Map<?, ?> json,
         storagePolicy, null);
   }
 
+  static HdfsFileStatus[] toHdfsFileStatusArray(final Map<?, ?> json) {
+    Preconditions.checkNotNull(json);
+    final Map<?, ?> rootmap =
+        (Map<?, ?>) json.get(FileStatus.class.getSimpleName() + "es");
+    final List<?> array = JsonUtilClient.getList(rootmap,
+        FileStatus.class.getSimpleName());
+
+    // convert FileStatus
+    Preconditions.checkNotNull(array);
+    final HdfsFileStatus[] statuses = new HdfsFileStatus[array.size()];
+    int i = 0;
+    for (Object object : array) {
+      final Map<?, ?> m = (Map<?, ?>) object;
+      statuses[i++] = JsonUtilClient.toFileStatus(m, false);
+    }
+    return statuses;
+  }
+
   static DirectoryListing toDirectoryListing(final Map<?, ?> json) {
     if (json == null) {
       return null;
     }
-    final List<?> list = JsonUtilClient.getList(json,
-        "partialListing");
+    final Map<?, ?> listing = getMap(json, "DirectoryListing");
+    final Map<?, ?> partialListing = getMap(listing, "partialListing");
+    HdfsFileStatus[] fileStatuses = toHdfsFileStatusArray(partialListing);
 
-    HdfsFileStatus[] partialListing = new HdfsFileStatus[list.size()];
-    int i = 0;
-    for (Object o : list) {
-      final Map<?, ?> m = (Map<?, ?>) o;
-      partialListing[i++] = toFileStatus(m, false);
-    }
-    int remainingEntries = getInt(json, "remainingEntries", -1);
+    int remainingEntries = getInt(listing, "remainingEntries", -1);
     Preconditions.checkState(remainingEntries != -1,
         "remainingEntries was not set");
-    return new DirectoryListing(partialListing, remainingEntries);
+    return new DirectoryListing(fileStatuses, remainingEntries);
   }
 
   /** Convert a Json map to an ExtendedBlock object. */
@@ -210,6 +223,15 @@ static List<?> getList(Map<?, ?> m, String key) {
     }
   }
 
+  static Map<?, ?> getMap(Map<?, ?> m, String key) {
+    Object map = m.get(key);
+    if (map instanceof Map) {
+      return (Map<?, ?>) map;
+    } else {
+      return null;
+    }
+  }
+
   /** Convert a Json map to a DatanodeInfo object. */
   static DatanodeInfo toDatanodeInfo(final Map<?, ?> m)
       throws IOException {
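For reference, the nesting that the new `toDirectoryListing` unwraps (`DirectoryListing` → `partialListing` → `FileStatuses` → `FileStatus`) can be exercised with plain Jackson, independent of the Hadoop classes above. This is a minimal standalone sketch, not code from the patch; the class name and sample values are invented:

```java
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

// Standalone sketch: unwrap the standardized DirectoryListing response with
// plain Jackson, mirroring the getMap()/getList() steps in JsonUtilClient.
public class DirectoryListingParseSketch {
  public static void main(String[] args) throws Exception {
    String json = "{\"DirectoryListing\": {"
        + "\"partialListing\": {\"FileStatuses\": {\"FileStatus\": ["
        + "{\"pathSuffix\": \"bardir\", \"type\": \"DIRECTORY\"}]}},"
        + "\"remainingEntries\": 2}}";

    Map<?, ?> root = new ObjectMapper().readValue(json, Map.class);
    // DirectoryListing -> partialListing -> FileStatuses -> FileStatus[]
    Map<?, ?> listing = (Map<?, ?>) root.get("DirectoryListing");
    Map<?, ?> partial = (Map<?, ?>) listing.get("partialListing");
    Map<?, ?> statuses = (Map<?, ?>) partial.get("FileStatuses");
    List<?> batch = (List<?>) statuses.get("FileStatus");

    System.out.println("entries in batch: " + batch.size());
    System.out.println("remainingEntries: " + listing.get("remainingEntries"));
  }
}
```

Because `partialListing` now carries a full `FileStatuses` object, the same unwrapping code serves both the batched and non-batched listing responses, which is what lets `listStatus` below delegate to `toHdfsFileStatusArray`.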
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
index 5389a02bab..9a9edc8462 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
@@ -1491,20 +1491,13 @@ public FileStatus[] listStatus(final Path f) throws IOException {
     return new FsPathResponseRunner<FileStatus[]>(op, f) {
       @Override
       FileStatus[] decodeResponse(Map<?, ?> json) {
-        final Map<?, ?> rootmap =
-            (Map<?, ?>) json.get(FileStatus.class.getSimpleName() + "es");
-        final List<?> array = JsonUtilClient.getList(rootmap,
-            FileStatus.class.getSimpleName());
-
-        //convert FileStatus
-        assert array != null;
-        final FileStatus[] statuses = new FileStatus[array.size()];
-        int i = 0;
-        for (Object object : array) {
-          final Map<?, ?> m = (Map<?, ?>) object;
-          statuses[i++] = makeQualified(JsonUtilClient.toFileStatus(m, false),
-              f);
+        HdfsFileStatus[] hdfsStatuses =
+            JsonUtilClient.toHdfsFileStatusArray(json);
+        final FileStatus[] statuses = new FileStatus[hdfsStatuses.length];
+        for (int i = 0; i < hdfsStatuses.length; i++) {
+          statuses[i] = makeQualified(hdfsStatuses[i], f);
         }
+        return statuses;
       }
     }.run();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
index 2960f14067..6b6cca62f0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
@@ -232,32 +232,42 @@ private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock
     return m;
   }
 
+  private static Map<String, Object> toJson(final DirectoryListing listing)
+      throws IOException {
+    final Map<String, Object> m = new TreeMap<>();
+    // Serialize FileStatus[] to a FileStatuses map
+    m.put("partialListing", toJsonMap(listing.getPartialListing()));
+    // Simple int
+    m.put("remainingEntries", listing.getRemainingEntries());
+
+    return m;
+  }
+
   public static String toJsonString(final DirectoryListing listing) throws
       IOException {
     if (listing == null) {
       return null;
     }
-
-    final Map<String, Object> m = new TreeMap<>();
-    m.put("partialListing", toJsonArray(listing.getPartialListing()));
-    m.put("remainingEntries", listing.getRemainingEntries());
-    return MAPPER.writeValueAsString(m);
+    return toJsonString(DirectoryListing.class, toJson(listing));
   }
 
-  private static Object[] toJsonArray(HdfsFileStatus[] statuses) throws
+  private static Map<String, Object> toJsonMap(HdfsFileStatus[] statuses) throws
       IOException {
     if (statuses == null) {
       return null;
     }
-    if (statuses.length == 0) {
-      return EMPTY_OBJECT_ARRAY;
-    }
-    final Object[] a = new Object[statuses.length];
+
+    final Map<String, Object> fileStatuses = new TreeMap<>();
+    final Map<String, Object> fileStatus = new TreeMap<>();
+    fileStatuses.put("FileStatuses", fileStatus);
+    final Object[] array = new Object[statuses.length];
+    fileStatus.put("FileStatus", array);
     for (int i = 0; i < statuses.length; i++) {
-      a[i] = toJsonMap(statuses[i]);
+      array[i] = toJsonMap(statuses[i]);
     }
-    return a;
+
+    return fileStatuses;
   }
 
   /** Convert a LocatedBlock[] to a Json array. */
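On the serializer side, the shape that `toJson` and the renamed `toJsonMap` build can be reproduced with nothing more than `TreeMap` and Jackson's `ObjectMapper`. A minimal standalone sketch under the same caveats (not patch code; the status fields are placeholders rather than a real serialized `HdfsFileStatus`):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import com.fasterxml.jackson.databind.ObjectMapper;

// Standalone sketch of the serialized shape produced by JsonUtil.toJsonString().
public class DirectoryListingSerializeSketch {
  public static void main(String[] args) throws Exception {
    // One placeholder entry standing in for a serialized HdfsFileStatus.
    final Map<String, Object> status = new TreeMap<>();
    status.put("pathSuffix", "bardir");
    status.put("type", "DIRECTORY");

    // FileStatuses -> FileStatus[] nesting, as in the non-batched LISTSTATUS.
    final List<Object> array = new ArrayList<>();
    array.add(status);
    final Map<String, Object> fileStatus = new TreeMap<>();
    fileStatus.put("FileStatus", array);
    final Map<String, Object> fileStatuses = new TreeMap<>();
    fileStatuses.put("FileStatuses", fileStatus);

    // DirectoryListing wrapper with the iteration cursor.
    final Map<String, Object> listing = new TreeMap<>();
    listing.put("partialListing", fileStatuses);
    listing.put("remainingEntries", 2);
    final Map<String, Object> root = new TreeMap<>();
    root.put("DirectoryListing", listing);

    System.out.println(new ObjectMapper().writeValueAsString(root));
  }
}
```

Running it prints the same nesting shown in the documentation examples below: the listing wrapped under `DirectoryListing`, with the batch reusing the existing `FileStatuses`/`FileStatus` envelope.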
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
index c62fb2b7c7..546f99e96b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
@@ -597,15 +597,15 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
 
         curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=<CHILD>"
 
-    The client receives a response with a batch of [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
+    The client receives a response with a [`DirectoryListing` JSON object](#DirectoryListing_JSON_Schema), which contains a [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
 
         HTTP/1.1 200 OK
         Cache-Control: no-cache
-        Expires: Tue, 30 Aug 2016 16:42:16 GMT
-        Date: Tue, 30 Aug 2016 16:42:16 GMT
+        Expires: Thu, 08 Sep 2016 03:40:38 GMT
+        Date: Thu, 08 Sep 2016 03:40:38 GMT
         Pragma: no-cache
-        Expires: Tue, 30 Aug 2016 16:42:16 GMT
-        Date: Tue, 30 Aug 2016 16:42:16 GMT
+        Expires: Thu, 08 Sep 2016 03:40:38 GMT
+        Date: Thu, 08 Sep 2016 03:40:38 GMT
         Pragma: no-cache
         Content-Type: application/json
         X-FRAME-OPTIONS: SAMEORIGIN
@@ -613,56 +613,61 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
         Server: Jetty(6.1.26)
 
         {
-            "partialListing": [
-              {
-                "accessTime": 0,
-                "blockSize": 0,
-                "childrenNum": 0,
-                "fileId": 16389,
-                "group": "supergroup",
-                "length": 0,
-                "modificationTime": 1472575493064,
-                "owner": "andrew",
-                "pathSuffix": "anotherdir",
-                "permission": "755",
-                "replication": 0,
-                "storagePolicy": 0,
-                "type": "DIRECTORY"
+            "DirectoryListing": {
+                "partialListing": {
+                    "FileStatuses": {
+                        "FileStatus": [
+                        {
+                            "accessTime": 0,
+                            "blockSize": 0,
+                            "childrenNum": 0,
+                            "fileId": 16387,
+                            "group": "supergroup",
+                            "length": 0,
+                            "modificationTime": 1473305882563,
+                            "owner": "andrew",
+                            "pathSuffix": "bardir",
+                            "permission": "755",
+                            "replication": 0,
+                            "storagePolicy": 0,
+                            "type": "DIRECTORY"
+                        },
+                        {
+                            "accessTime": 1473305896945,
+                            "blockSize": 1024,
+                            "childrenNum": 0,
+                            "fileId": 16388,
+                            "group": "supergroup",
+                            "length": 0,
+                            "modificationTime": 1473305896965,
+                            "owner": "andrew",
+                            "pathSuffix": "bazfile",
+                            "permission": "644",
+                            "replication": 3,
+                            "storagePolicy": 0,
+                            "type": "FILE"
+                        }
+                        ]
+                    }
                 },
-              {
-                "accessTime": 0,
-                "blockSize": 0,
-                "childrenNum": 0,
-                "fileId": 16386,
-                "group": "supergroup",
-                "length": 0,
-                "modificationTime": 1472575274776,
-                "owner": "andrew",
-                "pathSuffix": "somedir",
-                "permission": "755",
-                "replication": 0,
-                "storagePolicy": 0,
-                "type": "DIRECTORY"
-              }
-            ],
-            "remainingEntries": 1
+                "remainingEntries": 2
+            }
         }
 
 If `remainingEntries` is non-zero, there are additional entries in the directory. To query the next batch, set the `startAfter` parameter to the `pathSuffix` of the last item returned in the current batch.
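A client pages through a directory by repeating the request with an updated cursor. The loop below is a hypothetical standalone sketch, not part of the patch: it assumes Java 11's `java.net.http` client and Jackson, and the host, port, and directory path are placeholders.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical client loop: page through a directory with LISTSTATUS_BATCH,
// feeding the last pathSuffix back in as startAfter until no entries remain.
// Error handling and URL-encoding of startAfter are omitted for brevity.
public class ListStatusBatchLoop {
  public static void main(String[] args) throws Exception {
    HttpClient client = HttpClient.newHttpClient();
    ObjectMapper mapper = new ObjectMapper();
    // Placeholder NameNode address and directory path.
    String base = "http://namenode.example.com:9870/webhdfs/v1/tmp"
        + "?op=LISTSTATUS_BATCH";

    String startAfter = null;
    long remaining;
    do {
      String url = (startAfter == null) ? base
          : base + "&startAfter=" + startAfter;
      HttpResponse<String> resp = client.send(
          HttpRequest.newBuilder(URI.create(url)).GET().build(),
          HttpResponse.BodyHandlers.ofString());

      // DirectoryListing -> partialListing -> FileStatuses -> FileStatus[]
      Map<?, ?> root = mapper.readValue(resp.body(), Map.class);
      Map<?, ?> listing = (Map<?, ?>) root.get("DirectoryListing");
      Map<?, ?> partial = (Map<?, ?>) listing.get("partialListing");
      Map<?, ?> statuses = (Map<?, ?>) partial.get("FileStatuses");
      List<?> batch = (List<?>) statuses.get("FileStatus");

      for (Object o : batch) {
        startAfter = (String) ((Map<?, ?>) o).get("pathSuffix");
        System.out.println(startAfter);
      }
      remaining = ((Number) listing.get("remainingEntries")).longValue();
    } while (remaining > 0);
  }
}
```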
 For example:
 
-        curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=somedir"
+        curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=bazfile"
 
 Which will return the next batch of directory entries:
 
         HTTP/1.1 200 OK
         Cache-Control: no-cache
-        Expires: Tue, 30 Aug 2016 16:46:23 GMT
-        Date: Tue, 30 Aug 2016 16:46:23 GMT
+        Expires: Thu, 08 Sep 2016 03:43:20 GMT
+        Date: Thu, 08 Sep 2016 03:43:20 GMT
         Pragma: no-cache
-        Expires: Tue, 30 Aug 2016 16:46:23 GMT
-        Date: Tue, 30 Aug 2016 16:46:23 GMT
+        Expires: Thu, 08 Sep 2016 03:43:20 GMT
+        Date: Thu, 08 Sep 2016 03:43:20 GMT
         Pragma: no-cache
         Content-Type: application/json
         X-FRAME-OPTIONS: SAMEORIGIN
@@ -670,24 +675,45 @@ Which will return the next batch of directory entries:
         Server: Jetty(6.1.26)
 
         {
-            "partialListing": [
-              {
-                "accessTime": 1472575333568,
-                "blockSize": 1024,
-                "childrenNum": 0,
-                "fileId": 16388,
-                "group": "supergroup",
-                "length": 224,
-                "modificationTime": 1472575334222,
-                "owner": "andrew",
-                "pathSuffix": "somefile",
-                "permission": "644",
-                "replication": 3,
-                "storagePolicy": 0,
-                "type": "FILE"
-              }
-            ],
-            "remainingEntries": 0
+            "DirectoryListing": {
+                "partialListing": {
+                    "FileStatuses": {
+                        "FileStatus": [
+                        {
+                            "accessTime": 0,
+                            "blockSize": 0,
+                            "childrenNum": 0,
+                            "fileId": 16386,
+                            "group": "supergroup",
+                            "length": 0,
+                            "modificationTime": 1473305878951,
+                            "owner": "andrew",
+                            "pathSuffix": "foodir",
+                            "permission": "755",
+                            "replication": 0,
+                            "storagePolicy": 0,
+                            "type": "DIRECTORY"
+                        },
+                        {
+                            "accessTime": 1473305902864,
+                            "blockSize": 1024,
+                            "childrenNum": 0,
+                            "fileId": 16389,
+                            "group": "supergroup",
+                            "length": 0,
+                            "modificationTime": 1473305902878,
+                            "owner": "andrew",
+                            "pathSuffix": "quxfile",
+                            "permission": "644",
+                            "replication": 3,
+                            "storagePolicy": 0,
+                            "type": "FILE"
+                        }
+                        ]
+                    }
+                },
+                "remainingEntries": 0
+            }
         }
 
 Batch size is controlled by the `dfs.ls.limit` option on the NameNode.
@@ -1672,6 +1698,41 @@ A `FileStatuses` JSON object represents an array of `FileStatus` JSON objects.
 
 See also: [`FileStatus` Properties](#FileStatus_Properties), [`LISTSTATUS`](#List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
 
+### DirectoryListing JSON Schema
+
+A `DirectoryListing` JSON object represents a batch of directory entries while iteratively listing a directory. It contains a `FileStatuses` JSON object as well as iteration information.
+
+```json
+{
+  "name" : "DirectoryListing",
+  "properties":
+  {
+    "DirectoryListing":
+    {
+      "type" : "object",
+      "properties":
+      {
+        "partialListing":
+        {
+          "description": "A partial directory listing",
+          "type" : "object", // A FileStatuses object
+          "required" : true
+        },
+        "remainingEntries":
+        {
+          "description": "Number of remaining entries",
+          "type" : "integer",
+          "required" : true
+        }
+      }
+    }
+  }
+
+}
+```
+
+See also: [`FileStatuses` JSON Schema](#FileStatuses_JSON_Schema), [`LISTSTATUS_BATCH`](#Iteratively_List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
+
 ### Long JSON Schema
 
 ```json