HDFS-10837. Standardize serializiation of WebHDFS DirectoryListing.

This commit is contained in:
Andrew Wang 2016-09-13 11:02:36 -07:00
parent e3f7f58a5f
commit db6d243cf8
4 changed files with 182 additions and 96 deletions

View File

@ -143,23 +143,36 @@ static HdfsFileStatus toFileStatus(final Map<?, ?> json,
storagePolicy, null); storagePolicy, null);
} }
/**
 * Deserializes a JSON "FileStatuses" payload into an array of
 * {@link HdfsFileStatus}. The payload is nested as
 * {@code FileStatuses -> FileStatus -> [ ... ]}.
 *
 * @param json the parsed JSON response; must not be null
 * @return one HdfsFileStatus per entry in the FileStatus array
 */
static HdfsFileStatus[] toHdfsFileStatusArray(final Map<?, ?> json) {
  Preconditions.checkNotNull(json);
  // Outer key is "FileStatuses" (class simple name + "es").
  final Map<?, ?> rootmap =
      (Map<?, ?>) json.get(FileStatus.class.getSimpleName() + "es");
  final List<?> array = JsonUtilClient.getList(rootmap,
      FileStatus.class.getSimpleName());
  Preconditions.checkNotNull(array);
  // Convert each JSON map entry into an HdfsFileStatus.
  final HdfsFileStatus[] statuses = new HdfsFileStatus[array.size()];
  for (int idx = 0; idx < statuses.length; idx++) {
    final Map<?, ?> entry = (Map<?, ?>) array.get(idx);
    statuses[idx] = JsonUtilClient.toFileStatus(entry, false);
  }
  return statuses;
}
static DirectoryListing toDirectoryListing(final Map<?, ?> json) { static DirectoryListing toDirectoryListing(final Map<?, ?> json) {
if (json == null) { if (json == null) {
return null; return null;
} }
final List<?> list = JsonUtilClient.getList(json, final Map<?, ?> listing = getMap(json, "DirectoryListing");
"partialListing"); final Map<?, ?> partialListing = getMap(listing, "partialListing");
HdfsFileStatus[] fileStatuses = toHdfsFileStatusArray(partialListing);
HdfsFileStatus[] partialListing = new HdfsFileStatus[list.size()]; int remainingEntries = getInt(listing, "remainingEntries", -1);
int i = 0;
for (Object o : list) {
final Map<?, ?> m = (Map<?, ?>) o;
partialListing[i++] = toFileStatus(m, false);
}
int remainingEntries = getInt(json, "remainingEntries", -1);
Preconditions.checkState(remainingEntries != -1, Preconditions.checkState(remainingEntries != -1,
"remainingEntries was not set"); "remainingEntries was not set");
return new DirectoryListing(partialListing, remainingEntries); return new DirectoryListing(fileStatuses, remainingEntries);
} }
/** Convert a Json map to an ExtendedBlock object. */ /** Convert a Json map to an ExtendedBlock object. */
@ -210,6 +223,15 @@ static List<?> getList(Map<?, ?> m, String key) {
} }
} }
/**
 * Looks up {@code key} in {@code m} and returns its value as a Map.
 *
 * @param m the map to search
 * @param key the key to look up
 * @return the value cast to a Map, or null if absent or not a Map
 */
static Map<?, ?> getMap(Map<?, ?> m, String key) {
  final Object value = m.get(key);
  return (value instanceof Map<?, ?>) ? (Map<?, ?>) value : null;
}
/** Convert a Json map to an DatanodeInfo object. */ /** Convert a Json map to an DatanodeInfo object. */
static DatanodeInfo toDatanodeInfo(final Map<?, ?> m) static DatanodeInfo toDatanodeInfo(final Map<?, ?> m)
throws IOException { throws IOException {

View File

@ -1491,20 +1491,13 @@ public FileStatus[] listStatus(final Path f) throws IOException {
return new FsPathResponseRunner<FileStatus[]>(op, f) { return new FsPathResponseRunner<FileStatus[]>(op, f) {
@Override @Override
FileStatus[] decodeResponse(Map<?,?> json) { FileStatus[] decodeResponse(Map<?,?> json) {
final Map<?, ?> rootmap = HdfsFileStatus[] hdfsStatuses =
(Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es"); JsonUtilClient.toHdfsFileStatusArray(json);
final List<?> array = JsonUtilClient.getList(rootmap, final FileStatus[] statuses = new FileStatus[hdfsStatuses.length];
FileStatus.class.getSimpleName()); for (int i = 0; i < hdfsStatuses.length; i++) {
statuses[i] = makeQualified(hdfsStatuses[i], f);
//convert FileStatus
assert array != null;
final FileStatus[] statuses = new FileStatus[array.size()];
int i = 0;
for (Object object : array) {
final Map<?, ?> m = (Map<?, ?>) object;
statuses[i++] = makeQualified(JsonUtilClient.toFileStatus(m, false),
f);
} }
return statuses; return statuses;
} }
}.run(); }.run();

View File

@ -232,32 +232,42 @@ private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock
return m; return m;
} }
/**
 * Serializes a {@link DirectoryListing} into its JSON map form:
 * a "partialListing" entry (the FileStatuses map) plus a
 * "remainingEntries" count.
 *
 * @param listing the listing to serialize
 * @return the map representation of the listing
 * @throws IOException if an entry cannot be serialized
 */
private static Map<String, Object> toJson(final DirectoryListing listing)
    throws IOException {
  final Map<String, Object> json = new TreeMap<>();
  json.put("partialListing", toJsonMap(listing.getPartialListing()));
  json.put("remainingEntries", listing.getRemainingEntries());
  return json;
}
public static String toJsonString(final DirectoryListing listing) throws public static String toJsonString(final DirectoryListing listing) throws
IOException { IOException {
if (listing == null) { if (listing == null) {
return null; return null;
} }
return toJsonString(DirectoryListing.class, toJson(listing));
final Map<String, Object> m = new TreeMap<>();
m.put("partialListing", toJsonArray(listing.getPartialListing()));
m.put("remainingEntries", listing.getRemainingEntries());
return MAPPER.writeValueAsString(m);
} }
private static Object[] toJsonArray(HdfsFileStatus[] statuses) throws private static Map<String, Object> toJsonMap(HdfsFileStatus[] statuses) throws
IOException { IOException {
if (statuses == null) { if (statuses == null) {
return null; return null;
} }
if (statuses.length == 0) {
return EMPTY_OBJECT_ARRAY; final Map<String, Object> fileStatuses = new TreeMap<>();
} final Map<String, Object> fileStatus = new TreeMap<>();
final Object[] a = new Object[statuses.length]; fileStatuses.put("FileStatuses", fileStatus);
final Object[] array = new Object[statuses.length];
fileStatus.put("FileStatus", array);
for (int i = 0; i < statuses.length; i++) { for (int i = 0; i < statuses.length; i++) {
a[i] = toJsonMap(statuses[i]); array[i] = toJsonMap(statuses[i]);
} }
return a;
return fileStatuses;
} }
/** Convert a LocatedBlock[] to a Json array. */ /** Convert a LocatedBlock[] to a Json array. */

View File

@ -597,15 +597,15 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=<CHILD>" curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=<CHILD>"
The client receives a response with a [`DirectoryListing` JSON object](#DirectoryListing_JSON_Schema), which contains a [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
HTTP/1.1 200 OK HTTP/1.1 200 OK
Cache-Control: no-cache Cache-Control: no-cache
Expires: Tue, 30 Aug 2016 16:42:16 GMT Expires: Thu, 08 Sep 2016 03:40:38 GMT
Date: Tue, 30 Aug 2016 16:42:16 GMT Date: Thu, 08 Sep 2016 03:40:38 GMT
Pragma: no-cache Pragma: no-cache
Expires: Tue, 30 Aug 2016 16:42:16 GMT Expires: Thu, 08 Sep 2016 03:40:38 GMT
Date: Tue, 30 Aug 2016 16:42:16 GMT Date: Thu, 08 Sep 2016 03:40:38 GMT
Pragma: no-cache Pragma: no-cache
Content-Type: application/json Content-Type: application/json
X-FRAME-OPTIONS: SAMEORIGIN X-FRAME-OPTIONS: SAMEORIGIN
@ -613,22 +613,72 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
Server: Jetty(6.1.26) Server: Jetty(6.1.26)
{ {
"partialListing": [ "DirectoryListing": {
"partialListing": {
"FileStatuses": {
"FileStatus": [
{ {
"accessTime": 0, "accessTime": 0,
"blockSize": 0, "blockSize": 0,
"childrenNum": 0, "childrenNum": 0,
"fileId": 16389, "fileId": 16387,
"group": "supergroup", "group": "supergroup",
"length": 0, "length": 0,
"modificationTime": 1472575493064, "modificationTime": 1473305882563,
"owner": "andrew", "owner": "andrew",
"pathSuffix": "anotherdir", "pathSuffix": "bardir",
"permission": "755", "permission": "755",
"replication": 0, "replication": 0,
"storagePolicy": 0, "storagePolicy": 0,
"type": "DIRECTORY" "type": "DIRECTORY"
}, },
{
"accessTime": 1473305896945,
"blockSize": 1024,
"childrenNum": 0,
"fileId": 16388,
"group": "supergroup",
"length": 0,
"modificationTime": 1473305896965,
"owner": "andrew",
"pathSuffix": "bazfile",
"permission": "644",
"replication": 3,
"storagePolicy": 0,
"type": "FILE"
}
]
}
},
"remainingEntries": 2
}
}
If `remainingEntries` is non-zero, there are additional entries in the directory.
To query the next batch, set the `startAfter` parameter to the `pathSuffix` of the last item returned in the current batch. For example:
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=bazfile"
Which will return the next batch of directory entries:
HTTP/1.1 200 OK
Cache-Control: no-cache
Expires: Thu, 08 Sep 2016 03:43:20 GMT
Date: Thu, 08 Sep 2016 03:43:20 GMT
Pragma: no-cache
Expires: Thu, 08 Sep 2016 03:43:20 GMT
Date: Thu, 08 Sep 2016 03:43:20 GMT
Pragma: no-cache
Content-Type: application/json
X-FRAME-OPTIONS: SAMEORIGIN
Transfer-Encoding: chunked
Server: Jetty(6.1.26)
{
"DirectoryListing": {
"partialListing": {
"FileStatuses": {
"FileStatus": [
{ {
"accessTime": 0, "accessTime": 0,
"blockSize": 0, "blockSize": 0,
@ -636,59 +686,35 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
"fileId": 16386, "fileId": 16386,
"group": "supergroup", "group": "supergroup",
"length": 0, "length": 0,
"modificationTime": 1472575274776, "modificationTime": 1473305878951,
"owner": "andrew", "owner": "andrew",
"pathSuffix": "somedir", "pathSuffix": "foodir",
"permission": "755", "permission": "755",
"replication": 0, "replication": 0,
"storagePolicy": 0, "storagePolicy": 0,
"type": "DIRECTORY" "type": "DIRECTORY"
} },
],
"remainingEntries": 1
}
If `remainingEntries` is non-zero, there are additional entries in the directory.
To query the next batch, set the `startAfter` parameter to the `pathSuffix` of the last item returned in the current batch. For example:
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=somedir"
Which will return the next batch of directory entries:
HTTP/1.1 200 OK
Cache-Control: no-cache
Expires: Tue, 30 Aug 2016 16:46:23 GMT
Date: Tue, 30 Aug 2016 16:46:23 GMT
Pragma: no-cache
Expires: Tue, 30 Aug 2016 16:46:23 GMT
Date: Tue, 30 Aug 2016 16:46:23 GMT
Pragma: no-cache
Content-Type: application/json
X-FRAME-OPTIONS: SAMEORIGIN
Transfer-Encoding: chunked
Server: Jetty(6.1.26)
{ {
"partialListing": [ "accessTime": 1473305902864,
{
"accessTime": 1472575333568,
"blockSize": 1024, "blockSize": 1024,
"childrenNum": 0, "childrenNum": 0,
"fileId": 16388, "fileId": 16389,
"group": "supergroup", "group": "supergroup",
"length": 224, "length": 0,
"modificationTime": 1472575334222, "modificationTime": 1473305902878,
"owner": "andrew", "owner": "andrew",
"pathSuffix": "somefile", "pathSuffix": "quxfile",
"permission": "644", "permission": "644",
"replication": 3, "replication": 3,
"storagePolicy": 0, "storagePolicy": 0,
"type": "FILE" "type": "FILE"
} }
], ]
}
},
"remainingEntries": 0 "remainingEntries": 0
} }
}
Batch size is controlled by the `dfs.ls.limit` option on the NameNode.
@ -1672,6 +1698,41 @@ A `FileStatuses` JSON object represents an array of `FileStatus` JSON objects.
See also: [`FileStatus` Properties](#FileStatus_Properties), [`LISTSTATUS`](#List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
### DirectoryListing JSON Schema
A `DirectoryListing` JSON object represents a batch of directory entries while iteratively listing a directory. It contains a `FileStatuses` JSON object as well as iteration information.
```json
{
"name" : "DirectoryListing",
"properties":
{
"DirectoryListing":
{
"type" : "object",
"properties":
{
"partialListing":
{
"description": "A partial directory listing",
"type" : "object", // A FileStatuses object
"required" : true
},
"remainingEntries":
{
"description": "Number of remaining entries",
"type" : "integer",
"required" : true
}
}
}
}
}
```
See also: [`FileStatuses` JSON Schema](#FileStatuses_JSON_Schema), [`LISTSTATUS_BATCH`](#Iteratively_List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
### Long JSON Schema ### Long JSON Schema
```json ```json