HDFS-2348. Support getContentSummary and getFileChecksum in webhdfs.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1177905 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2011-10-01 04:49:00 +00:00
parent f3b98a26c4
commit dc8464f943
8 changed files with 209 additions and 14 deletions
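For orientation, once a client binds a FileSystem to a webhdfs:// URI (as the tests in this commit do), the two new operations surface through the ordinary FileSystem API. A minimal usage sketch follows; the namenode address and paths are illustrative placeholders, not taken from this commit.

// Minimal sketch of the client-side API added by this commit.
// "namenode:50070" and the paths are illustrative placeholders.
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsSummaryAndChecksumExample {
  public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem webhdfs = FileSystem.get(
        URI.create("webhdfs://namenode:50070/"), conf);

    // GETCONTENTSUMMARY is answered by the namenode as JSON.
    final ContentSummary summary = webhdfs.getContentSummary(new Path("/user/foo"));
    System.out.println("spaceConsumed=" + summary.getSpaceConsumed());

    // GETFILECHECKSUM is redirected to a datanode, which computes the
    // MD5-of-MD5-of-CRC32 checksum and returns it as JSON.
    final FileChecksum checksum = webhdfs.getFileChecksum(new Path("/user/foo/bar"));
    System.out.println("checksum=" + checksum);
  }
}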

View File

@ -19,6 +19,9 @@ Trunk (unreleased changes)
HDFS-2340. Support getFileBlockLocations and getDelegationToken in webhdfs.
(szetszwo)
HDFS-2348. Support getContentSummary and getFileChecksum in webhdfs.
(szetszwo)
IMPROVEMENTS
HADOOP-7524 Change RPC to allow multiple protocols including multiple

View File

@ -46,10 +46,12 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.web.JsonUtil;
import org.apache.hadoop.hdfs.web.ParamFilter;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.hdfs.web.resources.BlockSizeParam;
@ -219,13 +221,13 @@ public Response run() throws IOException {
final String fullpath = path.getAbsolutePath();
final DataNode datanode = (DataNode)context.getAttribute("datanode");
final Configuration conf = new Configuration(datanode.getConf());
final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf);
final DFSClient dfsclient = new DFSClient(nnRpcAddr, conf);
switch(op.getValue()) {
case OPEN:
{
final Configuration conf = new Configuration(datanode.getConf());
final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf);
final DFSClient dfsclient = new DFSClient(nnRpcAddr, conf);
final int b = bufferSize.getValue(conf);
final DFSDataInputStream in = new DFSClient.DFSDataInputStream(
dfsclient.open(fullpath, b, true));
@ -244,6 +246,12 @@ public void write(final OutputStream out) throws IOException {
};
return Response.ok(streaming).type(MediaType.APPLICATION_OCTET_STREAM).build();
}
case GETFILECHECKSUM:
{
final MD5MD5CRC32FileChecksum checksum = dfsclient.getFileChecksum(fullpath);
final String js = JsonUtil.toJsonString(checksum);
return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
}
default:
throw new UnsupportedOperationException(op + " is not supported");
}
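
At the HTTP level, GETFILECHECKSUM is a two-step exchange: the namenode (see NamenodeWebHdfsMethods below) answers with a temporary redirect to a datanode, and the datanode handler above computes the checksum through a DFSClient and returns it as JSON. A hedged sketch of that exchange, assuming the /webhdfs/v1 REST prefix used by WebHdfsFileSystem; the host, port, and path are placeholders.

// Hedged sketch of the redirect-then-fetch flow for op=GETFILECHECKSUM.
// "namenode:50070" and "/user/foo/bar" are illustrative placeholders.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class GetFileChecksumOverHttp {
  public static void main(String[] args) throws Exception {
    final URL nnUrl = new URL(
        "http://namenode:50070/webhdfs/v1/user/foo/bar?op=GETFILECHECKSUM");
    final HttpURLConnection nnConn = (HttpURLConnection) nnUrl.openConnection();
    nnConn.setInstanceFollowRedirects(false);
    // The namenode picks a datanode holding the file and replies 307
    // with the datanode URL in the Location header.
    final String location = nnConn.getHeaderField("Location");
    nnConn.disconnect();

    // The datanode handler computes the checksum via
    // DFSClient.getFileChecksum and returns it as JSON.
    final HttpURLConnection dnConn =
        (HttpURLConnection) new URL(location).openConnection();
    final BufferedReader in = new BufferedReader(
        new InputStreamReader(dnConn.getInputStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null; ) {
      // e.g. {"bytesPerCRC":512,"crcPerBlock":...,"md5":"..."}
      System.out.println(line);
    }
    in.close();
    dnConn.disconnect();
  }
}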

View File

@ -46,6 +46,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
@ -112,7 +113,9 @@ public static String getRemoteAddress() {
private static DatanodeInfo chooseDatanode(final NameNode namenode,
final String path, final HttpOpParam.Op op, final long openOffset
) throws IOException {
if (op == GetOpParam.Op.OPEN || op == PostOpParam.Op.APPEND) {
if (op == GetOpParam.Op.OPEN
|| op == GetOpParam.Op.GETFILECHECKSUM
|| op == PostOpParam.Op.APPEND) {
final NamenodeProtocols np = namenode.getRpcServer();
final HdfsFileStatus status = np.getFileInfo(path);
final long len = status.getLen();
@ -431,6 +434,18 @@ public Response run() throws IOException, URISyntaxException {
final StreamingOutput streaming = getListingStream(np, fullpath);
return Response.ok(streaming).type(MediaType.APPLICATION_JSON).build();
}
case GETCONTENTSUMMARY:
{
final ContentSummary contentsummary = np.getContentSummary(fullpath);
final String js = JsonUtil.toJsonString(contentsummary);
return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
}
case GETFILECHECKSUM:
{
final URI uri = redirectURI(namenode, ugi, delegation, fullpath,
op.getValue(), -1L);
return Response.temporaryRedirect(uri).build();
}
case GETDELEGATIONTOKEN:
{
final Token<? extends TokenIdentifier> token = generateDelegationToken(

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.web;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@ -24,6 +26,8 @@
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@ -34,6 +38,7 @@
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
@ -398,4 +403,73 @@ public static LocatedBlocks toLocatedBlocks(final Map<String, Object> m
return new LocatedBlocks(fileLength, isUnderConstruction, locatedBlocks,
lastLocatedBlock, isLastBlockComplete);
}
/** Convert a ContentSummary to a Json string. */
public static String toJsonString(final ContentSummary contentsummary
) throws IOException {
if (contentsummary == null) {
return null;
}
final Map<String, Object> m = jsonMap.get();
m.put("length", contentsummary.getLength());
m.put("fileCount", contentsummary.getFileCount());
m.put("directoryCount", contentsummary.getDirectoryCount());
m.put("quota", contentsummary.getQuota());
m.put("spaceConsumed", contentsummary.getSpaceConsumed());
m.put("spaceQuota", contentsummary.getSpaceQuota());
return JSON.toString(m);
}
/** Convert a Json map to a ContentSummary. */
public static ContentSummary toContentSummary(final Map<String, Object> m
) throws IOException {
if (m == null) {
return null;
}
final long length = (Long)m.get("length");
final long fileCount = (Long)m.get("fileCount");
final long directoryCount = (Long)m.get("directoryCount");
final long quota = (Long)m.get("quota");
final long spaceConsumed = (Long)m.get("spaceConsumed");
final long spaceQuota = (Long)m.get("spaceQuota");
return new ContentSummary(length, fileCount, directoryCount,
quota, spaceConsumed, spaceQuota);
}
/** Convert a MD5MD5CRC32FileChecksum to a Json string. */
public static String toJsonString(final MD5MD5CRC32FileChecksum checksum
) throws IOException {
if (checksum == null) {
return null;
}
final Map<String, Object> m = jsonMap.get();
final byte[] bytes = checksum.getBytes();
final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
final int bytesPerCRC = in.readInt();
final long crcPerBlock = in.readLong();
final MD5Hash md5 = MD5Hash.read(in);
m.put("bytesPerCRC", bytesPerCRC);
m.put("crcPerBlock", crcPerBlock);
m.put("md5", "" + md5);
return JSON.toString(m);
}
/** Convert a Json map to a MD5MD5CRC32FileChecksum. */
public static MD5MD5CRC32FileChecksum toMD5MD5CRC32FileChecksum(
final Map<String, Object> m) throws IOException {
if (m == null) {
return null;
}
final int bytesPerCRC = (int)(long)(Long)m.get("bytesPerCRC");
final long crcPerBlock = (Long)m.get("crcPerBlock");
final String md5 = (String)m.get("md5");
return new MD5MD5CRC32FileChecksum(bytesPerCRC, crcPerBlock,
new MD5Hash(md5));
}
}
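
The new JsonUtil helpers are symmetric, so a ContentSummary can be round-tripped through its JSON wire form. A small sketch, assuming the parsed map comes from org.mortbay.util.ajax.JSON (the same helper that JSON.toString above belongs to); the sample values are made up.

// Round-trip sketch for the new ContentSummary JSON helpers.
// Assumes org.mortbay.util.ajax.JSON as the parser behind JSON.toString.
import java.util.Map;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hdfs.web.JsonUtil;
import org.mortbay.util.ajax.JSON;

public class ContentSummaryJsonRoundTrip {
  public static void main(String[] args) throws Exception {
    final ContentSummary original = new ContentSummary(
        1024L,  // length
        3L,     // fileCount
        1L,     // directoryCount
        -1L,    // quota (unset)
        3072L,  // spaceConsumed
        -1L);   // spaceQuota (unset)

    // Serialize with the helper added in this commit, e.g.
    // {"length":1024,"fileCount":3,"directoryCount":1,...}
    final String js = JsonUtil.toJsonString(original);

    // Parse back into a map and rebuild the ContentSummary.
    @SuppressWarnings("unchecked")
    final Map<String, Object> m = (Map<String, Object>) JSON.parse(js);
    final ContentSummary copy = JsonUtil.toContentSummary(m);

    // The two should render identically (TestQuota compares them this way).
    System.out.println(original.toString().equals(copy.toString()));
  }
}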

View File

@ -33,10 +33,12 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
@ -449,4 +451,23 @@ public BlockLocation[] getFileBlockLocations(final Path p,
new LengthParam(length));
return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m));
}
@Override
public ContentSummary getContentSummary(final Path p) throws IOException {
statistics.incrementReadOps(1);
final HttpOpParam.Op op = GetOpParam.Op.GETCONTENTSUMMARY;
final Map<String, Object> m = run(op, p);
return JsonUtil.toContentSummary(m);
}
@Override
public MD5MD5CRC32FileChecksum getFileChecksum(final Path p
) throws IOException {
statistics.incrementReadOps(1);
final HttpOpParam.Op op = GetOpParam.Op.GETFILECHECKSUM;
final Map<String, Object> m = run(op, p);
return JsonUtil.toMD5MD5CRC32FileChecksum(m);
}
}

View File

@ -28,6 +28,8 @@ public static enum Op implements HttpOpParam.Op {
GETFILESTATUS(HttpURLConnection.HTTP_OK),
LISTSTATUS(HttpURLConnection.HTTP_OK),
GETCONTENTSUMMARY(HttpURLConnection.HTTP_OK),
GETFILECHECKSUM(HttpURLConnection.HTTP_OK),
GETDELEGATIONTOKEN(HttpURLConnection.HTTP_OK),

View File

@ -26,6 +26,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.Random;
import org.apache.commons.logging.impl.Log4JLogger;
@ -37,6 +38,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.log4j.Level;
import org.junit.Test;
@ -399,15 +401,40 @@ public void testFileChecksum() throws Exception {
RAN.setSeed(seed);
final Configuration conf = getTestConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
conf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, "localhost");
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
final FileSystem hdfs = cluster.getFileSystem();
final String hftpuri = "hftp://" + conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
System.out.println("hftpuri=" + hftpuri);
final FileSystem hftp = new Path(hftpuri).getFileSystem(conf);
final String dir = "/filechecksum";
final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
final UserGroupInformation current = UserGroupInformation.getCurrentUser();
final UserGroupInformation ugi = UserGroupInformation.createUserForTesting(
current.getShortUserName() + "x", new String[]{"user"});
//hftp
final String hftpuri = "hftp://" + nnAddr;
System.out.println("hftpuri=" + hftpuri);
final FileSystem hftp = ugi.doAs(
new PrivilegedExceptionAction<FileSystem>() {
@Override
public FileSystem run() throws Exception {
return new Path(hftpuri).getFileSystem(conf);
}
});
//webhdfs
final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr;
System.out.println("webhdfsuri=" + webhdfsuri);
final FileSystem webhdfs = ugi.doAs(
new PrivilegedExceptionAction<FileSystem>() {
@Override
public FileSystem run() throws Exception {
return new Path(webhdfsuri).getFileSystem(conf);
}
});
final Path dir = new Path("/filechecksum");
final int block_size = 1024;
final int buffer_size = conf.getInt("io.file.buffer.size", 4096);
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 512);
@ -431,7 +458,8 @@ public void testFileChecksum() throws Exception {
//compute checksum
final FileChecksum hdfsfoocs = hdfs.getFileChecksum(foo);
System.out.println("hdfsfoocs=" + hdfsfoocs);
//hftp
final FileChecksum hftpfoocs = hftp.getFileChecksum(foo);
System.out.println("hftpfoocs=" + hftpfoocs);
@ -439,6 +467,14 @@ public void testFileChecksum() throws Exception {
final FileChecksum qfoocs = hftp.getFileChecksum(qualified);
System.out.println("qfoocs=" + qfoocs);
//webhdfs
final FileChecksum webhdfsfoocs = webhdfs.getFileChecksum(foo);
System.out.println("webhdfsfoocs=" + webhdfsfoocs);
final Path webhdfsqualified = new Path(webhdfsuri + dir, "foo" + n);
final FileChecksum webhdfs_qfoocs = webhdfs.getFileChecksum(webhdfsqualified);
System.out.println("webhdfs_qfoocs=" + webhdfs_qfoocs);
//write another file
final Path bar = new Path(dir, "bar" + n);
{
@ -454,24 +490,40 @@ public void testFileChecksum() throws Exception {
assertEquals(hdfsfoocs.hashCode(), barhashcode);
assertEquals(hdfsfoocs, barcs);
//hftp
assertEquals(hftpfoocs.hashCode(), barhashcode);
assertEquals(hftpfoocs, barcs);
assertEquals(qfoocs.hashCode(), barhashcode);
assertEquals(qfoocs, barcs);
//webhdfs
assertEquals(webhdfsfoocs.hashCode(), barhashcode);
assertEquals(webhdfsfoocs, barcs);
assertEquals(webhdfs_qfoocs.hashCode(), barhashcode);
assertEquals(webhdfs_qfoocs, barcs);
}
hdfs.setPermission(dir, new FsPermission((short)0));
{ //test permission error on hftp
hdfs.setPermission(new Path(dir), new FsPermission((short)0));
try {
final String username = UserGroupInformation.getCurrentUser().getShortUserName() + "1";
final HftpFileSystem hftp2 = cluster.getHftpFileSystemAs(username, conf, 0, "somegroup");
hftp2.getFileChecksum(qualified);
hftp.getFileChecksum(qualified);
fail();
} catch(IOException ioe) {
FileSystem.LOG.info("GOOD: getting an exception", ioe);
}
}
{ //test permission error on webhdfs
try {
webhdfs.getFileChecksum(webhdfsqualified);
fail();
} catch(IOException ioe) {
FileSystem.LOG.info("GOOD: getting an exception", ioe);
}
}
hdfs.setPermission(dir, new FsPermission((short)0777));
}
cluster.shutdown();
}

View File

@ -34,6 +34,7 @@
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.Test;
@ -770,6 +771,11 @@ public void testSpaceCommands() throws Exception {
}
}
private static void checkContentSummary(final ContentSummary expected,
final ContentSummary computed) {
assertEquals(expected.toString(), computed.toString());
}
/**
* Violate a space quota using files of size < 1 block. Test that block
* allocation conservatively assumes that for quota checking the entire
@ -781,12 +787,18 @@ public void testBlockAllocationAdjustsUsageConservatively()
Configuration conf = new HdfsConfiguration();
final int BLOCK_SIZE = 6 * 1024;
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
DFSAdmin admin = new DFSAdmin(conf);
final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr;
System.out.println("webhdfsuri=" + webhdfsuri);
final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf);
try {
Path dir = new Path("/test");
Path file1 = new Path("/test/test1");
@ -806,6 +818,7 @@ public void testBlockAllocationAdjustsUsageConservatively()
DFSTestUtil.createFile(fs, file1, FILE_SIZE, (short) 3, 1L);
DFSTestUtil.waitReplication(fs, file1, (short) 3);
c = fs.getContentSummary(dir);
checkContentSummary(c, webhdfs.getContentSummary(dir));
assertEquals("Quota is half consumed", QUOTA_SIZE / 2,
c.getSpaceConsumed());
@ -836,12 +849,18 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception {
Configuration conf = new HdfsConfiguration();
final int BLOCK_SIZE = 6 * 1024;
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
DFSAdmin admin = new DFSAdmin(conf);
final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr;
System.out.println("webhdfsuri=" + webhdfsuri);
final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf);
try {
//Test for default NameSpace Quota
@ -882,6 +901,7 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception {
// Should account for all 59 files (almost QUOTA_SIZE)
c = fs.getContentSummary(dir);
checkContentSummary(c, webhdfs.getContentSummary(dir));
assertEquals("Invalid space consumed", 59 * FILE_SIZE * 3,
c.getSpaceConsumed());
assertEquals("Invalid space consumed", QUOTA_SIZE - (59 * FILE_SIZE * 3),