HDFS-5933. Optimize the FSImage layout for ACLs. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1567785 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris Nauroth 2014-02-12 22:56:32 +00:00
parent fc14360b03
commit ea0b21af15
7 changed files with 79 additions and 110 deletions

View File

@ -78,6 +78,8 @@ HDFS-4685 (Unreleased)
HDFS-5923. Do not persist the ACL bit in the FsPermission.
(Haohui Mai via cnauroth)
HDFS-5933. Optimize the FSImage layout for ACLs (Haohui Mai via cnauroth)
OPTIMIZATIONS
BUG FIXES

View File

@ -93,7 +93,6 @@
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEditLogProto;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclFeatureProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.util.XMLUtils;
@ -468,14 +467,7 @@ public void writeFields(DataOutputStream out) throws IOException {
permissions.write(out);
if (this.opCode == OP_ADD) {
boolean hasAcl = aclEntries != null;
out.writeBoolean(hasAcl);
if (hasAcl) {
AclFeatureProto.newBuilder()
.addAllEntries(PBHelper.convertAclEntryProto(aclEntries)).build()
.writeDelimitedTo(out);
}
AclEditLogUtil.write(aclEntries, out);
FSImageSerialization.writeString(clientName,out);
FSImageSerialization.writeString(clientMachine,out);
// write clientId and callId
@ -535,7 +527,6 @@ void readFields(DataInputStream in, int logVersion)
// clientname, clientMachine and block locations of last block.
if (this.opCode == OP_ADD) {
aclEntries = AclEditLogUtil.read(in, logVersion);
this.clientName = FSImageSerialization.readString(in);
this.clientMachine = FSImageSerialization.readString(in);
// read clientId and callId
@ -1364,14 +1355,7 @@ void writeFields(DataOutputStream out) throws IOException {
FSImageSerialization.writeLong(timestamp, out); // mtime
FSImageSerialization.writeLong(timestamp, out); // atime, unused at this
permissions.write(out);
boolean hasAcl = aclEntries != null;
out.writeBoolean(hasAcl);
if (hasAcl) {
AclFeatureProto.newBuilder()
.addAllEntries(PBHelper.convertAclEntryProto(aclEntries)).build()
.writeDelimitedTo(out);
}
AclEditLogUtil.write(aclEntries, out);
}
@Override

View File

@ -31,10 +31,12 @@
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEntryProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
@ -46,6 +48,7 @@
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.AclFeatureProto;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName;
@ -61,6 +64,20 @@ public final class FSImageFormatPBINode {
private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1;
private final static int USER_STRID_OFFSET = 40;
private final static int GROUP_STRID_OFFSET = 16;
private static final int ACL_ENTRY_NAME_MASK = (1 << 24) - 1;
private static final int ACL_ENTRY_NAME_OFFSET = 6;
private static final int ACL_ENTRY_TYPE_OFFSET = 3;
private static final int ACL_ENTRY_SCOPE_OFFSET = 5;
private static final int ACL_ENTRY_PERM_MASK = 7;
private static final int ACL_ENTRY_TYPE_MASK = 3;
private static final int ACL_ENTRY_SCOPE_MASK = 1;
private static final FsAction[] FSACTION_VALUES = FsAction.values();
private static final AclEntryScope[] ACL_ENTRY_SCOPE_VALUES = AclEntryScope
.values();
private static final AclEntryType[] ACL_ENTRY_TYPE_VALUES = AclEntryType
.values();
private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
public final static class Loader {
@ -73,9 +90,21 @@ public static PermissionStatus loadPermission(long id,
new FsPermission(perm));
}
public static ImmutableList<AclEntry> loadAclEntries(int id,
final ImmutableList<AclEntry>[] aclTable) {
return aclTable[id];
public static ImmutableList<AclEntry> loadAclEntries(
AclFeatureProto proto, final String[] stringTable) {
ImmutableList.Builder<AclEntry> b = ImmutableList.builder();
for (int v : proto.getEntriesList()) {
int p = v & ACL_ENTRY_PERM_MASK;
int t = (v >> ACL_ENTRY_TYPE_OFFSET) & ACL_ENTRY_TYPE_MASK;
int s = (v >> ACL_ENTRY_SCOPE_OFFSET) & ACL_ENTRY_SCOPE_MASK;
int nid = (v >> ACL_ENTRY_NAME_OFFSET) & ACL_ENTRY_NAME_MASK;
String name = stringTable[nid];
b.add(new AclEntry.Builder().setName(name)
.setPermission(FSACTION_VALUES[p])
.setScope(ACL_ENTRY_SCOPE_VALUES[s])
.setType(ACL_ENTRY_TYPE_VALUES[t]).build());
}
return b.build();
}
public static INodeReference loadINodeReference(
@ -112,9 +141,9 @@ public static INodeDirectory loadINodeDirectory(INodeSection.INode n,
dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota);
}
if (d.hasAclId()) {
dir.addAclFeature(new AclFeature(loadAclEntries(d.getAclId(),
state.getExtendedAclTable())));
if (d.hasAcl()) {
dir.addAclFeature(new AclFeature(loadAclEntries(d.getAcl(),
state.getStringTable())));
}
return dir;
}
@ -249,9 +278,9 @@ private INodeFile loadINodeFile(INodeSection.INode n) {
n.getName().toByteArray(), permissions, f.getModificationTime(),
f.getAccessTime(), blocks, replication, f.getPreferredBlockSize());
if (f.hasAclId()) {
file.addAclFeature(new AclFeature(loadAclEntries(f.getAclId(),
state.getExtendedAclTable())));
if (f.hasAcl()) {
file.addAclFeature(new AclFeature(loadAclEntries(f.getAcl(),
state.getStringTable())));
}
// under-construction information
@ -305,14 +334,17 @@ private static long buildPermissionStatus(INodeAttributes n,
| n.getFsPermissionShort();
}
/**
* Get a unique id for the AclEntry list. Notice that the code does not
* deduplicate the list of aclentry yet.
*/
private static int buildAclEntries(AclFeature f,
final SaverContext.DeduplicationMap<ImmutableList<AclEntryProto>> map) {
return map.getId(ImmutableList.copyOf(PBHelper.convertAclEntryProto(f
.getEntries())));
private static AclFeatureProto.Builder buildAclEntries(AclFeature f,
final SaverContext.DeduplicationMap<String> map) {
AclFeatureProto.Builder b = AclFeatureProto.newBuilder();
for (AclEntry e : f.getEntries()) {
int v = ((map.getId(e.getName()) & ACL_ENTRY_NAME_MASK) << ACL_ENTRY_NAME_OFFSET)
| (e.getType().ordinal() << ACL_ENTRY_TYPE_OFFSET)
| (e.getScope().ordinal() << ACL_ENTRY_SCOPE_OFFSET)
| (e.getPermission().ordinal());
b.addEntries(v);
}
return b;
}
public static INodeSection.INodeFile.Builder buildINodeFile(
@ -326,7 +358,7 @@ public static INodeSection.INodeFile.Builder buildINodeFile(
AclFeature f = file.getAclFeature();
if (f != null) {
b.setAclId(buildAclEntries(f, state.getExtendedAclMap()));
b.setAcl(buildAclEntries(f, state.getStringMap()));
}
return b;
}
@ -342,7 +374,7 @@ public static INodeSection.INodeDirectory.Builder buildINodeDirectory(
AclFeature f = dir.getAclFeature();
if (f != null) {
b.setAclId(buildAclEntries(f, state.getExtendedAclMap()));
b.setAcl(buildAclEntries(f, state.getStringMap()));
}
return b;
}

View File

@ -42,16 +42,11 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEntryProto;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclFeatureProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.ExtendedAclSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
@ -66,7 +61,6 @@
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressorStream;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.LimitInputStream;
@ -81,15 +75,10 @@ public final class FSImageFormatProtobuf {
public static final class LoaderContext {
private String[] stringTable;
private ImmutableList<AclEntry>[] extendedAclTable;
public String[] getStringTable() {
return stringTable;
}
public ImmutableList<AclEntry>[] getExtendedAclTable() {
return extendedAclTable;
}
}
public static final class SaverContext {
@ -125,16 +114,10 @@ Set<Entry<E, Integer>> entrySet() {
private final DeduplicationMap<String> stringMap = DeduplicationMap
.newMap();
private final DeduplicationMap<ImmutableList<AclEntryProto>> extendedAclMap = DeduplicationMap
.newMap();
public DeduplicationMap<String> getStringMap() {
return stringMap;
}
public DeduplicationMap<ImmutableList<AclEntryProto>> getExtendedAclMap() {
return extendedAclMap;
}
}
public static final class Loader implements FSImageFormat.AbstractLoader {
@ -239,9 +222,6 @@ public int compare(FileSummary.Section s1, FileSummary.Section s2) {
case STRING_TABLE:
loadStringTableSection(in);
break;
case EXTENDED_ACL:
loadExtendedAclSection(in);
break;
case INODE: {
currentStep = new Step(StepType.INODES);
prog.beginStep(Phase.LOADING_FSIMAGE, currentStep);
@ -301,18 +281,6 @@ private void loadStringTableSection(InputStream in) throws IOException {
}
}
@SuppressWarnings("unchecked")
private void loadExtendedAclSection(InputStream in) throws IOException {
ExtendedAclSection s = ExtendedAclSection.parseDelimitedFrom(in);
ctx.extendedAclTable = new ImmutableList[s.getNumEntry() + 1];
for (int i = 0; i < s.getNumEntry(); ++i) {
ExtendedAclSection.Entry e = ExtendedAclSection.Entry
.parseDelimitedFrom(in);
ctx.extendedAclTable[e.getId()] = ImmutableList.copyOf(PBHelper
.convertAclEntry(e.getAcl().getEntriesList()));
}
}
private void loadSecretManagerSection(InputStream in) throws IOException {
SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in);
int numKeys = s.getNumKeys(), numTokens = s.getNumTokens();
@ -481,7 +449,6 @@ private void saveInternal(FileOutputStream fout,
saveCacheManagerSection(b);
prog.endStep(Phase.SAVING_CHECKPOINT, step);
saveExtendedAclSection(b);
saveStringTableSection(b);
// We use the underlyingOutputStream to write the header. Therefore flush
@ -560,22 +527,6 @@ private void saveStringTableSection(FileSummary.Builder summary)
}
commitSection(summary, SectionName.STRING_TABLE);
}
private void saveExtendedAclSection(FileSummary.Builder summary)
throws IOException {
OutputStream out = sectionOutputStream;
ExtendedAclSection.Builder b = ExtendedAclSection.newBuilder()
.setNumEntry(saverContext.extendedAclMap.size());
b.build().writeDelimitedTo(out);
for (Entry<ImmutableList<AclEntryProto>, Integer> e : saverContext.extendedAclMap
.entrySet()) {
ExtendedAclSection.Entry.Builder eb = ExtendedAclSection.Entry
.newBuilder().setId(e.getValue())
.setAcl(AclFeatureProto.newBuilder().addAllEntries(e.getKey()));
eb.build().writeDelimitedTo(out);
}
commitSection(summary, SectionName.EXTENDED_ACL);
}
}
/**

View File

@ -169,9 +169,9 @@ private void loadFileDiffList(InputStream in, INodeFile file, int size)
fileInPb.getPermission(), state.getStringTable());
AclFeature acl = null;
if (fileInPb.hasAclId()) {
if (fileInPb.hasAcl()) {
acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries(
fileInPb.getAclId(), state.getExtendedAclTable()));
fileInPb.getAcl(), state.getStringTable()));
}
copy = new INodeFileAttributes.SnapshotCopy(pbf.getName()
@ -265,9 +265,9 @@ private void loadDirectoryDiffList(InputStream in, INodeDirectory dir,
PermissionStatus permission = loadPermission(
dirCopyInPb.getPermission(), state.getStringTable());
AclFeature acl = null;
if (dirCopyInPb.hasAclId()) {
if (dirCopyInPb.hasAcl()) {
acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries(
dirCopyInPb.getAclId(), state.getExtendedAclTable()));
dirCopyInPb.getAcl(), state.getStringTable()));
}
long modTime = dirCopyInPb.getModificationTime();

View File

@ -60,10 +60,6 @@ message AclStatusProto {
repeated AclEntryProto entries = 4;
}
message AclFeatureProto {
repeated AclEntryProto entries = 1;
}
message AclEditLogProto {
required string src = 1;
repeated AclEntryProto entries = 2;

View File

@ -89,6 +89,23 @@ message INodeSection {
optional string clientMachine = 2;
}
message AclFeatureProto {
/**
* An ACL entry is represented by a 32-bit integer in Big Endian
* format. The bits can be divided in four segments:
* [0:2) || [2:26) || [26:27) || [27:29) || [29:32)
*
* [0:2) -- reserved for futute uses.
* [2:26) -- the name of the entry, which is an ID that points to a
* string in the StringTableSection.
* [26:27) -- the scope of the entry (AclEntryScopeProto)
* [27:29) -- the type of the entry (AclEntryTypeProto)
* [29:32) -- the permission of the entry (FsActionProto)
*
*/
repeated fixed32 entries = 2 [packed = true];
}
message INodeFile {
optional uint32 replication = 1;
optional uint64 modificationTime = 2;
@ -97,7 +114,7 @@ message INodeSection {
optional fixed64 permission = 5;
repeated BlockProto blocks = 6;
optional FileUnderConstructionFeature fileUC = 7;
optional uint32 aclId = 8;
optional AclFeatureProto acl = 8;
}
message INodeDirectory {
@ -107,7 +124,7 @@ message INodeSection {
// diskspace quota
optional uint64 dsQuota = 3;
optional fixed64 permission = 4;
optional uint32 aclId = 5;
optional AclFeatureProto acl = 5;
}
message INodeSymlink {
@ -279,17 +296,4 @@ message CacheManagerSection {
required uint32 numDirectives = 3;
// repeated CachePoolInfoProto pools
// repeated CacheDirectiveInfoProto directives
}
/**
* This section maps string to id
* NAME: EXTENDED_ACL
*/
message ExtendedAclSection {
message Entry {
required uint32 id = 1;
optional AclFeatureProto acl = 2;
}
optional uint32 numEntry = 1;
// repeated Entry entries
}