diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/EtagChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/EtagChecksum.java new file mode 100644 index 0000000000..cc29f1b68a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/EtagChecksum.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.hadoop.fs.FileChecksum; + +/** + * An etag as a checksum. + * Consider these suitable for checking if an object has changed, but + * not suitable for comparing two different objects for equivalence, + * especially between object stores. + */ +public class EtagChecksum extends FileChecksum { + + /** The algorithm name: {@value}. */ + private static final String ETAG = "etag"; + + /** + * Etag string. + */ + private String eTag = ""; + + /** + * Create with an empty etag. + */ + public EtagChecksum() { + } + + /** + * Create with a string etag. 
+ * @param eTag etag + */ + public EtagChecksum(String eTag) { + this.eTag = eTag; + } + + @Override + public String getAlgorithmName() { + return ETAG; + } + + @Override + public int getLength() { + return eTag.getBytes(StandardCharsets.UTF_8).length; + } + + @Override + public byte[] getBytes() { + return eTag != null + ? eTag.getBytes(StandardCharsets.UTF_8) + : new byte[0]; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeUTF(eTag != null ? eTag : ""); + } + + @Override + public void readFields(DataInput in) throws IOException { + eTag = in.readUTF(); + } + + @Override + public String toString() { + return "etag: \"" + eTag + '"'; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/package-info.java new file mode 100644 index 0000000000..ebe1db479b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/package-info.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * This package is for classes to be shared across object stores; for internal + * use within the hadoop-* modules only. No stability guarantees. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.store; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestEtagChecksum.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestEtagChecksum.java new file mode 100644 index 0000000000..ef9613f5af --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestEtagChecksum.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store; + +import java.io.IOException; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; + +/** + * Unit test of etag operations. 
+ */ +public class TestEtagChecksum extends Assert { + + private final EtagChecksum empty1 = tag(""); + private final EtagChecksum empty2 = tag(""); + private final EtagChecksum valid1 = tag("valid"); + private final EtagChecksum valid2 = tag("valid"); + + @Test + public void testEmptyTagsEqual() { + assertEquals(empty1, empty2); + } + + @Test + public void testEmptyTagRoundTrip() throws Throwable { + assertEquals(empty1, roundTrip(empty1)); + } + + @Test + public void testValidTagsEqual() { + assertEquals(valid1, valid2); + } + + @Test + public void testValidTagRoundTrip() throws Throwable { + assertEquals(valid1, roundTrip(valid1)); + } + + @Test + public void testValidAndEmptyTagsDontMatch() { + assertNotEquals(valid1, empty1); + assertNotEquals(valid1, tag("other valid one")); + } + + @Test + public void testDifferentTagsDontMatch() { + assertNotEquals(valid1, tag("other valid one")); + } + + private EtagChecksum tag(String t) { + return new EtagChecksum(t); + } + + private EtagChecksum roundTrip(EtagChecksum tag) throws IOException { + try (DataOutputBuffer dob = new DataOutputBuffer(); + DataInputBuffer dib = new DataInputBuffer()) { + tag.write(dob); + dib.reset(dob.getData(), dob.getLength()); + EtagChecksum t2 = new EtagChecksum(); + t2.readFields(dib); + return t2; + } + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f461c9e1ee..a8147ed1c9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -112,6 +112,7 @@ import org.apache.hadoop.fs.s3a.s3guard.S3Guard; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.fs.store.EtagChecksum; import org.apache.hadoop.security.UserGroupInformation; import 
org.apache.hadoop.util.BlockingThreadPoolExecutorService; import org.apache.hadoop.util.Progressable; @@ -538,6 +539,14 @@ public S3AInputPolicy getInputPolicy() { return inputPolicy; } + /** + * Get the encryption algorithm of this endpoint. + * @return the encryption algorithm. + */ + public S3AEncryptionMethods getServerSideEncryptionAlgorithm() { + return serverSideEncryptionAlgorithm; + } + /** * Demand create the directory allocator, then create a temporary file. * {@link LocalDirAllocator#createTmpFileForWrite(String, long, Configuration)}. @@ -1069,6 +1078,7 @@ private boolean innerRename(Path source, Path dest) * @throws IOException IO and object access problems. */ @VisibleForTesting + @Retries.RetryRaw public ObjectMetadata getObjectMetadata(Path path) throws IOException { return getObjectMetadata(pathToKey(path)); } @@ -2934,6 +2944,36 @@ public boolean isFile(Path f) throws IOException { return super.isFile(f); } + /** + * Get the etag of an object at the path via HEAD request and return it + * as a checksum object. This has whatever guarantees about equivalence + * the S3 implementation offers. + *