From 8fdef0bd9d1ece560ab4e1a1ec7fc77c46a034bb Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Thu, 11 Feb 2016 17:20:10 -0800 Subject: [PATCH] HADOOP-12699. TestKMS#testKMSProvider intermittently fails during 'test rollover draining'. Contributed by Xiao Chen. --- .../hadoop-common/CHANGES.txt | 3 + ...eyGeneratorKeyProviderCryptoExtension.java | 10 +++ .../hadoop-kms/src/site/markdown/index.md.vm | 77 +++++++++++++++++-- .../hadoop/crypto/key/kms/server/TestKMS.java | 21 ++++- 4 files changed, 103 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 0a19800803..fe71564aba 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1117,6 +1117,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12788. OpensslAesCtrCryptoCodec should log which random number generator is used. (Wei-Chiu Chuang via umamahesh) + HADOOP-12699. TestKMS#testKMSProvider intermittently fails during + 'test rollover draining'. (Xiao Chen via wang) + OPTIMIZATIONS HADOOP-11785. Reduce the number of listStatus operation in distcp diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java index a33f4f1f74..04cd065e60 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java @@ -153,6 +153,16 @@ public EagerKeyGeneratorKeyProviderCryptoExtension(Configuration conf, new CryptoExtension(conf, keyProviderCryptoExtension)); } + /** + * Roll a new version of the given key generating the material for it. + *

+ * Due to the caching on the ValueQueue, even after a rollNewVersion call, + * {@link #generateEncryptedKey(String)} may still return an old key - even + * when we drain the queue here, the async thread may later fill in old keys. + * This is acceptable since old version keys are still able to decrypt, and + * client shall make no assumptions that it will get a new versioned key + * after rollNewVersion. + */ @Override public KeyVersion rollNewVersion(String name) throws NoSuchAlgorithmException, IOException { diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm index f0958be710..1472ba2a51 100644 --- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm +++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm @@ -59,15 +59,21 @@ NOTE: You need to restart the KMS for the configuration changes to take effect. $H3 KMS Cache -KMS caches keys for short period of time to avoid excessive hits to the underlying key provider. +KMS has two kinds of caching: a CachingKeyProvider for caching the encryption keys, and a KeyProvider for caching the EEKs. -The Cache is enabled by default (can be dissabled by setting the `hadoop.kms.cache.enable` boolean property to false) +$H4 CachingKeyProvider -The cache is used with the following 3 methods only, `getCurrentKey()` and `getKeyVersion()` and `getMetadata()`. +KMS caches encryption keys for a short period of time to avoid excessive hits to the underlying KeyProvider. -For the `getCurrentKey()` method, cached entries are kept for a maximum of 30000 millisecond regardless the number of times the key is being access (to avoid stale keys to be considered current). +This Cache is enabled by default (can be disabled by setting the `hadoop.kms.cache.enable` boolean property to false) -For the `getKeyVersion()` method, cached entries are kept with a default inactivity timeout of 600000 milliseconds (10 mins). 
This time out is configurable via the following property in the `etc/hadoop/kms-site.xml` configuration file: +This cache is used with the following 3 methods only: `getCurrentKey()`, `getKeyVersion()` and `getMetadata()`. + +For the `getCurrentKey()` method, cached entries are kept for a maximum of 30000 milliseconds regardless of the number of times the key is accessed (to avoid stale keys being considered current). + +For the `getKeyVersion()` method, cached entries are kept with a default inactivity timeout of 600000 milliseconds (10 mins). + +These configurations can be changed via the following properties in the `etc/hadoop/kms-site.xml` configuration file: ```xml @@ -86,6 +92,67 @@ For the `getKeyVersion()` method, cached entries are kept with a default inactiv ``` +$H4 KeyProvider + +Architecturally, both server-side (e.g. KMS) and client-side (e.g. NameNode) have a cache for EEKs. The following are configurable on the cache: + +* The size of the cache. This is the maximum number of EEKs that can be cached under each key name. +* A low watermark on the cache. For each key name, if after a get call, the number of cached EEKs is less than (size * low watermark), then the cache under this key name will be filled asynchronously. For each key name, only 1 thread can be running for the asynchronous filling. +* The maximum number of asynchronous threads overall, across key names, allowed to fill the queue in a cache. +* The cache expiry time, in milliseconds. Internally Guava cache is used as the cache implementation. The expiry approach is [expireAfterAccess](https://code.google.com/p/guava-libraries/wiki/CachesExplained). + +Note that due to the asynchronous filling mechanism, it is possible that after rollNewVersion(), the caller still gets the old EEKs. In the worst case, the caller may get up to (server-side cache size + client-side cache size) number of old EEKs, or until both caches expire. 
This behavior is a trade-off to avoid locking on the cache, and is acceptable since the old version EEKs can still be used to decrypt. + +Below are the configurations and their default values: + +Server-side can be changed via the following properties in the `etc/hadoop/kms-site.xml` configuration file: + +```xml + <property> + <name>hadoop.security.kms.encrypted.key.cache.size</name> + <value>500</value> + </property> + + <property> + <name>hadoop.security.kms.encrypted.key.cache.low.watermark</name> + <value>0.3</value> + </property> + + <property> + <name>hadoop.security.kms.encrypted.key.cache.num.fill.threads</name> + <value>2</value> + </property> + + <property> + <name>hadoop.security.kms.encrypted.key.cache.expiry</name> + <value>43200000</value> + </property> +``` + +Client-side can be changed via the following properties in the `etc/hadoop/core-site.xml` configuration file: + +```xml + <property> + <name>hadoop.security.kms.client.encrypted.key.cache.size</name> + <value>500</value> + </property> + + <property> + <name>hadoop.security.kms.client.encrypted.key.cache.low-watermark</name> + <value>0.3</value> + </property> + + <property> + <name>hadoop.security.kms.client.encrypted.key.cache.num.refill.threads</name> + <value>2</value> + </property> + + <property> + <name>hadoop.security.kms.client.encrypted.key.cache.expiry</name> + <value>43200000</value> + </property> +``` + $H3 KMS Aggregated Audit logs Audit logs are aggregated for API accesses to the GET\_KEY\_VERSION, GET\_CURRENT\_KEY, DECRYPT\_EEK, GENERATE\_EEK operations. 
diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index 7131b7c9a4..9b75ee11a0 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -29,6 +29,7 @@ import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension; import org.apache.hadoop.crypto.key.kms.KMSClientProvider; import org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.minikdc.MiniKdc; @@ -630,10 +631,24 @@ public Void call() throws Exception { EncryptedKeyVersion ekv1 = kpce.generateEncryptedKey("k6"); kpce.rollNewVersion("k6"); - EncryptedKeyVersion ekv2 = kpce.generateEncryptedKey("k6"); - Assert.assertNotEquals(ekv1.getEncryptionKeyVersionName(), - ekv2.getEncryptionKeyVersionName()); + /** + * due to the cache on the server side, client may get old keys. + * @see EagerKeyGeneratorKeyProviderCryptoExtension#rollNewVersion(String) + */ + boolean rollSucceeded = false; + for (int i = 0; i <= EagerKeyGeneratorKeyProviderCryptoExtension + .KMS_KEY_CACHE_SIZE_DEFAULT + CommonConfigurationKeysPublic. + KMS_CLIENT_ENC_KEY_CACHE_SIZE_DEFAULT; ++i) { + EncryptedKeyVersion ekv2 = kpce.generateEncryptedKey("k6"); + if (!(ekv1.getEncryptionKeyVersionName() + .equals(ekv2.getEncryptionKeyVersionName()))) { + rollSucceeded = true; + break; + } + } + Assert.assertTrue("rollover did not generate a new key even after" + + " queue is drained", rollSucceeded); return null; } });