From 1cb9e747eb770f9023ccbbb544feb44be6b2cfb8 Mon Sep 17 00:00:00 2001 From: sumangala-patki <70206833+sumangala-patki@users.noreply.github.com> Date: Thu, 9 Sep 2021 18:34:12 +0530 Subject: [PATCH] HADOOP-17618. ABFS: Partially obfuscate SAS object IDs in Logs (#2845) Contributed by Sumangala Patki (cherry picked from commit 3450522c2f5a4cf9b54dce4c25a71f1b4b98c446) --- .../azurebfs/constants/HttpQueryParams.java | 6 ++ .../AbfsRestOperationException.java | 4 +- .../azurebfs/services/AbfsHttpOperation.java | 70 ++++++-------- .../azurebfs/services/AbfsRestOperation.java | 1 + .../hadoop/fs/azurebfs/utils/UriUtils.java | 96 +++++++++++++++++++ ...ITestAzureBlobFileSystemDelegationSAS.java | 8 +- .../services/TestAbfsHttpOperation.java | 79 +++++++++------ .../fs/azurebfs/utils/TestUriUtils.java | 87 +++++++++++++++++ 8 files changed, 273 insertions(+), 78 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java index 8a4ca90f35..e9bb95cad2 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java @@ -41,5 +41,11 @@ public final class HttpQueryParams { public static final String QUERY_PARAM_UPN = "upn"; public static final String QUERY_PARAM_BLOBTYPE = "blobtype"; + //query params for SAS + public static final String QUERY_PARAM_SAOID = "saoid"; + public static final String QUERY_PARAM_SKOID = "skoid"; + public static final String QUERY_PARAM_SUOID = "suoid"; + public static final String QUERY_PARAM_SIGNATURE = "sig"; + private HttpQueryParams() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java index 61b625657c..6c53762363 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java @@ -87,7 +87,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) { "Operation failed: \"%1$s\", %2$s, HEAD, %3$s", abfsHttpOperation.getStatusDescription(), abfsHttpOperation.getStatusCode(), - abfsHttpOperation.getSignatureMaskedUrl()); + abfsHttpOperation.getMaskedUrl()); } return String.format( @@ -95,7 +95,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) { abfsHttpOperation.getStatusDescription(), abfsHttpOperation.getStatusCode(), abfsHttpOperation.getMethod(), - abfsHttpOperation.getSignatureMaskedUrl(), + abfsHttpOperation.getMaskedUrl(), abfsHttpOperation.getStorageErrorCode(), // Remove break line to ensure the request id and timestamp can be shown in console. 
abfsHttpOperation.getStorageErrorMessage().replaceAll("\\n", " ")); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 720b99b9f8..f10097ded1 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -21,16 +21,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; -import java.net.URLEncoder; import java.util.List; import java.util.UUID; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLSocketFactory; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonParser; @@ -51,8 +50,6 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private static final Logger LOG = LoggerFactory.getLogger(AbfsHttpOperation.class); - public static final String SIGNATURE_QUERY_PARAM_KEY = "sig="; - private static final int CONNECT_TIMEOUT = 30 * 1000; private static final int READ_TIMEOUT = 30 * 1000; @@ -85,6 +82,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private long connectionTimeMs; private long sendRequestTimeMs; private long recvResponseTimeMs; + private boolean shouldMask = false; public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( final URL url, @@ -150,6 +148,10 @@ public String getRequestId() { return requestId; } + public void setMaskForSAS() { + shouldMask = true; + } + public int getBytesSent() { return bytesSent; } @@ -194,7 +196,7 @@ public String toString() { sb.append(","); sb.append(method); 
sb.append(","); - sb.append(getSignatureMaskedUrl()); + sb.append(getMaskedUrl()); return sb.toString(); } @@ -227,11 +229,30 @@ public String getLogString() { .append(" m=") .append(method) .append(" u=") - .append(getSignatureMaskedEncodedUrl()); + .append(getMaskedEncodedUrl()); return sb.toString(); } + public String getMaskedUrl() { + if (!shouldMask) { + return url.toString(); + } + if (maskedUrl != null) { + return maskedUrl; + } + maskedUrl = UriUtils.getMaskedUrl(url); + return maskedUrl; + } + + public String getMaskedEncodedUrl() { + if (maskedEncodedUrl != null) { + return maskedEncodedUrl; + } + maskedEncodedUrl = UriUtils.encodedUrlStr(getMaskedUrl()); + return maskedEncodedUrl; + } + /** * Initializes a new HTTP request and opens the connection. * @@ -521,43 +542,6 @@ private boolean isNullInputStream(InputStream stream) { return stream == null ? true : false; } - public static String getSignatureMaskedUrl(String url) { - int qpStrIdx = url.indexOf('?' + SIGNATURE_QUERY_PARAM_KEY); - if (qpStrIdx == -1) { - qpStrIdx = url.indexOf('&' + SIGNATURE_QUERY_PARAM_KEY); - } - if (qpStrIdx == -1) { - return url; - } - final int sigStartIdx = qpStrIdx + SIGNATURE_QUERY_PARAM_KEY.length() + 1; - final int ampIdx = url.indexOf("&", sigStartIdx); - final int sigEndIndex = (ampIdx != -1) ? 
ampIdx : url.length(); - String signature = url.substring(sigStartIdx, sigEndIndex); - return url.replace(signature, "XXXX"); - } - - public static String encodedUrlStr(String url) { - try { - return URLEncoder.encode(url, "UTF-8"); - } catch (UnsupportedEncodingException e) { - return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl"; - } - } - - public String getSignatureMaskedUrl() { - if (this.maskedUrl == null) { - this.maskedUrl = getSignatureMaskedUrl(this.url.toString()); - } - return this.maskedUrl; - } - - public String getSignatureMaskedEncodedUrl() { - if (this.maskedEncodedUrl == null) { - this.maskedEncodedUrl = encodedUrlStr(getSignatureMaskedUrl()); - } - return this.maskedEncodedUrl; - } - public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation { /** * Creates an instance to represent fixed results. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 4c24c37a0d..0951f3670c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -242,6 +242,7 @@ private boolean executeHttpOperation(final int retryCount) throws AzureBlobFileS break; case SAS: // do nothing; the SAS token should already be appended to the query string + httpOperation.setMaskForSAS(); //mask sig/oid from url for logs break; case SharedKey: // sign the HTTP request diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java index 1bbc1b39e1..e27d54b443 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java @@ -18,14 +18,42 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID; + /** * Utility class to help with Abfs url transformation to blob urls. */ public final class UriUtils { private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net"; private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX); + private static final Set FULL_MASK_PARAM_KEYS = new HashSet<>( + Collections.singleton(QUERY_PARAM_SIGNATURE)); + private static final Set PARTIAL_MASK_PARAM_KEYS = new HashSet<>( + Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID)); + private static final Character CHAR_MASK = 'X'; + private static final String FULL_MASK = "XXXXX"; + private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550; + private static final int PARTIAL_MASK_VISIBLE_LEN = 18; /** * Checks whether a string includes abfs url. 
@@ -73,6 +101,74 @@ public static String generateUniqueTestPath() { return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test"; } + public static String maskUrlQueryParameters(List keyValueList, + Set queryParamsForFullMask, + Set queryParamsForPartialMask) { + return maskUrlQueryParameters(keyValueList, queryParamsForFullMask, + queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY); + } + + /** + * Generic function to mask a set of query parameters partially/fully and + * return the resultant query string + * @param keyValueList List of NameValuePair instances for query keys/values + * @param queryParamsForFullMask values for these params will appear as "XXXXX" + * @param queryParamsForPartialMask values keep their leading characters; the + * last PARTIAL_MASK_VISIBLE_LEN (or half, if shorter) are replaced with 'X' + * @param queryLen to initialize StringBuilder for the masked query + * @return the masked url query part + */ + public static String maskUrlQueryParameters(List keyValueList, + Set queryParamsForFullMask, + Set queryParamsForPartialMask, int queryLen) { + StringBuilder maskedUrl = new StringBuilder(queryLen); + for (NameValuePair keyValuePair : keyValueList) { + String key = keyValuePair.getName(); + if (key.isEmpty()) { + throw new IllegalArgumentException("Query param key should not be empty"); + } + String value = keyValuePair.getValue(); + maskedUrl.append(key); + maskedUrl.append(EQUAL); + if (value != null && !value.isEmpty()) { //null or empty values get no mask + if (queryParamsForFullMask.contains(key)) { + maskedUrl.append(FULL_MASK); + } else if (queryParamsForPartialMask.contains(key)) { + int valueLen = value.length(); + int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN + ? 
PARTIAL_MASK_VISIBLE_LEN : valueLen / 2; + maskedUrl.append(value, 0, valueLen - maskedLen); + maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen)); + } else { + maskedUrl.append(value); + } + } + maskedUrl.append(AND_MARK); + } + maskedUrl.deleteCharAt(maskedUrl.length() - 1); + return maskedUrl.toString(); + } + + public static String encodedUrlStr(String url) { + try { + return URLEncoder.encode(url, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl"; + } + } + + public static String getMaskedUrl(URL url) { + String queryString = url.getQuery(); + if (queryString == null) { + return url.toString(); + } + List queryKeyValueList = URLEncodedUtils + .parse(queryString, StandardCharsets.UTF_8); + String maskedQueryString = maskUrlQueryParameters(queryKeyValueList, + FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length()); + return url.toString().replace(queryString, maskedQueryString); + } + private UriUtils() { } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java index 50ce257b4a..82773809df 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java @@ -400,14 +400,14 @@ public void testSignatureMask() throws Exception { AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient() .renamePath(src, "/testABC" + "/abc.txt", null); AbfsHttpOperation result = abfsHttpRestOperation.getResult(); - String url = result.getSignatureMaskedUrl(); - String encodedUrl = result.getSignatureMaskedEncodedUrl(); + String url = result.getMaskedUrl(); + String encodedUrl = result.getMaskedEncodedUrl(); 
Assertions.assertThat(url.substring(url.indexOf("sig="))) .describedAs("Signature query param should be masked") - .startsWith("sig=XXXX"); + .startsWith("sig=XXXXX"); Assertions.assertThat(encodedUrl.substring(encodedUrl.indexOf("sig%3D"))) .describedAs("Signature query param should be masked") - .startsWith("sig%3DXXXX"); + .startsWith("sig%3DXXXXX"); } @Test diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java index bb7059a7a5..36914a4e4f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java @@ -20,72 +20,93 @@ import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URL; import java.net.URLEncoder; import org.assertj.core.api.Assertions; import org.junit.Test; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; + public class TestAbfsHttpOperation { @Test public void testMaskingAndEncoding() throws MalformedURLException, UnsupportedEncodingException { testIfMaskAndEncodeSuccessful("Where sig is the only query param", - "http://www.testurl.net?sig=abcd", "http://www.testurl.net?sig=XXXX"); + "http://www.testurl.net?sig=abcd", "http://www.testurl.net?sig=XXXXX"); - testIfMaskAndEncodeSuccessful("Where sig is the first query param", - "http://www.testurl.net?sig=abcd&abc=xyz", - "http://www.testurl.net?sig=XXXX&abc=xyz"); + testIfMaskAndEncodeSuccessful("Where oid is the only query param", + "http://www.testurl.net?saoid=abcdef", + "http://www.testurl.net?saoid=abcXXX"); + + testIfMaskAndEncodeSuccessful("Where sig is the first query param, oid is last", + "http://www.testurl.net?sig=abcd&abc=xyz&saoid=pqrs456", + "http://www.testurl.net?sig=XXXXX&abc=xyz&saoid=pqrsXXX"); 
testIfMaskAndEncodeSuccessful( - "Where sig is neither first nor last query param", - "http://www.testurl.net?lmn=abc&sig=abcd&abc=xyz", - "http://www.testurl.net?lmn=abc&sig=XXXX&abc=xyz"); + "Where sig/oid are neither first nor last query param", + "http://www.testurl.net?lmn=abc&sig=abcd&suoid=mnop789&abc=xyz", + "http://www.testurl.net?lmn=abc&sig=XXXXX&suoid=mnopXXX&abc=xyz"); - testIfMaskAndEncodeSuccessful("Where sig is the last query param", - "http://www.testurl.net?abc=xyz&sig=abcd", - "http://www.testurl.net?abc=xyz&sig=XXXX"); + testIfMaskAndEncodeSuccessful("Where sig is the last query param, oid is first", + "http://www.testurl.net?skoid=pqrs123&abc=xyz&sig=abcd", + "http://www.testurl.net?skoid=pqrsXXX&abc=xyz&sig=XXXXX"); - testIfMaskAndEncodeSuccessful("Where sig query param is not present", + testIfMaskAndEncodeSuccessful("Where sig/oid query param are not present", "http://www.testurl.net?abc=xyz", "http://www.testurl.net?abc=xyz"); testIfMaskAndEncodeSuccessful( - "Where sig query param is not present but mysig", - "http://www.testurl.net?abc=xyz&mysig=qwerty", - "http://www.testurl.net?abc=xyz&mysig=qwerty"); + "Where sig/oid query param are not present but mysig and myoid", + "http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw", + "http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw"); testIfMaskAndEncodeSuccessful( - "Where sig query param is not present but sigmy", - "http://www.testurl.net?abc=xyz&sigmy=qwerty", - "http://www.testurl.net?abc=xyz&sigmy=qwerty"); + "Where sig/oid query param is not present but sigmy and oidmy", + "http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw", + "http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw"); testIfMaskAndEncodeSuccessful( - "Where sig query param is not present but a " + "value sig", - "http://www.testurl.net?abc=xyz&mnop=sig", - "http://www.testurl.net?abc=xyz&mnop=sig"); + "Where sig/oid query param is not present but values sig and oid", + 
"http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid", + "http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid"); testIfMaskAndEncodeSuccessful( - "Where sig query param is not present but a " + "value ends with sig", - "http://www.testurl.net?abc=xyz&mnop=abcsig", - "http://www.testurl.net?abc=xyz&mnop=abcsig"); + "Where sig/oid query param is not present but a value ends with sig/oid", + "http://www.testurl.net?abc=xyzsaoid&mnop=abcsig", + "http://www.testurl.net?abc=xyzsaoid&mnop=abcsig"); testIfMaskAndEncodeSuccessful( - "Where sig query param is not present but a " + "value starts with sig", - "http://www.testurl.net?abc=xyz&mnop=sigabc", - "http://www.testurl.net?abc=xyz&mnop=sigabc"); + "Where sig/oid query param is not present but a value starts with sig/oid", + "http://www.testurl.net?abc=saoidxyz&mnop=sigabc", + "http://www.testurl.net?abc=saoidxyz&mnop=sigabc"); + } + + @Test + public void testUrlWithNullValues() + throws MalformedURLException, UnsupportedEncodingException { + testIfMaskAndEncodeSuccessful("Where param to be masked has null value", + "http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig", + "http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig"); + testIfMaskAndEncodeSuccessful("Where visible param has null value", + "http://www.testurl.net?abc=xyz&pqr=&mnop=abcd", + "http://www.testurl.net?abc=xyz&pqr=&mnop=abcd"); + testIfMaskAndEncodeSuccessful("Where last param has null value", + "http://www.testurl.net?abc=xyz&pqr=&mnop=", + "http://www.testurl.net?abc=xyz&pqr=&mnop="); } private void testIfMaskAndEncodeSuccessful(final String scenario, final String url, final String expectedMaskedUrl) - throws UnsupportedEncodingException { + throws UnsupportedEncodingException, MalformedURLException { - Assertions.assertThat(AbfsHttpOperation.getSignatureMaskedUrl(url)) + Assertions.assertThat(UriUtils.getMaskedUrl(new URL(url))) .describedAs(url + " (" + scenario + ") after masking should be: " + expectedMaskedUrl).isEqualTo(expectedMaskedUrl); final String 
expectedMaskedEncodedUrl = URLEncoder .encode(expectedMaskedUrl, "UTF-8"); - Assertions.assertThat(AbfsHttpOperation.encodedUrlStr(expectedMaskedUrl)) + Assertions.assertThat(UriUtils.encodedUrlStr(expectedMaskedUrl)) .describedAs( url + " (" + scenario + ") after masking and encoding should " + "be: " + expectedMaskedEncodedUrl) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java index 690e56c510..25d3f7caa4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java @@ -18,9 +18,21 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import org.junit.Assert; import org.junit.Test; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.apache.http.message.BasicNameValuePair; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + /** * Test ABFS UriUtils. 
*/ @@ -45,4 +57,79 @@ public void testExtractRawAccountName() throws Exception { Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName(null)); Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName("abfs.dfs.cores.windows.net")); } + + @Test + // If a config for partial masking is introduced, this test will have to be + // modified for the config-controlled partial mask length + public void testMaskUrlQueryParameters() throws Exception { + Set fullMask = new HashSet<>(Arrays.asList("abc", "bcd")); + Set partialMask = new HashSet<>(Arrays.asList("pqr", "xyz")); + + //Partial and full masking test + List keyValueList = URLEncodedUtils + .parse("abc=123&pqr=45678&def=789&bcd=012&xyz=678", + StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect masking", + "abc=XXXXX&pqr=456XX&def=789&bcd=XXXXX&xyz=67X", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Mask GUIDs + keyValueList = URLEncodedUtils + .parse("abc=123&pqr=256877f2-c094-48c8-83df-ddb5825694fd&def=789", + StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect partial masking for guid", + "abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //For params entered for both full and partial masks, full mask applies + partialMask.add("abc"); + Assert.assertEquals("Full mask should apply", + "abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Duplicate key (to be masked) with different values + keyValueList = URLEncodedUtils + .parse("abc=123&pqr=4561234&abc=789", StandardCharsets.UTF_8); + Assert.assertEquals("Duplicate key: Both values should get masked", + "abc=XXXXX&pqr=4561XXX&abc=XXXXX", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Duplicate key (not to be masked) with different values + keyValueList = URLEncodedUtils + 
.parse("abc=123&def=456&pqrs=789&def=000", StandardCharsets.UTF_8); + Assert.assertEquals("Duplicate key: Values should not get masked", + "abc=XXXXX&def=456&pqrs=789&def=000", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Empty param value + keyValueList = URLEncodedUtils + .parse("abc=123&def=&pqr=789&s=1", StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect url with empty query value", + "abc=XXXXX&def=&pqr=78X&s=1", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Empty param key + keyValueList = URLEncodedUtils + .parse("def=2&pqr=789&s=1", StandardCharsets.UTF_8); + keyValueList.add(new BasicNameValuePair("", "m1")); + List finalKeyValueList = keyValueList; + intercept(IllegalArgumentException.class, () -> UriUtils + .maskUrlQueryParameters(finalKeyValueList, fullMask, partialMask)); + + //Param (not to be masked) with null value + keyValueList = URLEncodedUtils + .parse("abc=123&s=1", StandardCharsets.UTF_8); + keyValueList.add(new BasicNameValuePair("null1", null)); + Assert.assertEquals("Null value, incorrect query construction", + "abc=XXXXX&s=1&null1=", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Param (to be masked) with null value + keyValueList.add(new BasicNameValuePair("null2", null)); + fullMask.add("null2"); + Assert.assertEquals("No mask should be added for null value", + "abc=XXXXX&s=1&null1=&null2=", UriUtils + .maskUrlQueryParameters(keyValueList, fullMask, + partialMask)); //no mask + } }