HADOOP-17618. ABFS: Partially obfuscate SAS object IDs in Logs (#2845)

Contributed by Sumangala Patki

(cherry picked from commit 3450522c2f)
This commit is contained in:
sumangala-patki 2021-09-09 18:34:12 +05:30 committed by GitHub
parent 59a955dfa0
commit 1cb9e747eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 273 additions and 78 deletions

View File

@ -41,5 +41,11 @@ public final class HttpQueryParams {
public static final String QUERY_PARAM_UPN = "upn";
public static final String QUERY_PARAM_BLOBTYPE = "blobtype";
//query params for SAS
public static final String QUERY_PARAM_SAOID = "saoid";
public static final String QUERY_PARAM_SKOID = "skoid";
public static final String QUERY_PARAM_SUOID = "suoid";
public static final String QUERY_PARAM_SIGNATURE = "sig";
private HttpQueryParams() {}
}

View File

@ -87,7 +87,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) {
"Operation failed: \"%1$s\", %2$s, HEAD, %3$s",
abfsHttpOperation.getStatusDescription(),
abfsHttpOperation.getStatusCode(),
abfsHttpOperation.getSignatureMaskedUrl());
abfsHttpOperation.getMaskedUrl());
}
return String.format(
@ -95,7 +95,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) {
abfsHttpOperation.getStatusDescription(),
abfsHttpOperation.getStatusCode(),
abfsHttpOperation.getMethod(),
abfsHttpOperation.getSignatureMaskedUrl(),
abfsHttpOperation.getMaskedUrl(),
abfsHttpOperation.getStorageErrorCode(),
// Remove break line to ensure the request id and timestamp can be shown in console.
abfsHttpOperation.getStorageErrorMessage().replaceAll("\\n", " "));

View File

@ -21,16 +21,15 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.UUID;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSocketFactory;
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParser;
@ -51,8 +50,6 @@
public class AbfsHttpOperation implements AbfsPerfLoggable {
private static final Logger LOG = LoggerFactory.getLogger(AbfsHttpOperation.class);
public static final String SIGNATURE_QUERY_PARAM_KEY = "sig=";
private static final int CONNECT_TIMEOUT = 30 * 1000;
private static final int READ_TIMEOUT = 30 * 1000;
@ -85,6 +82,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable {
private long connectionTimeMs;
private long sendRequestTimeMs;
private long recvResponseTimeMs;
private boolean shouldMask = false;
public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult(
final URL url,
@ -150,6 +148,10 @@ public String getRequestId() {
return requestId;
}
public void setMaskForSAS() {
shouldMask = true;
}
public int getBytesSent() {
return bytesSent;
}
@ -194,7 +196,7 @@ public String toString() {
sb.append(",");
sb.append(method);
sb.append(",");
sb.append(getSignatureMaskedUrl());
sb.append(getMaskedUrl());
return sb.toString();
}
@ -227,11 +229,30 @@ public String getLogString() {
.append(" m=")
.append(method)
.append(" u=")
.append(getSignatureMaskedEncodedUrl());
.append(getMaskedEncodedUrl());
return sb.toString();
}
public String getMaskedUrl() {
if (!shouldMask) {
return url.toString();
}
if (maskedUrl != null) {
return maskedUrl;
}
maskedUrl = UriUtils.getMaskedUrl(url);
return maskedUrl;
}
public String getMaskedEncodedUrl() {
if (maskedEncodedUrl != null) {
return maskedEncodedUrl;
}
maskedEncodedUrl = UriUtils.encodedUrlStr(getMaskedUrl());
return maskedEncodedUrl;
}
/**
* Initializes a new HTTP request and opens the connection.
*
@ -521,43 +542,6 @@ private boolean isNullInputStream(InputStream stream) {
return stream == null ? true : false;
}
public static String getSignatureMaskedUrl(String url) {
int qpStrIdx = url.indexOf('?' + SIGNATURE_QUERY_PARAM_KEY);
if (qpStrIdx == -1) {
qpStrIdx = url.indexOf('&' + SIGNATURE_QUERY_PARAM_KEY);
}
if (qpStrIdx == -1) {
return url;
}
final int sigStartIdx = qpStrIdx + SIGNATURE_QUERY_PARAM_KEY.length() + 1;
final int ampIdx = url.indexOf("&", sigStartIdx);
final int sigEndIndex = (ampIdx != -1) ? ampIdx : url.length();
String signature = url.substring(sigStartIdx, sigEndIndex);
return url.replace(signature, "XXXX");
}
public static String encodedUrlStr(String url) {
try {
return URLEncoder.encode(url, "UTF-8");
} catch (UnsupportedEncodingException e) {
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
}
}
public String getSignatureMaskedUrl() {
if (this.maskedUrl == null) {
this.maskedUrl = getSignatureMaskedUrl(this.url.toString());
}
return this.maskedUrl;
}
public String getSignatureMaskedEncodedUrl() {
if (this.maskedEncodedUrl == null) {
this.maskedEncodedUrl = encodedUrlStr(getSignatureMaskedUrl());
}
return this.maskedEncodedUrl;
}
public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation {
/**
* Creates an instance to represent fixed results.

View File

@ -242,6 +242,7 @@ private boolean executeHttpOperation(final int retryCount) throws AzureBlobFileS
break;
case SAS:
// do nothing; the SAS token should already be appended to the query string
httpOperation.setMaskForSAS(); //mask sig/oid from url for logs
break;
case SharedKey:
// sign the HTTP request

View File

@ -18,14 +18,42 @@
package org.apache.hadoop.fs.azurebfs.utils;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID;
/**
* Utility class to help with Abfs url transformation to blob urls.
*/
public final class UriUtils {
private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net";
private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX);
private static final Set<String> FULL_MASK_PARAM_KEYS = new HashSet<>(
Collections.singleton(QUERY_PARAM_SIGNATURE));
private static final Set<String> PARTIAL_MASK_PARAM_KEYS = new HashSet<>(
Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID));
private static final Character CHAR_MASK = 'X';
private static final String FULL_MASK = "XXXXX";
private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550;
private static final int PARTIAL_MASK_VISIBLE_LEN = 18;
/**
* Checks whether a string includes abfs url.
@ -73,6 +101,74 @@ public static String generateUniqueTestPath() {
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test";
}
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
Set<String> queryParamsForFullMask,
Set<String> queryParamsForPartialMask) {
return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY);
}
/**
* Generic function to mask a set of query parameters partially/fully and
* return the resultant query string
* @param keyValueList List of NameValuePair instances for query keys/values
* @param queryParamsForFullMask values for these params will appear as "XXXX"
* @param queryParamsForPartialMask values will be masked with 'X', except for
* the last PARTIAL_MASK_VISIBLE_LEN characters
* @param queryLen to initialize StringBuilder for the masked query
* @return the masked url query part
*/
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
Set<String> queryParamsForFullMask,
Set<String> queryParamsForPartialMask, int queryLen) {
StringBuilder maskedUrl = new StringBuilder(queryLen);
for (NameValuePair keyValuePair : keyValueList) {
String key = keyValuePair.getName();
if (key.isEmpty()) {
throw new IllegalArgumentException("Query param key should not be empty");
}
String value = keyValuePair.getValue();
maskedUrl.append(key);
maskedUrl.append(EQUAL);
if (value != null && !value.isEmpty()) { //no mask
if (queryParamsForFullMask.contains(key)) {
maskedUrl.append(FULL_MASK);
} else if (queryParamsForPartialMask.contains(key)) {
int valueLen = value.length();
int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN
? PARTIAL_MASK_VISIBLE_LEN : valueLen / 2;
maskedUrl.append(value, 0, valueLen - maskedLen);
maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen));
} else {
maskedUrl.append(value);
}
}
maskedUrl.append(AND_MARK);
}
maskedUrl.deleteCharAt(maskedUrl.length() - 1);
return maskedUrl.toString();
}
public static String encodedUrlStr(String url) {
try {
return URLEncoder.encode(url, "UTF-8");
} catch (UnsupportedEncodingException e) {
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
}
}
public static String getMaskedUrl(URL url) {
String queryString = url.getQuery();
if (queryString == null) {
return url.toString();
}
List<NameValuePair> queryKeyValueList = URLEncodedUtils
.parse(queryString, StandardCharsets.UTF_8);
String maskedQueryString = maskUrlQueryParameters(queryKeyValueList,
FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length());
return url.toString().replace(queryString, maskedQueryString);
}
private UriUtils() {
}
}

View File

@ -400,14 +400,14 @@ public void testSignatureMask() throws Exception {
AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient()
.renamePath(src, "/testABC" + "/abc.txt", null);
AbfsHttpOperation result = abfsHttpRestOperation.getResult();
String url = result.getSignatureMaskedUrl();
String encodedUrl = result.getSignatureMaskedEncodedUrl();
String url = result.getMaskedUrl();
String encodedUrl = result.getMaskedEncodedUrl();
Assertions.assertThat(url.substring(url.indexOf("sig=")))
.describedAs("Signature query param should be masked")
.startsWith("sig=XXXX");
.startsWith("sig=XXXXX");
Assertions.assertThat(encodedUrl.substring(encodedUrl.indexOf("sig%3D")))
.describedAs("Signature query param should be masked")
.startsWith("sig%3DXXXX");
.startsWith("sig%3DXXXXX");
}
@Test

View File

@ -20,72 +20,93 @@
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
public class TestAbfsHttpOperation {
@Test
public void testMaskingAndEncoding()
throws MalformedURLException, UnsupportedEncodingException {
testIfMaskAndEncodeSuccessful("Where sig is the only query param",
"http://www.testurl.net?sig=abcd", "http://www.testurl.net?sig=XXXX");
"http://www.testurl.net?sig=abcd", "http://www.testurl.net?sig=XXXXX");
testIfMaskAndEncodeSuccessful("Where sig is the first query param",
"http://www.testurl.net?sig=abcd&abc=xyz",
"http://www.testurl.net?sig=XXXX&abc=xyz");
testIfMaskAndEncodeSuccessful("Where oid is the only query param",
"http://www.testurl.net?saoid=abcdef",
"http://www.testurl.net?saoid=abcXXX");
testIfMaskAndEncodeSuccessful("Where sig is the first query param, oid is last",
"http://www.testurl.net?sig=abcd&abc=xyz&saoid=pqrs456",
"http://www.testurl.net?sig=XXXXX&abc=xyz&saoid=pqrsXXX");
testIfMaskAndEncodeSuccessful(
"Where sig is neither first nor last query param",
"http://www.testurl.net?lmn=abc&sig=abcd&abc=xyz",
"http://www.testurl.net?lmn=abc&sig=XXXX&abc=xyz");
"Where sig/oid are neither first nor last query param",
"http://www.testurl.net?lmn=abc&sig=abcd&suoid=mnop789&abc=xyz",
"http://www.testurl.net?lmn=abc&sig=XXXXX&suoid=mnopXXX&abc=xyz");
testIfMaskAndEncodeSuccessful("Where sig is the last query param",
"http://www.testurl.net?abc=xyz&sig=abcd",
"http://www.testurl.net?abc=xyz&sig=XXXX");
testIfMaskAndEncodeSuccessful("Where sig is the last query param, oid is first",
"http://www.testurl.net?skoid=pqrs123&abc=xyz&sig=abcd",
"http://www.testurl.net?skoid=pqrsXXX&abc=xyz&sig=XXXXX");
testIfMaskAndEncodeSuccessful("Where sig query param is not present",
testIfMaskAndEncodeSuccessful("Where sig/oid query param are not present",
"http://www.testurl.net?abc=xyz", "http://www.testurl.net?abc=xyz");
testIfMaskAndEncodeSuccessful(
"Where sig query param is not present but mysig",
"http://www.testurl.net?abc=xyz&mysig=qwerty",
"http://www.testurl.net?abc=xyz&mysig=qwerty");
"Where sig/oid query param are not present but mysig and myoid",
"http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw",
"http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw");
testIfMaskAndEncodeSuccessful(
"Where sig query param is not present but sigmy",
"http://www.testurl.net?abc=xyz&sigmy=qwerty",
"http://www.testurl.net?abc=xyz&sigmy=qwerty");
"Where sig/oid query param is not present but sigmy and oidmy",
"http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw",
"http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw");
testIfMaskAndEncodeSuccessful(
"Where sig query param is not present but a " + "value sig",
"http://www.testurl.net?abc=xyz&mnop=sig",
"http://www.testurl.net?abc=xyz&mnop=sig");
"Where sig/oid query param is not present but values sig and oid",
"http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid",
"http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid");
testIfMaskAndEncodeSuccessful(
"Where sig query param is not present but a " + "value ends with sig",
"http://www.testurl.net?abc=xyz&mnop=abcsig",
"http://www.testurl.net?abc=xyz&mnop=abcsig");
"Where sig/oid query param is not present but a value ends with sig/oid",
"http://www.testurl.net?abc=xyzsaoid&mnop=abcsig",
"http://www.testurl.net?abc=xyzsaoid&mnop=abcsig");
testIfMaskAndEncodeSuccessful(
"Where sig query param is not present but a " + "value starts with sig",
"http://www.testurl.net?abc=xyz&mnop=sigabc",
"http://www.testurl.net?abc=xyz&mnop=sigabc");
"Where sig/oid query param is not present but a value starts with sig/oid",
"http://www.testurl.net?abc=saoidxyz&mnop=sigabc",
"http://www.testurl.net?abc=saoidxyz&mnop=sigabc");
}
@Test
public void testUrlWithNullValues()
throws MalformedURLException, UnsupportedEncodingException {
testIfMaskAndEncodeSuccessful("Where param to be masked has null value",
"http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig",
"http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig");
testIfMaskAndEncodeSuccessful("Where visible param has null value",
"http://www.testurl.net?abc=xyz&pqr=&mnop=abcd",
"http://www.testurl.net?abc=xyz&pqr=&mnop=abcd");
testIfMaskAndEncodeSuccessful("Where last param has null value",
"http://www.testurl.net?abc=xyz&pqr=&mnop=",
"http://www.testurl.net?abc=xyz&pqr=&mnop=");
}
private void testIfMaskAndEncodeSuccessful(final String scenario,
final String url, final String expectedMaskedUrl)
throws UnsupportedEncodingException {
throws UnsupportedEncodingException, MalformedURLException {
Assertions.assertThat(AbfsHttpOperation.getSignatureMaskedUrl(url))
Assertions.assertThat(UriUtils.getMaskedUrl(new URL(url)))
.describedAs(url + " (" + scenario + ") after masking should be: "
+ expectedMaskedUrl).isEqualTo(expectedMaskedUrl);
final String expectedMaskedEncodedUrl = URLEncoder
.encode(expectedMaskedUrl, "UTF-8");
Assertions.assertThat(AbfsHttpOperation.encodedUrlStr(expectedMaskedUrl))
Assertions.assertThat(UriUtils.encodedUrlStr(expectedMaskedUrl))
.describedAs(
url + " (" + scenario + ") after masking and encoding should "
+ "be: " + expectedMaskedEncodedUrl)

View File

@ -18,9 +18,21 @@
package org.apache.hadoop.fs.azurebfs.utils;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.Assert;
import org.junit.Test;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Test ABFS UriUtils.
*/
@ -45,4 +57,79 @@ public void testExtractRawAccountName() throws Exception {
Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName(null));
Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName("abfs.dfs.cores.windows.net"));
}
@Test
// If a config for partial masking is introduced, this test will have to be
// modified for the config-controlled partial mask length
public void testMaskUrlQueryParameters() throws Exception {
Set<String> fullMask = new HashSet<>(Arrays.asList("abc", "bcd"));
Set<String> partialMask = new HashSet<>(Arrays.asList("pqr", "xyz"));
//Partial and full masking test
List<NameValuePair> keyValueList = URLEncodedUtils
.parse("abc=123&pqr=45678&def=789&bcd=012&xyz=678",
StandardCharsets.UTF_8);
Assert.assertEquals("Incorrect masking",
"abc=XXXXX&pqr=456XX&def=789&bcd=XXXXX&xyz=67X",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Mask GUIDs
keyValueList = URLEncodedUtils
.parse("abc=123&pqr=256877f2-c094-48c8-83df-ddb5825694fd&def=789",
StandardCharsets.UTF_8);
Assert.assertEquals("Incorrect partial masking for guid",
"abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//For params entered for both full and partial masks, full mask applies
partialMask.add("abc");
Assert.assertEquals("Full mask should apply",
"abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Duplicate key (to be masked) with different values
keyValueList = URLEncodedUtils
.parse("abc=123&pqr=4561234&abc=789", StandardCharsets.UTF_8);
Assert.assertEquals("Duplicate key: Both values should get masked",
"abc=XXXXX&pqr=4561XXX&abc=XXXXX",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Duplicate key (not to be masked) with different values
keyValueList = URLEncodedUtils
.parse("abc=123&def=456&pqrs=789&def=000", StandardCharsets.UTF_8);
Assert.assertEquals("Duplicate key: Values should not get masked",
"abc=XXXXX&def=456&pqrs=789&def=000",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Empty param value
keyValueList = URLEncodedUtils
.parse("abc=123&def=&pqr=789&s=1", StandardCharsets.UTF_8);
Assert.assertEquals("Incorrect url with empty query value",
"abc=XXXXX&def=&pqr=78X&s=1",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Empty param key
keyValueList = URLEncodedUtils
.parse("def=2&pqr=789&s=1", StandardCharsets.UTF_8);
keyValueList.add(new BasicNameValuePair("", "m1"));
List<NameValuePair> finalKeyValueList = keyValueList;
intercept(IllegalArgumentException.class, () -> UriUtils
.maskUrlQueryParameters(finalKeyValueList, fullMask, partialMask));
//Param (not to be masked) with null value
keyValueList = URLEncodedUtils
.parse("abc=123&s=1", StandardCharsets.UTF_8);
keyValueList.add(new BasicNameValuePair("null1", null));
Assert.assertEquals("Null value, incorrect query construction",
"abc=XXXXX&s=1&null1=",
UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask));
//Param (to be masked) with null value
keyValueList.add(new BasicNameValuePair("null2", null));
fullMask.add("null2");
Assert.assertEquals("No mask should be added for null value",
"abc=XXXXX&s=1&null1=&null2=", UriUtils
.maskUrlQueryParameters(keyValueList, fullMask,
partialMask)); //no mask
}
}