HDFS-14323. Distcp fails in Hadoop 3.x when 2.x source webhdfs url has special characters in hdfs file path. Contributed by Srinivasu Majeti.

Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
This commit is contained in:
Srinivasu Majeti 2019-05-17 19:19:12 +02:00 committed by Wei-Chiu Chuang
parent 4cb3da6ac7
commit 3e5e5b028a

View File

@ -609,7 +609,13 @@ URL toUrl(final HttpOpParam.Op op, final Path fspath,
boolean pathAlreadyEncoded = false;
try {
fspathUriDecoded = URLDecoder.decode(fspathUri.getPath(), "UTF-8");
pathAlreadyEncoded = true;
//The check below was added as part of fixing HDFS-14323: it makes sure
//pathAlreadyEncoded is set only when the input URL actually contains
//an encoded sequence. This helps when pulling data from a 2.x Hadoop
//cluster to a 3.x cluster using the 3.x distcp client.
if(!fspathUri.getPath().equals(fspathUriDecoded)) {
pathAlreadyEncoded = true;
}
} catch (IllegalArgumentException ex) {
LOG.trace("Cannot decode URL encoded file", ex);
}