diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index d923b87238..45f226fc4c 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -574,6 +574,9 @@ Release 2.7.0 - UNRELEASED
     HADOOP-11211. mapreduce.job.classloader.system.classes semantics should
     be order-independent. (Yitong Zhou via gera)
+
+    HADOOP-11389. Clean up byte to string encoding issues in hadoop-common.
+    (wheat9)
 
 Release 2.6.0 - 2014-11-18
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index d2c805280d..c71f35a08c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -67,6 +67,7 @@
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.collections.map.UnmodifiableMap;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -2263,7 +2264,7 @@ public Reader getConfResourceAsReader(String name) {
         LOG.info("found resource " + name + " at " + url);
       }
 
-      return new InputStreamReader(url.openStream());
+      return new InputStreamReader(url.openStream(), Charsets.UTF_8);
     } catch (Exception e) {
       return null;
     }
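
NOTE: the common thread in this patch: `new InputStreamReader(stream)` and `String.getBytes()` use the JVM's platform default charset (`file.encoding`), so the same bytes can decode differently from machine to machine. A minimal standalone sketch of that pitfall (not part of the patch; it uses java.nio.charset.StandardCharsets where the patch uses the Guava/commons-io Charsets helpers):

    import java.io.ByteArrayInputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;

    public class DefaultCharsetPitfall {
      public static void main(String[] args) throws Exception {
        byte[] utf8 = "r\u00e9sum\u00e9".getBytes(StandardCharsets.UTF_8);

        // Implicit: decodes with file.encoding -- mojibake on a non-UTF-8 JVM.
        InputStreamReader implicit =
            new InputStreamReader(new ByteArrayInputStream(utf8));

        // Explicit: decodes identically everywhere, which is what the
        // Charsets.UTF_8 argument added throughout this patch guarantees.
        InputStreamReader explicit = new InputStreamReader(
            new ByteArrayInputStream(utf8), StandardCharsets.UTF_8);

        System.out.println(implicit.getEncoding() + " vs "
            + explicit.getEncoding());
      }
    }
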
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
index dd2d5b99fb..a0675c2f16 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
@@ -32,6 +32,7 @@
 import com.google.gson.stream.JsonReader;
 import com.google.gson.stream.JsonWriter;
 
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -207,7 +208,8 @@ protected int addVersion() {
    */
   protected byte[] serialize() throws IOException {
     ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer));
+    JsonWriter writer = new JsonWriter(
+        new OutputStreamWriter(buffer, Charsets.UTF_8));
     try {
       writer.beginObject();
       if (cipher != null) {
@@ -251,7 +253,7 @@ protected Metadata(byte[] bytes) throws IOException {
       String description = null;
       Map<String, String> attributes = null;
       JsonReader reader = new JsonReader(new InputStreamReader
-          (new ByteArrayInputStream(bytes)));
+          (new ByteArrayInputStream(bytes), Charsets.UTF_8));
       try {
         reader.beginObject();
         while (reader.hasNext()) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
index 50dd1ad239..0464f55376 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.crypto.key.kms;
 
 import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.key.KeyProvider;
@@ -209,7 +210,7 @@ private static Metadata parseJSONMetadata(Map valueMap) {
   }
 
   private static void writeJson(Map map, OutputStream os) throws IOException {
-    Writer writer = new OutputStreamWriter(os);
+    Writer writer = new OutputStreamWriter(os, Charsets.UTF_8);
     ObjectMapper jsonMapper = new ObjectMapper();
     jsonMapper.writerWithDefaultPrettyPrinter().writeValue(writer, map);
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
index ba65cd2e3b..f0d7b8de44 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
@@ -32,6 +32,7 @@
 import org.apache.avro.io.DatumWriter;
 import org.apache.avro.io.EncoderFactory;
 import org.apache.avro.io.JsonEncoder;
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -234,10 +235,10 @@ public int read() throws IOException {
         if (!r.next(key, val)) {
           return -1;
         }
-        byte[] tmp = key.toString().getBytes();
+        byte[] tmp = key.toString().getBytes(Charsets.UTF_8);
         outbuf.write(tmp, 0, tmp.length);
         outbuf.write('\t');
-        tmp = val.toString().getBytes();
+        tmp = val.toString().getBytes(Charsets.UTF_8);
         outbuf.write(tmp, 0, tmp.length);
         outbuf.write('\n');
         inbuf.reset(outbuf.getData(), outbuf.getLength());
@@ -299,7 +300,8 @@ public int read() throws IOException {
         encoder.flush();
         if (!fileReader.hasNext()) {
           // Write a new line after the last Avro record.
-          output.write(System.getProperty("line.separator").getBytes());
+          output.write(System.getProperty("line.separator")
+              .getBytes(Charsets.UTF_8));
           output.flush();
         }
         pos = 0;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java
index 8bc16af2af..00c6401d88 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java
@@ -22,6 +22,7 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 
 /**
@@ -76,7 +77,8 @@ void start() {
   }
 
   protected void pump() throws IOException {
-    InputStreamReader inputStreamReader = new InputStreamReader(stream);
+    InputStreamReader inputStreamReader = new InputStreamReader(
+        stream, Charsets.UTF_8);
     BufferedReader br = new BufferedReader(inputStreamReader);
     String line = null;
     while ((line = br.readLine()) != null) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java
index 99befeea6e..57acebd85f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.http;
 
+import org.apache.commons.io.Charsets;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -25,11 +27,11 @@
  * This class is responsible for quoting HTML characters.
  */
 public class HtmlQuoting {
-  private static final byte[] ampBytes = "&amp;".getBytes();
-  private static final byte[] aposBytes = "&apos;".getBytes();
-  private static final byte[] gtBytes = "&gt;".getBytes();
-  private static final byte[] ltBytes = "&lt;".getBytes();
-  private static final byte[] quotBytes = "&quot;".getBytes();
+  private static final byte[] ampBytes = "&amp;".getBytes(Charsets.UTF_8);
+  private static final byte[] aposBytes = "&apos;".getBytes(Charsets.UTF_8);
+  private static final byte[] gtBytes = "&gt;".getBytes(Charsets.UTF_8);
+  private static final byte[] ltBytes = "&lt;".getBytes(Charsets.UTF_8);
+  private static final byte[] quotBytes = "&quot;".getBytes(Charsets.UTF_8);
 
   /**
    * Does the given string need to be quoted?
@@ -63,7 +65,7 @@ public static boolean needsQuoting(String str) {
     if (str == null) {
       return false;
     }
-    byte[] bytes = str.getBytes();
+    byte[] bytes = str.getBytes(Charsets.UTF_8);
     return needsQuoting(bytes, 0 , bytes.length);
   }
 
@@ -98,15 +100,16 @@ public static String quoteHtmlChars(String item) {
     if (item == null) {
       return null;
     }
-    byte[] bytes = item.getBytes();
+    byte[] bytes = item.getBytes(Charsets.UTF_8);
     if (needsQuoting(bytes, 0, bytes.length)) {
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       try {
         quoteHtmlChars(buffer, bytes, 0, bytes.length);
+        return buffer.toString("UTF-8");
       } catch (IOException ioe) {
         // Won't happen, since it is a bytearrayoutputstream
+        return null;
       }
-      return buffer.toString();
     } else {
       return item;
     }
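
NOTE: besides pinning the charset, the quoteHtmlChars() hunk above fixes a latent asymmetry: bytes produced with one charset must be decoded with the same charset. A standalone round-trip sketch (not part of the patch; uses StandardCharsets):

    import java.nio.charset.StandardCharsets;

    public class RoundTrip {
      public static void main(String[] args) {
        String s = "a < b & \u00e9";
        byte[] bytes = s.getBytes(StandardCharsets.UTF_8);       // encode UTF-8
        String back = new String(bytes, StandardCharsets.UTF_8); // decode UTF-8
        // Prints true on every platform; with the no-arg overloads it is only
        // guaranteed when file.encoding happens to be UTF-8.
        System.out.println(s.equals(back));
      }
    }
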
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
index 45b6419570..63a32fbc2b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
@@ -20,6 +20,8 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InterruptedIOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.net.BindException;
 import java.net.InetSocketAddress;
@@ -1065,13 +1067,14 @@ public static class StackServlet extends HttpServlet {
     public void doGet(HttpServletRequest request, HttpServletResponse response)
         throws ServletException, IOException {
       if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(),
-                                                      request, response)) {
+          request, response)) {
         return;
       }
       response.setContentType("text/plain; charset=UTF-8");
-      PrintWriter out = response.getWriter();
-      ReflectionUtils.printThreadInfo(out, "");
-      out.close();
+      try (PrintStream out = new PrintStream(
+          response.getOutputStream(), false, "UTF-8")) {
+        ReflectionUtils.printThreadInfo(out, "");
+      }
       ReflectionUtils.logThreadInfo(LOG, "jsp requested", 1);
     }
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java
index d32d58b600..3ba577fc4f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 
 import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -90,7 +91,7 @@ public String toString(T obj) throws IOException {
     serializer.serialize(obj);
     byte[] buf = new byte[outBuf.getLength()];
     System.arraycopy(outBuf.getData(), 0, buf, 0, buf.length);
-    return new String(Base64.encodeBase64(buf));
+    return new String(Base64.encodeBase64(buf), Charsets.UTF_8);
   }
 
   @Override
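
NOTE: the StackServlet hunk above also converts a manually closed PrintWriter into a try-with-resources PrintStream with an explicit encoding. A sketch of the shape of that change (not part of the patch; writeTo and its argument are hypothetical stand-ins for the servlet plumbing):

    import java.io.IOException;
    import java.io.OutputStream;
    import java.io.PrintStream;

    class Utf8PrintStreamExample {
      static void writeTo(OutputStream rawOut) throws IOException {
        // autoFlush=false; close() at the end of the try block still flushes.
        try (PrintStream out = new PrintStream(rawOut, false, "UTF-8")) {
          out.println("thread dump would go here");
        }
      }
    }
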
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
index 4cda107748..7a59149ff0 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
@@ -22,6 +22,8 @@
 import java.util.*;
 import java.rmi.server.UID;
 import java.security.MessageDigest;
+
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.*;
 import org.apache.hadoop.util.Options;
 import org.apache.hadoop.fs.*;
@@ -849,7 +851,7 @@ public static class Writer implements java.io.Closeable, Syncable {
       try {
         MessageDigest digester = MessageDigest.getInstance("MD5");
         long time = Time.now();
-        digester.update((new UID()+"@"+time).getBytes());
+        digester.update((new UID()+"@"+time).getBytes(Charsets.UTF_8));
         sync = digester.digest();
       } catch (Exception e) {
         throw new RuntimeException(e);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
index 37b97f2a64..91178ecdc2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
@@ -23,6 +23,7 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -281,7 +282,7 @@ private void writeStreamHeader() throws IOException {
       // The compressed bzip2 stream should start with the
       // identifying characters BZ. Caller of CBZip2OutputStream
       // i.e. this class must write these characters.
-      out.write(HEADER.getBytes());
+      out.write(HEADER.getBytes(Charsets.UTF_8));
     }
   }
@@ -415,7 +416,7 @@ private BufferedInputStream readStreamHeader() throws IOException {
     byte[] headerBytes = new byte[HEADER_LEN];
     int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN);
     if (actualRead != -1) {
-      String header = new String(headerBytes);
+      String header = new String(headerBytes, Charsets.UTF_8);
       if (header.compareTo(HEADER) != 0) {
         bufferedIn.reset();
       } else {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFileDumper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFileDumper.java
index ad94c4297b..aabdf57a26 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFileDumper.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFileDumper.java
@@ -24,6 +24,7 @@
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -233,7 +234,7 @@ static public void dumpInfo(String file, PrintStream out, Configuration conf)
           out.printf("%X", b);
         }
       } else {
-        out.print(new String(key, 0, sampleLen));
+        out.print(new String(key, 0, sampleLen, Charsets.UTF_8));
       }
       if (sampleLen < key.length) {
         out.print("...");
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcConstants.java
index c457500e90..d5e795b92f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcConstants.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcConstants.java
@@ -19,6 +19,7 @@
 
 import java.nio.ByteBuffer;
 
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 
 @InterfaceAudience.Private
@@ -53,7 +54,8 @@ private RpcConstants() {
   /**
    * The first four bytes of Hadoop RPC connections
   */
-  public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes());
+  public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes
+      (Charsets.UTF_8));
 
   public static final int HEADER_LEN_AFTER_HRPC_PART = 3; // 3 bytes that follow
 
   // 1 : Introduce ping and server does not throw away RPCs
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index a4d669ae74..e508d4e01f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -69,6 +69,7 @@
 import javax.security.sasl.SaslException;
 import javax.security.sasl.SaslServer;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -182,7 +183,7 @@ boolean isTerse(Class t) {
    * and send back a nicer response.
    */
   private static final ByteBuffer HTTP_GET_BYTES = ByteBuffer.wrap(
-      "GET ".getBytes());
+      "GET ".getBytes(Charsets.UTF_8));
 
   /**
    * An HTTP response to send back if we detect an HTTP request to our IPC
@@ -1709,7 +1710,7 @@ private void setupBadVersionResponse(int clientVersion) throws IOException {
   private void setupHttpRequestOnIpcPortResponse() throws IOException {
     Call fakeCall = new Call(0, RpcConstants.INVALID_RETRY_COUNT, null, this);
     fakeCall.setResponse(ByteBuffer.wrap(
-        RECEIVED_HTTP_REQ_RESPONSE.getBytes()));
+        RECEIVED_HTTP_REQ_RESPONSE.getBytes(Charsets.UTF_8)));
     responder.doRespond(fakeCall);
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java
index 77f74cc404..4749ce19a6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java
@@ -24,6 +24,7 @@
 import javax.servlet.*;
 import javax.servlet.http.*;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.logging.*;
 import org.apache.commons.logging.impl.*;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -66,7 +67,7 @@ private static void process(String urlstring) {
       connection.connect();
 
       BufferedReader in = new BufferedReader(new InputStreamReader(
-          connection.getInputStream()));
+          connection.getInputStream(), Charsets.UTF_8));
       for(String line; (line = in.readLine()) != null; )
         if (line.startsWith(MARKER)) {
          System.out.println(TAG.matcher(line).replaceAll(""));
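
NOTE: RpcConstants, Server, and (below) Credentials pin wire-protocol magic strings such as "hrpc", "GET ", and "HDTS" to UTF-8. These are ASCII, so the point is not the particular charset but that the byte layout no longer depends on the JVM. A standalone sketch (not part of the patch; uses StandardCharsets):

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    class WireMagic {
      static final ByteBuffer HEADER =
          ByteBuffer.wrap("hrpc".getBytes(StandardCharsets.UTF_8));

      public static void main(String[] args) {
        // Prints 68 72 70 63 regardless of the platform's file.encoding.
        for (byte b : HEADER.array()) {
          System.out.printf("%02x ", b);
        }
      }
    }
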
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java
index 841874fc08..0e707780c4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java
@@ -29,6 +29,7 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -225,7 +226,7 @@ protected int getDmax(String metricName) {
    * a multiple of 4.
    */
  protected void xdr_string(String s) {
-    byte[] bytes = s.getBytes();
+    byte[] bytes = s.getBytes(Charsets.UTF_8);
     int len = bytes.length;
     xdr_int(len);
     System.arraycopy(bytes, 0, buffer, offset, len);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java
index e4b5580536..167205e93e 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java
@@ -269,14 +269,14 @@ public String toString() {
 
   static String toString(Configuration c) {
     ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    PrintStream ps = new PrintStream(buffer);
-    PropertiesConfiguration tmp = new PropertiesConfiguration();
-    tmp.copy(c);
     try {
+      PrintStream ps = new PrintStream(buffer, false, "UTF-8");
+      PropertiesConfiguration tmp = new PropertiesConfiguration();
+      tmp.copy(c);
       tmp.save(ps);
+      return buffer.toString("UTF-8");
     } catch (Exception e) {
       throw new MetricsConfigException(e);
     }
-    return buffer.toString();
   }
 }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/FileSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/FileSink.java
index d1364160e2..ab121bcf67 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/FileSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/FileSink.java
@@ -20,9 +20,9 @@
 
 import java.io.Closeable;
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.PrintWriter;
+import java.io.PrintStream;
 
 import org.apache.commons.configuration.SubsetConfiguration;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -40,15 +40,15 @@
 @InterfaceStability.Evolving
 public class FileSink implements MetricsSink, Closeable {
   private static final String FILENAME_KEY = "filename";
-  private PrintWriter writer;
+  private PrintStream writer;
 
   @Override
   public void init(SubsetConfiguration conf) {
     String filename = conf.getString(FILENAME_KEY);
     try {
-      writer = filename == null
-          ? new PrintWriter(System.out)
-          : new PrintWriter(new FileWriter(new File(filename), true));
+      writer = filename == null ? System.out
+          : new PrintStream(new FileOutputStream(new File(filename)),
+                            true, "UTF-8");
     } catch (Exception e) {
       throw new MetricsException("Error creating "+ filename, e);
     }
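
NOTE: MetricsConfig.toString() above stages text in a ByteArrayOutputStream; after the change, the PrintStream that writes it and the toString() that reads it back name the same encoding. A standalone sketch of that pattern (not part of the patch):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    class BufferCapture {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(buffer, false, "UTF-8"); // encode
        ps.println("filename = \u00fcml\u00e4ut.log");
        ps.flush();
        System.out.print(buffer.toString("UTF-8"));               // decode
      }
    }
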
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
index 9bc3f15d97..e72fe24844 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
@@ -25,6 +25,7 @@
 import java.net.Socket;
 
 import org.apache.commons.configuration.SubsetConfiguration;
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -64,7 +65,8 @@ public void init(SubsetConfiguration conf) {
     try {
       // Open a connection to the Graphite server.
       socket = new Socket(serverHost, serverPort);
-      writer = new OutputStreamWriter(socket.getOutputStream());
+      writer = new OutputStreamWriter(
+          socket.getOutputStream(), Charsets.UTF_8);
     } catch (Exception e) {
       throw new MetricsException("Error creating connection, "
           + serverHost + ":" + serverPort, e);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
index b3581f9e8d..164ea085d7 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
@@ -29,6 +29,7 @@
 import java.util.Map;
 
 import org.apache.commons.configuration.SubsetConfiguration;
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.metrics2.MetricsSink;
@@ -223,7 +224,7 @@ protected String getHostName() {
    * @param s the string to be written to buffer at offset location
    */
   protected void xdr_string(String s) {
-    byte[] bytes = s.getBytes();
+    byte[] bytes = s.getBytes(Charsets.UTF_8);
     int len = bytes.length;
     xdr_int(len);
     System.arraycopy(bytes, 0, buffer, offset, len);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java
index 2662108124..59c0ca9675 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java
@@ -20,13 +20,16 @@
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY;
 
 import java.io.BufferedReader;
+import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -96,9 +99,10 @@ private Map<String, String> load() {
       return null;
     }
 
-    BufferedReader reader = null;
-    try {
-      reader = new BufferedReader(new FileReader(filename));
+
+    try (BufferedReader reader =
+             new BufferedReader(new InputStreamReader(
+                 new FileInputStream(filename), Charsets.UTF_8))) {
       String line = reader.readLine();
       while (line != null) {
         line = line.trim();
@@ -115,15 +119,6 @@ private Map<String, String> load() {
     } catch (Exception e) {
       LOG.warn(filename + " cannot be read.", e);
       return null;
-    } finally {
-      if (reader != null) {
-        try {
-          reader.close();
-        } catch (IOException e) {
-          LOG.warn(filename + " cannot be read.", e);
-          return null;
-        }
-      }
     }
     return loadMap;
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java
index 4fb9e45614..43d1b66d44 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.security;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.http.HttpServer2;
 import org.apache.hadoop.security.authentication.server.AuthenticationFilter;
 import org.apache.hadoop.conf.Configuration;
@@ -24,8 +25,10 @@
 import org.apache.hadoop.http.FilterInitializer;
 import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler;
 
+import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
@@ -78,10 +81,10 @@ public void initFilter(FilterContainer container, Configuration conf) {
     if (signatureSecretFile == null) {
       throw new RuntimeException("Undefined property: " + SIGNATURE_SECRET_FILE);
     }
-    
-    try {
-      StringBuilder secret = new StringBuilder();
-      Reader reader = new FileReader(signatureSecretFile);
+
+    StringBuilder secret = new StringBuilder();
+    try (Reader reader = new InputStreamReader(
+        new FileInputStream(signatureSecretFile), Charsets.UTF_8)) {
       int c = reader.read();
       while (c > -1) {
         secret.append((char)c);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java
index b81e810f19..e6b8722c35 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java
@@ -32,6 +32,7 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -217,7 +218,8 @@ public void readTokenStorageStream(DataInputStream in) throws IOException {
     readFields(in);
   }
 
-  private static final byte[] TOKEN_STORAGE_MAGIC = "HDTS".getBytes();
+  private static final byte[] TOKEN_STORAGE_MAGIC =
+      "HDTS".getBytes(Charsets.UTF_8);
   private static final byte TOKEN_STORAGE_VERSION = 0;
 
   public void writeTokenStorageToStream(DataOutputStream os)
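
NOTE: TableMapping and AuthenticationFilterInitializer above make the same two-part change: FileReader, which offers no way to specify a charset (before Java 11), is replaced by InputStreamReader over a FileInputStream, and the manual finally/close bookkeeping by try-with-resources. A standalone sketch (not part of the patch; uses StandardCharsets):

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;

    class ReadFirstLine {
      static String firstLine(String filename) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(filename), StandardCharsets.UTF_8))) {
          return reader.readLine(); // reader is closed on every exit path
        }
      }
    }
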
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
index e72d988bdf..c0c8d2b64f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
@@ -17,8 +17,10 @@
  */
 package org.apache.hadoop.security;
 
+import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Hashtable;
@@ -34,6 +36,7 @@
 import javax.naming.directory.SearchControls;
 import javax.naming.directory.SearchResult;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -366,9 +369,10 @@ String extractPassword(String pwFile) {
       // an anonymous bind
       return "";
     }
-
-    try (Reader reader = new FileReader(pwFile)) {
-      StringBuilder password = new StringBuilder();
+
+    StringBuilder password = new StringBuilder();
+    try (Reader reader = new InputStreamReader(
+        new FileInputStream(pwFile), Charsets.UTF_8)) {
       int c = reader.read();
       while (c > -1) {
         password.append((char)c);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java
index 83f46efd6e..f2b21e851b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java
@@ -44,6 +44,7 @@
 import javax.security.sasl.SaslServerFactory;
 
 import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -184,11 +185,11 @@ public static void init(Configuration conf) {
   }
 
   static String encodeIdentifier(byte[] identifier) {
-    return new String(Base64.encodeBase64(identifier));
+    return new String(Base64.encodeBase64(identifier), Charsets.UTF_8);
   }
 
   static byte[] decodeIdentifier(String identifier) {
-    return Base64.decodeBase64(identifier.getBytes());
+    return Base64.decodeBase64(identifier.getBytes(Charsets.UTF_8));
   }
 
   public static <T extends TokenIdentifier> T getIdentifier(String id,
@@ -206,7 +207,8 @@ public static <T extends TokenIdentifier> T getIdentifier(String id,
   }
 
   static char[] encodePassword(byte[] password) {
-    return new String(Base64.encodeBase64(password)).toCharArray();
+    return new String(Base64.encodeBase64(password),
+                      Charsets.UTF_8).toCharArray();
   }
 
   /** Splitting fully qualified Kerberos name into parts */
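
NOTE: the SaslRpcServer hunks pin the String side of Base64 conversions. Base64 text is pure ASCII, so any ASCII-compatible charset yields the same bytes; the explicit UTF-8 simply removes the dependence on the platform default. The resulting round trip, restated standalone (mirrors the patched code, with StandardCharsets in place of the Charsets helper):

    import java.nio.charset.StandardCharsets;
    import org.apache.commons.codec.binary.Base64;

    class IdentifierCodec {
      static String encodeIdentifier(byte[] identifier) {
        return new String(Base64.encodeBase64(identifier),
            StandardCharsets.UTF_8);
      }

      static byte[] decodeIdentifier(String identifier) {
        return Base64.decodeBase64(identifier.getBytes(StandardCharsets.UTF_8));
      }
    }
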
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java
index e152d46c49..e995cb6cc6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java
@@ -22,11 +22,13 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -217,7 +219,9 @@ public static boolean updateMapInternal(BiMap<Integer, String> map,
     try {
       Process process = Runtime.getRuntime().exec(
           new String[] { "bash", "-c", command });
-      br = new BufferedReader(new InputStreamReader(process.getInputStream()));
+      br = new BufferedReader(
+          new InputStreamReader(process.getInputStream(),
+                                Charset.defaultCharset()));
       String line = null;
       while ((line = br.readLine()) != null) {
         String[] nameId = line.split(regex);
@@ -552,7 +556,7 @@ static StaticMapping parseStaticMap(File staticMapFile)
     Map<Integer, Integer> gidMapping = new HashMap<Integer, Integer>();
 
     BufferedReader in = new BufferedReader(new InputStreamReader(
-        new FileInputStream(staticMapFile)));
+        new FileInputStream(staticMapFile), Charsets.UTF_8));
 
     try {
       String line = null;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java
index 5dc2abfd13..05958a058a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.security.alias;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -165,7 +166,7 @@ public CredentialEntry getCredentialEntry(String alias) throws IOException {
   }
 
   public static char[] bytesToChars(byte[] bytes) {
-    String pass = new String(bytes);
+    String pass = new String(bytes, Charsets.UTF_8);
     return pass.toCharArray();
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/UserProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/UserProvider.java
index 262cbadd71..127ccf005d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/UserProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/UserProvider.java
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
@@ -56,7 +57,8 @@ public synchronized CredentialEntry getCredentialEntry(String alias) {
     if (bytes == null) {
       return null;
     }
-    return new CredentialEntry(alias, new String(bytes).toCharArray());
+    return new CredentialEntry(
+        alias, new String(bytes, Charsets.UTF_8).toCharArray());
   }
 
   @Override
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java
index 81993e9af2..f461dacab2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java
@@ -31,6 +31,7 @@
 import java.util.TreeMap;
 import java.util.UUID;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -100,7 +101,8 @@ private static String getUniqueLocalTraceFileName() {
       // out of /proc/self/stat. (There isn't any portable way to get the
       // process ID from Java.)
       reader = new BufferedReader(
-          new InputStreamReader(new FileInputStream("/proc/self/stat")));
+          new InputStreamReader(new FileInputStream("/proc/self/stat"),
+              Charsets.UTF_8));
       String line = reader.readLine();
       if (line == null) {
         throw new EOFException();
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java
index 4ae5aedccf..5fdfbfadd2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java
@@ -25,6 +25,7 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -91,7 +92,7 @@ private int addSpanReceiver(List<String> args) throws IOException {
       return 1;
     }
     ByteArrayOutputStream configStream = new ByteArrayOutputStream();
-    PrintStream configsOut = new PrintStream(configStream);
+    PrintStream configsOut = new PrintStream(configStream, false, "UTF-8");
     SpanReceiverInfoBuilder factory = new SpanReceiverInfoBuilder(className);
     String prefix = "";
     for (int i = 0; i < args.size(); ++i) {
@@ -113,13 +114,15 @@ private int addSpanReceiver(List<String> args) throws IOException {
       configsOut.print(prefix + key + " = " + value);
       prefix = ", ";
     }
+
+    String configStreamStr = configStream.toString("UTF-8");
     try {
       long id = remote.addSpanReceiver(factory.build());
       System.out.println("Added trace span receiver " + id +
-          " with configuration " + configStream.toString());
+          " with configuration " + configStreamStr);
     } catch (IOException e) {
       System.out.println("addSpanReceiver error with configuration " +
-          configStream.toString());
+          configStreamStr);
       throw e;
     }
     return 0;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FileBasedIPList.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FileBasedIPList.java
index 8bfb5d93ae..8020b7a10f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FileBasedIPList.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FileBasedIPList.java
@@ -19,13 +19,18 @@
 
 import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 
+import org.apache.commons.io.Charsets;
+import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -78,7 +83,8 @@ private static String[] readLines(String fileName) {
     if (fileName != null) {
       File file = new File (fileName);
       if (file.exists()) {
-        FileReader fileReader = new FileReader(file);
+        Reader fileReader = new InputStreamReader(
+            new FileInputStream(file), Charsets.UTF_8);
         BufferedReader bufferedReader = new BufferedReader(fileReader);
         List<String> lines = new ArrayList<String>();
         String line = null;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java
index b012add42c..ae77e6c333 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java
@@ -22,6 +22,7 @@
 import java.util.Set;
 import java.util.HashSet;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -72,7 +73,8 @@ public static void readFileToSetWithFileInputStream(String type,
       throws IOException {
     BufferedReader reader = null;
     try {
-      reader = new BufferedReader(new InputStreamReader(fileInputStream));
+      reader = new BufferedReader(
+          new InputStreamReader(fileInputStream, Charsets.UTF_8));
       String line;
       while ((line = reader.readLine()) != null) {
         String[] nodes = line.split("[ \t\n\f\r]+");
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java
index 3977e60287..d9a73263d8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java
@@ -20,13 +20,16 @@
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
 import java.lang.management.ManagementFactory;
 import java.lang.management.ThreadInfo;
 import java.lang.management.ThreadMXBean;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -154,7 +157,7 @@ private static String getTaskName(long id, String name) {
    * @param stream the stream to
    * @param title a string title for the stack trace
    */
-  public synchronized static void printThreadInfo(PrintWriter stream,
+  public synchronized static void printThreadInfo(PrintStream stream,
                                                   String title) {
     final int STACK_DEPTH = 20;
     boolean contention = threadBean.isThreadContentionMonitoringEnabled();
@@ -215,9 +218,12 @@ public static void logThreadInfo(Log log,
       }
     }
     if (dumpStack) {
-      ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-      printThreadInfo(new PrintWriter(buffer), title);
-      log.info(buffer.toString());
+      try {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        printThreadInfo(new PrintStream(buffer, false, "UTF-8"), title);
+        log.info(buffer.toString(Charset.defaultCharset().name()));
+      } catch (UnsupportedEncodingException ignored) {
+      }
     }
   }
 }
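
NOTE: Shell (below) and ShellBasedIdMapping deliberately use Charset.defaultCharset() rather than UTF-8: the bytes come from a child process ("bash", "id", etc.) that writes in the operating system's locale encoding, so the platform default is the correct choice there. A standalone sketch (not part of the patch):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.Charset;

    class SubprocessOutput {
      static String firstLine(String... command) throws IOException {
        Process process = new ProcessBuilder(command).start();
        // Decode subprocess output with the OS locale, not a fixed charset.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
            process.getInputStream(), Charset.defaultCharset()))) {
          return br.readLine();
        }
      }
    }
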
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
index a44e992126..f0100d440a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.InputStream;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Timer;
@@ -493,11 +494,11 @@ private void runCommand() throws IOException {
       timeOutTimer.schedule(timeoutTimerTask, timeOutInterval);
     }
     final BufferedReader errReader =
-        new BufferedReader(new InputStreamReader(process
-            .getErrorStream()));
+        new BufferedReader(new InputStreamReader(
+            process.getErrorStream(), Charset.defaultCharset()));
     BufferedReader inReader =
-        new BufferedReader(new InputStreamReader(process
-            .getInputStream()));
+        new BufferedReader(new InputStreamReader(
+            process.getInputStream(), Charset.defaultCharset()));
     final StringBuffer errMsg = new StringBuffer();
 
     // read error and input streams as this would free up the buffers
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
index eae8ea7681..08aa2c9bb8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
@@ -245,9 +245,7 @@ public void testManyClosedSocketsInCache() throws Exception {
 
   private void assertXceiverCount(int expected) {
     int count = getXceiverCountWithoutServer();
     if (count != expected) {
-      ReflectionUtils.printThreadInfo(
-          new PrintWriter(System.err),
-          "Thread dumps");
+      ReflectionUtils.printThreadInfo(System.err, "Thread dumps");
       fail("Expected " + expected + " xceivers, found " + count);
     }