HADOOP-14501. Switch from aalto-xml to woodstox to handle odd XML features (jeagles)

This commit is contained in:
Jonathan Eagles 2017-06-12 17:07:53 -05:00
parent 86368cc766
commit a81916ea89
4 changed files with 90 additions and 8 deletions

View File

@ -314,8 +314,8 @@
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml</groupId> <groupId>com.fasterxml.woodstox</groupId>
<artifactId>aalto-xml</artifactId> <artifactId>woodstox-core</artifactId>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.conf; package org.apache.hadoop.conf;
import com.fasterxml.aalto.stax.InputFactoryImpl; import com.ctc.wstx.stax.WstxInputFactory;
import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -284,7 +284,7 @@ public String toString() {
* Specify exact input factory to avoid time finding correct one. * Specify exact input factory to avoid time finding correct one.
* Factory is reusable across un-synchronized threads once initialized * Factory is reusable across un-synchronized threads once initialized
*/ */
private static final XMLInputFactory2 factory = new InputFactoryImpl(); private static final XMLInputFactory2 XML_INPUT_FACTORY = new WstxInputFactory();
/** /**
* Class to keep the information about the keys which replace the deprecated * Class to keep the information about the keys which replace the deprecated
@ -2646,7 +2646,7 @@ private XMLStreamReader parse(InputStream is,
if (is == null) { if (is == null) {
return null; return null;
} }
return factory.createXMLStreamReader(systemId, is); return XML_INPUT_FACTORY.createXMLStreamReader(systemId, is);
} }
private void loadResources(Properties properties, private void loadResources(Properties properties,

View File

@ -30,6 +30,7 @@
import java.net.InetAddress; import java.net.InetAddress;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.URI; import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
@ -99,6 +100,18 @@ private void startConfig() throws IOException{
out.write("<configuration>\n"); out.write("<configuration>\n");
} }
/**
 * Writes an XML declaration that carries no explicit encoding to
 * {@code out}.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void writeHeader() throws IOException {
  final String xmlDeclaration = "<?xml version=\"1.0\"?>\n";
  out.write(xmlDeclaration);
}
/**
 * Writes an XML declaration to {@code out} whose {@code encoding} attribute
 * is set to the given value.
 *
 * @param encoding text placed verbatim inside the encoding attribute
 * @throws IOException if writing to {@code out} fails
 */
private void writeHeader(String encoding) throws IOException {
  StringBuilder xmlDeclaration =
      new StringBuilder("<?xml version=\"1.0\" encoding=\"");
  xmlDeclaration.append(encoding);
  xmlDeclaration.append("\"?>\n");
  out.write(xmlDeclaration.toString());
}
/**
 * Writes the opening {@code <configuration>} tag, followed by a newline,
 * to {@code out}.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void writeConfiguration() throws IOException {
  final String openingTag = "<configuration>\n";
  out.write(openingTag);
}
private void endConfig() throws IOException{ private void endConfig() throws IOException{
out.write("</configuration>\n"); out.write("</configuration>\n");
out.close(); out.close();
@ -120,6 +133,18 @@ private void endFallback() throws IOException {
out.write("</xi:fallback>\n "); out.write("</xi:fallback>\n ");
} }
/**
 * Writes a DOCTYPE declaration with an internal subset that defines one
 * entity with a literal replacement value.
 *
 * @param root   name written after {@code <!DOCTYPE}
 * @param entity name of the entity being declared
 * @param value  replacement text written between double quotes
 * @throws IOException if writing to {@code out} fails
 */
private void declareEntity(String root, String entity, String value)
    throws IOException {
  StringBuilder doctype = new StringBuilder();
  doctype.append("<!DOCTYPE ").append(root);
  doctype.append(" [\n<!ENTITY ").append(entity);
  doctype.append(" \"").append(value).append("\">\n]>");
  out.write(doctype.toString());
}
/**
 * Writes a DOCTYPE declaration with an internal subset that defines one
 * external entity using the {@code SYSTEM} keyword.
 *
 * @param root   name written after {@code <!DOCTYPE}
 * @param entity name of the entity being declared
 * @param value  system identifier written between double quotes
 * @throws IOException if writing to {@code out} fails
 */
private void declareSystemEntity(String root, String entity, String value)
    throws IOException {
  StringBuilder doctype = new StringBuilder();
  doctype.append("<!DOCTYPE ").append(root);
  doctype.append(" [\n<!ENTITY ").append(entity);
  doctype.append(" SYSTEM \"").append(value).append("\">\n]>");
  out.write(doctype.toString());
}
public void testInputStreamResource() throws Exception { public void testInputStreamResource() throws Exception {
StringWriter writer = new StringWriter(); StringWriter writer = new StringWriter();
out = new BufferedWriter(writer); out = new BufferedWriter(writer);
@ -550,6 +575,63 @@ public void testIncludes() throws Exception {
tearDown(); tearDown();
} }
/**
 * Writes a configuration file in ISO-8859-1 whose XML declaration names that
 * encoding, loads it, and checks that both an ASCII value and a value
 * containing a non-ASCII character are read back unchanged.
 *
 * @throws Exception if writing or loading the resource fails
 */
public void testCharsetInDocumentEncoding() throws Exception {
  tearDown();
  out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(CONFIG),
      StandardCharsets.ISO_8859_1));
  // name() is the canonical charset name; displayName() may be localized and
  // is therefore not a reliable value for the XML encoding attribute.
  writeHeader(StandardCharsets.ISO_8859_1.name());
  writeConfiguration();
  appendProperty("a", "b");
  appendProperty("c", "Müller");
  endConfig();

  // verify that the values were decoded using the declared document encoding
  Path fileResource = new Path(CONFIG);
  conf.addResource(fileResource);
  // expected value first, so a failure reports expected/actual correctly
  assertEquals("b", conf.get("a"));
  assertEquals("Müller", conf.get("c"));
  tearDown();
}
/**
 * Writes a configuration file that declares an internal entity {@code d}
 * with replacement text {@code "d"}, references it from a property value,
 * and checks that the loaded property holds the expanded text.
 *
 * @throws Exception if writing or loading the resource fails
 */
public void testEntityReference() throws Exception {
  tearDown();
  out = new BufferedWriter(new FileWriter(CONFIG));
  writeHeader();
  declareEntity("configuration", "d", "d");
  writeConfiguration();
  appendProperty("a", "b");
  appendProperty("c", "&d;");
  endConfig();

  // verify that the entity reference was expanded to its declared value
  Path fileResource = new Path(CONFIG);
  conf.addResource(fileResource);
  // expected value first, so a failure reports expected/actual correctly
  assertEquals("b", conf.get("a"));
  assertEquals("d", conf.get("c"));
  tearDown();
}
/**
 * Writes the text {@code "d"} to a second file, declares it as an external
 * (SYSTEM) entity in the main configuration file, references that entity
 * from a property value, and checks that the loaded property holds the
 * content of the second file.
 *
 * @throws Exception if writing or loading the resources fails
 */
public void testSystemEntityReference() throws Exception {
  tearDown();
  // the external entity's replacement text lives in its own file
  out = new BufferedWriter(new FileWriter(CONFIG2));
  out.write("d");
  out.close();

  out = new BufferedWriter(new FileWriter(CONFIG));
  writeHeader();
  declareSystemEntity("configuration", "d", CONFIG2);
  writeConfiguration();
  appendProperty("a", "b");
  appendProperty("c", "&d;");
  endConfig();

  // verify that the external entity was resolved and expanded
  Path fileResource = new Path(CONFIG);
  conf.addResource(fileResource);
  // expected value first, so a failure reports expected/actual correctly
  assertEquals("b", conf.get("a"));
  assertEquals("d", conf.get("c"));
  tearDown();
}
public void testIncludesWithFallback() throws Exception { public void testIncludesWithFallback() throws Exception {
tearDown(); tearDown();
out=new BufferedWriter(new FileWriter(CONFIG2)); out=new BufferedWriter(new FileWriter(CONFIG2));

View File

@ -881,9 +881,9 @@
<version>3.1.4</version> <version>3.1.4</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml</groupId> <groupId>com.fasterxml.woodstox</groupId>
<artifactId>aalto-xml</artifactId> <artifactId>woodstox-core</artifactId>
<version>1.0.0</version> <version>5.0.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.codehaus.jackson</groupId> <groupId>org.codehaus.jackson</groupId>