From 637cf89e180a42fe85817738707c629bb5e56a4c Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Tue, 16 Mar 2010 08:03:38 +0000 Subject: [PATCH] HADOOP-6591. HarFileSystem can handle paths with the whitespace characters. (Rodrigo Schmidt via dhruba) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@923619 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 +++ .../org/apache/hadoop/fs/HarFileSystem.java | 25 ++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 178b3ddf21..b9d07341fa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -190,6 +190,9 @@ Trunk (unreleased changes) HADOOP-6486. fix common classes to work with Avro 1.3 reflection. (cutting via tomwhite) + HADOOP-6591. HarFileSystem can handle paths with the whitespace characters. + (Rodrigo Schmidt via dhruba) + OPTIMIZATIONS HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..). diff --git a/src/java/org/apache/hadoop/fs/HarFileSystem.java b/src/java/org/apache/hadoop/fs/HarFileSystem.java index 051c4433e2..584b653927 100644 --- a/src/java/org/apache/hadoop/fs/HarFileSystem.java +++ b/src/java/org/apache/hadoop/fs/HarFileSystem.java @@ -19,8 +19,10 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -45,7 +47,7 @@ */ public class HarFileSystem extends FilterFileSystem { - public static final int VERSION = 1; + public static final int VERSION = 2; // uri representation of this Har filesystem private URI uri; // the version of this har filesystem @@ -121,7 +123,8 @@ public void initialize(URI name, Configuration conf) throws IOException { throw new IOException("Unable to " + "read the version of the Har file system: " + this.archivePath); } - if (this.version != HarFileSystem.VERSION) { + // make it always backwards-compatible + if (this.version > HarFileSystem.VERSION) { throw new IOException("Invalid version " + this.version + " expected " + HarFileSystem.VERSION); } @@ -213,6 +216,15 @@ private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException { return tmp; } + private String decodeFileName(String fname) + throws UnsupportedEncodingException { + + if (version == 2){ + return URLDecoder.decode(fname, "UTF-8"); + } + return fname; + } + /** * return the top level archive. */ @@ -479,6 +491,7 @@ private String fileStatusInIndex(Path harPath) throws IOException { read += tmp; String lineFeed = line.toString(); String[] parsed = lineFeed.split(" "); + parsed[0] = decodeFileName(parsed[0]); if (harPath.compareTo(new Path(parsed[0])) == 0) { // bingo! retStr = lineFeed; @@ -502,16 +515,16 @@ private String fileStatusInIndex(Path harPath) throws IOException { // the format is of the form // filename "dir"/"file" partFileName startIndex length // - private static class HarStatus { + private class HarStatus { boolean isDir; String name; List children; String partName; long startIndex; long length; - public HarStatus(String harString) { + public HarStatus(String harString) throws UnsupportedEncodingException { String[] splits = harString.split(" "); - this.name = splits[0]; + this.name = decodeFileName(splits[0]); this.isDir = "dir".equals(splits[1]) ? true: false; // this is equal to "none" if its a directory this.partName = splits[2]; @@ -520,7 +533,7 @@ public HarStatus(String harString) { if (isDir) { children = new ArrayList(); for (int i = 5; i < splits.length; i++) { - children.add(splits[i]); + children.add(decodeFileName(splits[i])); } } }