From 5d180463dcb689fa3b7c69b097a86398a03b41ad Mon Sep 17 00:00:00 2001 From: David Mollitor Date: Wed, 15 Jan 2020 18:38:34 +0000 Subject: [PATCH] HADOOP-16790. Add Write Convenience Methods. Contributed by David Mollitor. This adds operations in FileUtil to write text to a file via either a FileSystem or FileContext instance. Change-Id: I5fe8fcf1bdbdbc734e137f922a75a822f2b88410 --- .../java/org/apache/hadoop/fs/FileUtil.java | 236 ++++++++++++++++++ .../org/apache/hadoop/fs/TestFileUtil.java | 178 +++++++++++++ 2 files changed, 414 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java index 4566686a12..7bc93f9bf5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java @@ -21,16 +21,20 @@ import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.net.InetAddress; import java.net.URI; import java.net.UnknownHostException; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; import java.nio.file.FileSystems; import java.nio.file.Files; @@ -38,6 +42,7 @@ import java.util.Enumeration; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1633,4 +1638,235 @@ public static boolean compareFs(FileSystem srcFs, FileSystem destFs) { // check for 
ports return srcUri.getPort()==dstUri.getPort(); } + + /** + * Writes bytes to a file. This utility method opens the file for writing, + * creating the file if it does not exist, or overwrites an existing file. All + * bytes in the byte array are written to the file. + * + * @param fs the file system with which to create the file + * @param path the path to the file + * @param bytes the byte array with the bytes to write + * + * @return the file system passed in as {@code fs} + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileSystem write(final FileSystem fs, final Path path, + final byte[] bytes) throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(bytes); + + try (FSDataOutputStream out = fs.createFile(path).overwrite(true).build()) { + out.write(bytes); + } + + return fs; + } + + /** + * Writes bytes to a file. This utility method opens the file for writing, + * creating the file if it does not exist, or overwrites an existing file. All + * bytes in the byte array are written to the file. + * + * @param fileContext the file context with which to create the file + * @param path the path to the file + * @param bytes the byte array with the bytes to write + * + * @return the file context passed in as {@code fileContext} + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileContext write(final FileContext fileContext, + final Path path, final byte[] bytes) throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(bytes); + + try (FSDataOutputStream out = + fileContext.create(path).overwrite(true).build()) { + out.write(bytes); + } + + return fileContext; + } + + /** + * Write lines of text to a file. 
Each line is a char sequence and is written + * to the file in sequence with each line terminated by the platform's line + * separator, as defined by the system property {@code + * line.separator}. Characters are encoded into bytes using the specified + * charset. This utility method opens the file for writing, creating the file + * if it does not exist, or overwrites an existing file. + * + * @param fs the file system with which to create the file + * @param path the path to the file + * @param lines an Iterable over the char sequences to write + * @param cs the charset to use for encoding + * + * @return the file system + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileSystem write(final FileSystem fs, final Path path, + final Iterable<? extends CharSequence> lines, final Charset cs) + throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(lines); + Objects.requireNonNull(cs); + + CharsetEncoder encoder = cs.newEncoder(); + try (FSDataOutputStream out = fs.createFile(path).overwrite(true).build(); + BufferedWriter writer = + new BufferedWriter(new OutputStreamWriter(out, encoder))) { + for (CharSequence line : lines) { + writer.append(line); + writer.newLine(); + } + } + return fs; + } + + /** + * Write lines of text to a file. Each line is a char sequence and is written + * to the file in sequence with each line terminated by the platform's line + * separator, as defined by the system property {@code + * line.separator}. Characters are encoded into bytes using the specified + * charset. This utility method opens the file for writing, creating the file + * if it does not exist, or overwrites an existing file. 
+ * + * @param fileContext the file context with which to create the file + * @param path the path to the file + * @param lines an Iterable over the char sequences to write + * @param cs the charset to use for encoding + * + * @return the file context + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileContext write(final FileContext fileContext, + final Path path, final Iterable<? extends CharSequence> lines, + final Charset cs) throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(lines); + Objects.requireNonNull(cs); + + CharsetEncoder encoder = cs.newEncoder(); + try (FSDataOutputStream out = fileContext.create(path).overwrite(true).build(); + BufferedWriter writer = + new BufferedWriter(new OutputStreamWriter(out, encoder))) { + for (CharSequence line : lines) { + writer.append(line); + writer.newLine(); + } + } + return fileContext; + } + + /** + * Write a CharSequence to a file. Characters are encoded into bytes using the + * specified charset. This utility method opens the file for writing, creating + * the file if it does not exist, or overwrites an existing file. 
+ * + * @param fs the file system with which to create the file + * @param path the path to the file + * @param charseq the char sequence to write to the file + * @param cs the charset to use for encoding + * + * @return the file system + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileSystem write(final FileSystem fs, final Path path, + final CharSequence charseq, final Charset cs) throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(charseq); + Objects.requireNonNull(cs); + + CharsetEncoder encoder = cs.newEncoder(); + try (FSDataOutputStream out = fs.createFile(path).overwrite(true).build(); + BufferedWriter writer = + new BufferedWriter(new OutputStreamWriter(out, encoder))) { + writer.append(charseq); + } + return fs; + } + + /** + * Write a CharSequence to a file. Characters are encoded into bytes using the + * specified charset. This utility method opens the file for writing, creating + * the file if it does not exist, or overwrites an existing file. 
+ * + * @param fs the file context with which to create the file + * @param path the path to the file + * @param charseq the char sequence to write to the file + * @param cs the charset to use for encoding + * + * @return the file context + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileContext write(final FileContext fs, final Path path, + final CharSequence charseq, final Charset cs) throws IOException { + + Objects.requireNonNull(path); + Objects.requireNonNull(charseq); + Objects.requireNonNull(cs); + + CharsetEncoder encoder = cs.newEncoder(); + try (FSDataOutputStream out = fs.create(path).overwrite(true).build(); + BufferedWriter writer = + new BufferedWriter(new OutputStreamWriter(out, encoder))) { + writer.append(charseq); + } + return fs; + } + + /** + * Write a CharSequence to a file. Characters are encoded into bytes using + * UTF-8. This utility method opens the file for writing, creating the file if + * it does not exist, or overwrites an existing file. + * + * @param fs the file system with which to create the file + * @param path the path to the file + * @param charseq the char sequence to write to the file + * + * @return the file system + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileSystem write(final FileSystem fs, final Path path, + final CharSequence charseq) throws IOException { + return write(fs, path, charseq, StandardCharsets.UTF_8); + } + + /** + * Write a CharSequence to a file. Characters are encoded into bytes using + * UTF-8. This utility method opens the file for writing, creating the file if + * it does not exist, or overwrites an existing file. 
+ * + * @param fileContext the file context with which to create the file + * @param path the path to the file + * @param charseq the char sequence to write to the file + * + * @return the file context + * + * @throws NullPointerException if any of the arguments are {@code null} + * @throws IOException if an I/O error occurs creating or writing to the file + */ + public static FileContext write(final FileContext fileContext, + final Path path, final CharSequence charseq) throws IOException { + return write(fileContext, path, charseq, StandardCharsets.UTF_8); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java index 5d22a6a2a4..eb9098e419 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; @@ -44,6 +45,7 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.jar.Attributes; @@ -1493,6 +1495,182 @@ public void testReadSymlinkWithAFileAsInput() throws IOException { file.delete(); } + /** + * Test that bytes are written out correctly to the local file system. 
+ */ + @Test + public void testWriteBytesFileSystem() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(uri, conf); + Path testPath = new Path(new Path(uri), "writebytes.out"); + + byte[] write = new byte[] {0x00, 0x01, 0x02, 0x03}; + + FileUtil.write(fs, testPath, write); + + byte[] read = FileUtils.readFileToByteArray(new File(testPath.toUri())); + + assertArrayEquals(write, read); + } + + /** + * Test that a Collection of Strings is written out correctly to the local + * file system. + */ + @Test + public void testWriteStringsFileSystem() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(uri, conf); + Path testPath = new Path(new Path(uri), "writestrings.out"); + + Collection<String> write = Arrays.asList("over", "the", "lazy", "dog"); + + FileUtil.write(fs, testPath, write, StandardCharsets.UTF_8); + + List<String> read = + FileUtils.readLines(new File(testPath.toUri()), StandardCharsets.UTF_8); + + assertEquals(write, read); + } + + /** + * Test that a String is written out correctly to the local file system. + */ + @Test + public void testWriteStringFileSystem() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(uri, conf); + Path testPath = new Path(new Path(uri), "writestring.out"); + + String write = "A" + "\u00ea" + "\u00f1" + "\u00fc" + "C"; + + FileUtil.write(fs, testPath, write, StandardCharsets.UTF_8); + + String read = FileUtils.readFileToString(new File(testPath.toUri()), + StandardCharsets.UTF_8); + + assertEquals(write, read); + } + + /** + * Test that a String is written out correctly to the local file system + * without specifying a character set. 
+ */ + @Test + public void testWriteStringNoCharSetFileSystem() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(uri, conf); + Path testPath = new Path(new Path(uri), "writestring.out"); + + String write = "A" + "\u00ea" + "\u00f1" + "\u00fc" + "C"; + FileUtil.write(fs, testPath, write); + + String read = FileUtils.readFileToString(new File(testPath.toUri()), + StandardCharsets.UTF_8); + + assertEquals(write, read); + } + + /** + * Test that bytes are written out correctly to the local file system. + */ + @Test + public void testWriteBytesFileContext() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileContext fc = FileContext.getFileContext(uri, conf); + Path testPath = new Path(new Path(uri), "writebytes.out"); + + byte[] write = new byte[] {0x00, 0x01, 0x02, 0x03}; + + FileUtil.write(fc, testPath, write); + + byte[] read = FileUtils.readFileToByteArray(new File(testPath.toUri())); + + assertArrayEquals(write, read); + } + + /** + * Test that a Collection of Strings is written out correctly to the local + * file system. + */ + @Test + public void testWriteStringsFileContext() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileContext fc = FileContext.getFileContext(uri, conf); + Path testPath = new Path(new Path(uri), "writestrings.out"); + + Collection<String> write = Arrays.asList("over", "the", "lazy", "dog"); + + FileUtil.write(fc, testPath, write, StandardCharsets.UTF_8); + + List<String> read = + FileUtils.readLines(new File(testPath.toUri()), StandardCharsets.UTF_8); + + assertEquals(write, read); + } + + /** + * Test that a String is written out correctly to the local file system. 
+ */ + @Test + public void testWriteStringFileContext() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileContext fc = FileContext.getFileContext(uri, conf); + Path testPath = new Path(new Path(uri), "writestring.out"); + + String write = "A" + "\u00ea" + "\u00f1" + "\u00fc" + "C"; + + FileUtil.write(fc, testPath, write, StandardCharsets.UTF_8); + + String read = FileUtils.readFileToString(new File(testPath.toUri()), + StandardCharsets.UTF_8); + + assertEquals(write, read); + } + + /** + * Test that a String written through a FileContext without specifying a + * character set is read back correctly from the local file system as UTF-8. + */ + @Test + public void testWriteStringNoCharSetFileContext() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileContext fc = FileContext.getFileContext(uri, conf); + Path testPath = new Path(new Path(uri), "writestring.out"); + + String write = "A" + "\u00ea" + "\u00f1" + "\u00fc" + "C"; + FileUtil.write(fc, testPath, write); + + String read = FileUtils.readFileToString(new File(testPath.toUri()), + StandardCharsets.UTF_8); + + assertEquals(write, read); + } + /** * The size of FileSystem cache. */