HDFS-10778. Add -format option to make the output of FileDistribution processor human-readable in OfflineImageViewer.

This commit is contained in:
Akira Ajisaka 2016-09-08 15:13:43 +09:00
parent d355573f56
commit 63f594892e
7 changed files with 164 additions and 104 deletions

View File

@ -31,6 +31,7 @@
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.util.LimitInputStream;
import org.apache.hadoop.util.StringUtils;
import com.google.common.base.Preconditions;
@ -75,11 +76,14 @@ final class FileDistributionCalculator {
private long totalSpace;
private long maxFileSize;
private boolean formatOutput = false;
FileDistributionCalculator(Configuration conf, long maxSize, int steps,
PrintStream out) {
boolean formatOutput, PrintStream out) {
this.conf = conf;
this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize;
this.steps = steps == 0 ? INTERVAL_DEFAULT : steps;
this.formatOutput = formatOutput;
this.out = out;
long numIntervals = this.maxSize / this.steps;
// avoid OutOfMemoryError when allocating an array
@ -148,10 +152,20 @@ private void run(InputStream in) throws IOException {
private void output() {
// write the distribution into the output file
out.print("Size\tNumFiles\n");
out.print((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
for (int i = 0; i < distribution.length; i++) {
if (distribution[i] != 0) {
out.print(((long) i * steps) + "\t" + distribution[i]);
if (formatOutput) {
out.print((i == 0 ? "[" : "(")
+ StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * steps))
+ ", "
+ StringUtils.byteDesc((long)
(i == distribution.length - 1 ? maxFileSize : i * steps))
+ "]\t" + distribution[i]);
} else {
out.print(((long) i * steps) + "\t" + distribution[i]);
}
out.print('\n');
}
}

View File

@ -20,6 +20,8 @@
import java.io.IOException;
import java.util.LinkedList;
import org.apache.hadoop.util.StringUtils;
/**
* File size distribution visitor.
*
@ -67,6 +69,7 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
private FileContext current;
private boolean inInode = false;
private boolean formatOutput = false;
/**
* File or directory information.
@ -78,12 +81,12 @@ private static class FileContext {
int replication;
}
public FileDistributionVisitor(String filename,
long maxSize,
int step) throws IOException {
public FileDistributionVisitor(String filename, long maxSize, int step,
boolean formatOutput) throws IOException {
super(filename, false);
this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
this.step = (step == 0 ? INTERVAL_DEFAULT : step);
this.formatOutput = formatOutput;
long numIntervals = this.maxSize / this.step;
if(numIntervals >= Integer.MAX_VALUE)
throw new IOException("Too many distribution intervals " + numIntervals);
@ -113,9 +116,22 @@ void finishAbnormally() throws IOException {
private void output() throws IOException {
// write the distribution into the output file
write("Size\tNumFiles\n");
for(int i = 0; i < distribution.length; i++)
write(((long)i * step) + "\t" + distribution[i] + "\n");
write((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
for (int i = 0; i < distribution.length; i++) {
if (distribution[i] > 0) {
if (formatOutput) {
write((i == 0 ? "[" : "(")
+ StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * step))
+ ", "
+ StringUtils.byteDesc((long)
(i == distribution.length - 1 ? maxFileSize : i * step))
+ "]\t"
+ distribution[i] + "\n");
} else {
write(((long) i * step) + "\t" + distribution[i] + "\n");
}
}
}
System.out.println("totalFiles = " + totalFiles);
System.out.println("totalDirectories = " + totalDirectories);
System.out.println("totalBlocks = " + totalBlocks);

View File

@ -46,61 +46,63 @@
public class OfflineImageViewer {
public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
private final static String usage =
"Usage: bin/hdfs oiv_legacy [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" +
"Offline Image Viewer\n" +
"View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" +
"saving the results in OUTPUTFILE.\n" +
"\n" +
"The oiv utility will attempt to parse correctly formed image files\n" +
"and will abort fail with mal-formed image files.\n" +
"\n" +
"The tool works offline and does not require a running cluster in\n" +
"order to process an image file.\n" +
"\n" +
"The following image processors are available:\n" +
" * Ls: The default image processor generates an lsr-style listing\n" +
" of the files in the namespace, with the same fields in the same\n" +
" order. Note that in order to correctly determine file sizes,\n" +
" this formatter cannot skip blocks and will override the\n" +
" -skipBlocks option.\n" +
" * Indented: This processor enumerates over all of the elements in\n" +
" the fsimage file, using levels of indentation to delineate\n" +
" sections within the file.\n" +
" * Delimited: Generate a text file with all of the elements common\n" +
" to both inodes and inodes-under-construction, separated by a\n" +
" delimiter. The default delimiter is \u0001, though this may be\n" +
" changed via the -delimiter argument. This processor also overrides\n" +
" the -skipBlocks option for the same reason as the Ls processor\n" +
" * XML: This processor creates an XML document with all elements of\n" +
" the fsimage enumerated, suitable for further analysis by XML\n" +
" tools.\n" +
" * FileDistribution: This processor analyzes the file size\n" +
" distribution in the image.\n" +
" -maxSize specifies the range [0, maxSize] of file sizes to be\n" +
" analyzed (128GB by default).\n" +
" -step defines the granularity of the distribution. (2MB by default)\n" +
" * NameDistribution: This processor analyzes the file names\n" +
" in the image and prints total number of file names and how frequently\n" +
" file names are reused.\n" +
"\n" +
"Required command line arguments:\n" +
"-i,--inputFile <arg> FSImage file to process.\n" +
"-o,--outputFile <arg> Name of output file. If the specified\n" +
" file exists, it will be overwritten.\n" +
"\n" +
"Optional command line arguments:\n" +
"-p,--processor <arg> Select which type of processor to apply\n" +
" against image file." +
" (Ls|XML|Delimited|Indented|FileDistribution).\n" +
"-h,--help Display usage information and exit\n" +
"-printToScreen For processors that write to a file, also\n" +
" output to screen. On large image files this\n" +
" will dramatically increase processing time.\n" +
"-skipBlocks Skip inodes' blocks information. May\n" +
" significantly decrease output.\n" +
" (default = false).\n" +
"-delimiter <arg> Delimiting string to use with Delimited processor\n";
private final static String usage =
"Usage: bin/hdfs oiv_legacy [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n"
+ "Offline Image Viewer\n"
+ "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n"
+ "saving the results in OUTPUTFILE.\n"
+ "\n"
+ "The oiv utility will attempt to parse correctly formed image files\n"
+ "and will abort fail with mal-formed image files.\n"
+ "\n"
+ "The tool works offline and does not require a running cluster in\n"
+ "order to process an image file.\n"
+ "\n"
+ "The following image processors are available:\n"
+ " * Ls: The default image processor generates an lsr-style listing\n"
+ " of the files in the namespace, with the same fields in the same\n"
+ " order. Note that in order to correctly determine file sizes,\n"
+ " this formatter cannot skip blocks and will override the\n"
+ " -skipBlocks option.\n"
+ " * Indented: This processor enumerates over all of the elements in\n"
+ " the fsimage file, using levels of indentation to delineate\n"
+ " sections within the file.\n"
+ " * Delimited: Generate a text file with all of the elements common\n"
+ " to both inodes and inodes-under-construction, separated by a\n"
+ " delimiter. The default delimiter is \u0001, though this may be\n"
+ " changed via the -delimiter argument. This processor also overrides\n"
+ " the -skipBlocks option for the same reason as the Ls processor\n"
+ " * XML: This processor creates an XML document with all elements of\n"
+ " the fsimage enumerated, suitable for further analysis by XML\n"
+ " tools.\n"
+ " * FileDistribution: This processor analyzes the file size\n"
+ " distribution in the image.\n"
+ " -maxSize specifies the range [0, maxSize] of file sizes to be\n"
+ " analyzed (128GB by default).\n"
+ " -step defines the granularity of the distribution. (2MB by default)\n"
+ " -format formats the output result in a human-readable fashion\n"
+ " rather than a number of bytes. (false by default)\n"
+ " * NameDistribution: This processor analyzes the file names\n"
+ " in the image and prints total number of file names and how frequently\n"
+ " file names are reused.\n"
+ "\n"
+ "Required command line arguments:\n"
+ "-i,--inputFile <arg> FSImage file to process.\n"
+ "-o,--outputFile <arg> Name of output file. If the specified\n"
+ " file exists, it will be overwritten.\n"
+ "\n"
+ "Optional command line arguments:\n"
+ "-p,--processor <arg> Select which type of processor to apply\n"
+ " against image file."
+ " (Ls|XML|Delimited|Indented|FileDistribution).\n"
+ "-h,--help Display usage information and exit\n"
+ "-printToScreen For processors that write to a file, also\n"
+ " output to screen. On large image files this\n"
+ " will dramatically increase processing time.\n"
+ "-skipBlocks Skip inodes' blocks information. May\n"
+ " significantly decrease output.\n"
+ " (default = false).\n"
+ "-delimiter <arg> Delimiting string to use with Delimited processor\n";
private final boolean skipBlocks;
private final String inputFile;
@ -188,6 +190,7 @@ public static Options buildOptions() {
options.addOption("h", "help", false, "");
options.addOption("maxSize", true, "");
options.addOption("step", true, "");
options.addOption("format", false, "");
options.addOption("skipBlocks", false, "");
options.addOption("printToScreen", false, "");
options.addOption("delimiter", true, "");
@ -253,7 +256,8 @@ public static void main(String[] args) throws IOException {
} else if (processor.equals("FileDistribution")) {
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
v = new FileDistributionVisitor(outputFile, maxSize, step);
boolean formatOutput = cmd.hasOption("format");
v = new FileDistributionVisitor(outputFile, maxSize, step, formatOutput);
} else if (processor.equals("NameDistribution")) {
v = new NameDistributionVisitor(outputFile, printToScreen);
} else {

View File

@ -67,6 +67,8 @@ public class OfflineImageViewerPB {
+ " -maxSize specifies the range [0, maxSize] of file sizes to be\n"
+ " analyzed (128GB by default).\n"
+ " -step defines the granularity of the distribution. (2MB by default)\n"
+ " -format formats the output result in a human-readable fashion\n"
+ " rather than a number of bytes. (false by default)\n"
+ " * Web: Run a viewer to expose read-only WebHDFS API.\n"
+ " -addr specifies the address to listen. (localhost:5978 by default)\n"
+ " * Delimited (experimental): Generate a text file with all of the elements common\n"
@ -111,6 +113,7 @@ private static Options buildOptions() {
options.addOption("h", "help", false, "");
options.addOption("maxSize", true, "");
options.addOption("step", true, "");
options.addOption("format", false, "");
options.addOption("addr", true, "");
options.addOption("delimiter", true, "");
options.addOption("t", "temp", true, "");
@ -172,43 +175,44 @@ public static int run(String[] args) throws Exception {
try (PrintStream out = outputFile.equals("-") ?
System.out : new PrintStream(outputFile, "UTF-8")) {
switch (processor) {
case "FileDistribution":
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
new FileDistributionCalculator(conf, maxSize, step, out).visit(
new RandomAccessFile(inputFile, "r"));
break;
case "XML":
new PBImageXmlWriter(conf, out).visit(
new RandomAccessFile(inputFile, "r"));
break;
case "ReverseXML":
try {
OfflineImageReconstructor.run(inputFile, outputFile);
} catch (Exception e) {
System.err.println("OfflineImageReconstructor failed: " +
e.getMessage());
e.printStackTrace(System.err);
System.exit(1);
}
break;
case "Web":
String addr = cmd.getOptionValue("addr", "localhost:5978");
try (WebImageViewer viewer = new WebImageViewer(
NetUtils.createSocketAddr(addr))) {
viewer.start(inputFile);
}
break;
case "Delimited":
try (PBImageDelimitedTextWriter writer =
new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
writer.visit(new RandomAccessFile(inputFile, "r"));
}
break;
default:
System.err.println("Invalid processor specified : " + processor);
printUsage();
return -1;
case "FileDistribution":
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
boolean formatOutput = cmd.hasOption("format");
new FileDistributionCalculator(conf, maxSize, step, formatOutput, out)
.visit(new RandomAccessFile(inputFile, "r"));
break;
case "XML":
new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile,
"r"));
break;
case "ReverseXML":
try {
OfflineImageReconstructor.run(inputFile, outputFile);
} catch (Exception e) {
System.err.println("OfflineImageReconstructor failed: "
+ e.getMessage());
e.printStackTrace(System.err);
System.exit(1);
}
break;
case "Web":
String addr = cmd.getOptionValue("addr", "localhost:5978");
try (WebImageViewer viewer =
new WebImageViewer(NetUtils.createSocketAddr(addr))) {
viewer.start(inputFile);
}
break;
case "Delimited":
try (PBImageDelimitedTextWriter writer =
new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
writer.visit(new RandomAccessFile(inputFile, "r"));
}
break;
default:
System.err.println("Invalid processor specified : " + processor);
printUsage();
return -1;
}
return 0;
} catch (EOFException e) {

View File

@ -239,6 +239,7 @@ Usage: `hdfs oiv [OPTIONS] -i INPUT_FILE`
| `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. |
| `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. |
| `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. |
| `-format` | Format the output result in a human-readable fashion rather than a number of bytes (false by default). This option is used with FileDistribution processor. |
| `-delimiter` *arg* | Delimiting string to use with Delimited processor. |
| `-t`,`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. |
| `-h`,`--help` | Display the tool usage and help information and exit. |

View File

@ -150,6 +150,7 @@ Options
| `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. |
| `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. |
| `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. |
| `-format` | Format the output result in a human-readable fashion rather than a number of bytes (false by default). This option is used with FileDistribution processor. |
| `-delimiter` *arg* | Delimiting string to use with Delimited processor. |
| `-t`\|`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. |
| `-h`\|`--help` | Display the tool usage and help information and exit. |

View File

@ -237,7 +237,7 @@ public void testTruncatedFSImage() throws IOException {
File truncatedFile = new File(tempDir, "truncatedFsImage");
PrintStream output = new PrintStream(NullOutputStream.NULL_OUTPUT_STREAM);
copyPartOfFile(originalFsimage, truncatedFile);
new FileDistributionCalculator(new Configuration(), 0, 0, output)
new FileDistributionCalculator(new Configuration(), 0, 0, false, output)
.visit(new RandomAccessFile(truncatedFile, "r"));
}
@ -259,7 +259,7 @@ private void copyPartOfFile(File src, File dest) throws IOException {
public void testFileDistributionCalculator() throws IOException {
ByteArrayOutputStream output = new ByteArrayOutputStream();
PrintStream o = new PrintStream(output);
new FileDistributionCalculator(new Configuration(), 0, 0, o)
new FileDistributionCalculator(new Configuration(), 0, 0, false, o)
.visit(new RandomAccessFile(originalFsimage, "r"));
o.close();
@ -620,4 +620,24 @@ public void testOfflineImageViewerMaxSizeAndStepOptions() throws Exception {
IOUtils.closeStream(out);
}
}
/**
 * Runs the FileDistribution processor with {@code -format} while standard
 * output is redirected into a buffer ({@code -o -} is passed as the output
 * file), then checks the exit status and that the captured text contains
 * the human-readable range label for the (0, 8] byte bucket.
 */
@Test
public void testOfflineImageViewerWithFormatOption() throws Exception {
  final ByteArrayOutputStream captured = new ByteArrayOutputStream();
  final PrintStream redirected = new PrintStream(captured);
  // Remember the real stdout so it can be restored whatever happens.
  final PrintStream savedStdout = System.out;
  try {
    System.setOut(redirected);
    final String[] args = {
        "-i", originalFsimage.getAbsolutePath(),
        "-o", "-",
        "-p", "FileDistribution",
        "-maxSize", "512",
        "-step", "8",
        "-format"};
    final int exitCode = OfflineImageViewerPB.run(args);
    assertEquals(0, exitCode);
    final String report = captured.toString();
    Assert.assertTrue(report.contains("(0 B, 8 B]"));
  } finally {
    System.setOut(savedStdout);
    IOUtils.closeStream(redirected);
  }
}
}