HADOOP-11698. Remove DistCpV1 and Logalyzer. Contributed by Brahma Reddy Battula.
This commit is contained in:
parent
ce53c8eb0c
commit
4aa730ce85
@ -26,6 +26,9 @@ Trunk (Unreleased)
|
||||
HADOOP-11627. Remove io.native.lib.available.
|
||||
(Brahma Reddy Battula via aajisaka)
|
||||
|
||||
HADOOP-11698. Remove DistCpV1 and Logalyzer.
|
||||
(Brahma Reddy Battula via aajisaka)
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
HADOOP-6590. Add a username check for hadoop sub-commands (John Smith via
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,329 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.tools;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Random;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configuration.DeprecationDelta;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.io.WritableComparator;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.FileOutputFormat;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.Mapper;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.TextInputFormat;
|
||||
import org.apache.hadoop.mapred.TextOutputFormat;
|
||||
import org.apache.hadoop.mapred.lib.LongSumReducer;
|
||||
import org.apache.hadoop.mapreduce.lib.map.RegexMapper;
|
||||
|
||||
/**
|
||||
* Logalyzer: A utility tool for archiving and analyzing hadoop logs.
|
||||
* <p>
|
||||
* This tool supports archiving and anaylzing (sort/grep) of log-files.
|
||||
* It takes as input
|
||||
* a) Input uri which will serve uris of the logs to be archived.
|
||||
* b) Output directory (not mandatory).
|
||||
* b) Directory on dfs to archive the logs.
|
||||
* c) The sort/grep patterns for analyzing the files and separator for boundaries.
|
||||
* Usage:
|
||||
* Logalyzer -archive -archiveDir <directory to archive logs> -analysis
|
||||
* <directory> -logs <log-list uri> -grep <pattern> -sort
|
||||
* <col1, col2> -separator <separator>
|
||||
* <p>
|
||||
*/
|
||||
@Deprecated
|
||||
public class Logalyzer {
|
||||
// Constants
|
||||
private static Configuration fsConfig = new Configuration();
|
||||
public static final String SORT_COLUMNS =
|
||||
"logalizer.logcomparator.sort.columns";
|
||||
public static final String COLUMN_SEPARATOR =
|
||||
"logalizer.logcomparator.column.separator";
|
||||
|
||||
static {
|
||||
Configuration.addDeprecations(new DeprecationDelta[] {
|
||||
new DeprecationDelta("mapred.reducer.sort", SORT_COLUMNS),
|
||||
new DeprecationDelta("mapred.reducer.separator", COLUMN_SEPARATOR)
|
||||
});
|
||||
}
|
||||
|
||||
/** A {@link Mapper} that extracts text matching a regular expression. */
|
||||
public static class LogRegexMapper<K extends WritableComparable>
|
||||
extends MapReduceBase
|
||||
implements Mapper<K, Text, Text, LongWritable> {
|
||||
|
||||
private Pattern pattern;
|
||||
|
||||
public void configure(JobConf job) {
|
||||
pattern = Pattern.compile(job.get(RegexMapper.PATTERN));
|
||||
}
|
||||
|
||||
public void map(K key, Text value,
|
||||
OutputCollector<Text, LongWritable> output,
|
||||
Reporter reporter)
|
||||
throws IOException {
|
||||
String text = value.toString();
|
||||
Matcher matcher = pattern.matcher(text);
|
||||
while (matcher.find()) {
|
||||
output.collect(value, new LongWritable(1));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** A WritableComparator optimized for UTF8 keys of the logs. */
|
||||
public static class LogComparator extends Text.Comparator implements Configurable {
|
||||
|
||||
private static Log LOG = LogFactory.getLog(Logalyzer.class);
|
||||
private JobConf conf = null;
|
||||
private String[] sortSpec = null;
|
||||
private String columnSeparator = null;
|
||||
|
||||
public void setConf(Configuration conf) {
|
||||
if (conf instanceof JobConf) {
|
||||
this.conf = (JobConf) conf;
|
||||
} else {
|
||||
this.conf = new JobConf(conf);
|
||||
}
|
||||
|
||||
//Initialize the specification for *comparision*
|
||||
String sortColumns = this.conf.get(SORT_COLUMNS, null);
|
||||
if (sortColumns != null) {
|
||||
sortSpec = sortColumns.split(",");
|
||||
}
|
||||
|
||||
//Column-separator
|
||||
columnSeparator = this.conf.get(COLUMN_SEPARATOR, "");
|
||||
}
|
||||
|
||||
public Configuration getConf() {
|
||||
return conf;
|
||||
}
|
||||
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
|
||||
if (sortSpec == null) {
|
||||
return super.compare(b1, s1, l1, b2, s2, l2);
|
||||
}
|
||||
|
||||
try {
|
||||
Text logline1 = new Text();
|
||||
logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
|
||||
String line1 = logline1.toString();
|
||||
String[] logColumns1 = line1.split(columnSeparator);
|
||||
|
||||
Text logline2 = new Text();
|
||||
logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
|
||||
String line2 = logline2.toString();
|
||||
String[] logColumns2 = line2.split(columnSeparator);
|
||||
|
||||
if (logColumns1 == null || logColumns2 == null) {
|
||||
return super.compare(b1, s1, l1, b2, s2, l2);
|
||||
}
|
||||
|
||||
//Compare column-wise according to *sortSpec*
|
||||
for(int i=0; i < sortSpec.length; ++i) {
|
||||
int column = Integer.parseInt(sortSpec[i]);
|
||||
String c1 = logColumns1[column];
|
||||
String c2 = logColumns2[column];
|
||||
|
||||
//Compare columns
|
||||
int comparision = super.compareBytes(
|
||||
c1.getBytes(Charset.forName("UTF-8")), 0, c1.length(),
|
||||
c2.getBytes(Charset.forName("UTF-8")), 0, c2.length()
|
||||
);
|
||||
|
||||
//They differ!
|
||||
if (comparision != 0) {
|
||||
return comparision;
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IOException ioe) {
|
||||
LOG.fatal("Caught " + ioe);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static {
|
||||
// register this comparator
|
||||
WritableComparator.define(Text.class, new LogComparator());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* doArchive: Workhorse function to archive log-files.
|
||||
* @param logListURI : The uri which will serve list of log-files to archive.
|
||||
* @param archiveDirectory : The directory to store archived logfiles.
|
||||
* @throws IOException
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
public void
|
||||
doArchive(String logListURI, String archiveDirectory)
|
||||
throws IOException
|
||||
{
|
||||
String destURL = FileSystem.getDefaultUri(fsConfig) + archiveDirectory;
|
||||
DistCpV1.copy(new JobConf(fsConfig), logListURI, destURL, null, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* doAnalyze:
|
||||
* @param inputFilesDirectory : Directory containing the files to be analyzed.
|
||||
* @param outputDirectory : Directory to store analysis (output).
|
||||
* @param grepPattern : Pattern to *grep* for.
|
||||
* @param sortColumns : Sort specification for output.
|
||||
* @param columnSeparator : Column separator.
|
||||
* @throws IOException
|
||||
*/
|
||||
public void
|
||||
doAnalyze(String inputFilesDirectory, String outputDirectory,
|
||||
String grepPattern, String sortColumns, String columnSeparator)
|
||||
throws IOException
|
||||
{
|
||||
Path grepInput = new Path(inputFilesDirectory);
|
||||
|
||||
Path analysisOutput = null;
|
||||
if (outputDirectory.equals("")) {
|
||||
analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
|
||||
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
|
||||
} else {
|
||||
analysisOutput = new Path(outputDirectory);
|
||||
}
|
||||
|
||||
JobConf grepJob = new JobConf(fsConfig);
|
||||
grepJob.setJobName("logalyzer-grep-sort");
|
||||
|
||||
FileInputFormat.setInputPaths(grepJob, grepInput);
|
||||
grepJob.setInputFormat(TextInputFormat.class);
|
||||
|
||||
grepJob.setMapperClass(LogRegexMapper.class);
|
||||
grepJob.set(RegexMapper.PATTERN, grepPattern);
|
||||
grepJob.set(SORT_COLUMNS, sortColumns);
|
||||
grepJob.set(COLUMN_SEPARATOR, columnSeparator);
|
||||
|
||||
grepJob.setCombinerClass(LongSumReducer.class);
|
||||
grepJob.setReducerClass(LongSumReducer.class);
|
||||
|
||||
FileOutputFormat.setOutputPath(grepJob, analysisOutput);
|
||||
grepJob.setOutputFormat(TextOutputFormat.class);
|
||||
grepJob.setOutputKeyClass(Text.class);
|
||||
grepJob.setOutputValueClass(LongWritable.class);
|
||||
grepJob.setOutputKeyComparatorClass(LogComparator.class);
|
||||
|
||||
grepJob.setNumReduceTasks(1); // write a single file
|
||||
|
||||
JobClient.runJob(grepJob);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Log LOG = LogFactory.getLog(Logalyzer.class);
|
||||
|
||||
String version = "Logalyzer.0.0.1";
|
||||
String usage = "Usage: Logalyzer [-archive -logs <urlsFile>] " +
|
||||
"-archiveDir <archiveDirectory> " +
|
||||
"-grep <pattern> -sort <column1,column2,...> -separator <separator> " +
|
||||
"-analysis <outputDirectory>";
|
||||
|
||||
System.out.println(version);
|
||||
if (args.length == 0) {
|
||||
System.err.println(usage);
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
//Command line arguments
|
||||
boolean archive = false;
|
||||
boolean grep = false;
|
||||
boolean sort = false;
|
||||
|
||||
String archiveDir = "";
|
||||
String logListURI = "";
|
||||
String grepPattern = ".*";
|
||||
String sortColumns = "";
|
||||
String columnSeparator = " ";
|
||||
String outputDirectory = "";
|
||||
|
||||
for (int i = 0; i < args.length; i++) { // parse command line
|
||||
if (args[i].equals("-archive")) {
|
||||
archive = true;
|
||||
} else if (args[i].equals("-archiveDir")) {
|
||||
archiveDir = args[++i];
|
||||
} else if (args[i].equals("-grep")) {
|
||||
grep = true;
|
||||
grepPattern = args[++i];
|
||||
} else if (args[i].equals("-logs")) {
|
||||
logListURI = args[++i];
|
||||
} else if (args[i].equals("-sort")) {
|
||||
sort = true;
|
||||
sortColumns = args[++i];
|
||||
} else if (args[i].equals("-separator")) {
|
||||
columnSeparator = args[++i];
|
||||
} else if (args[i].equals("-analysis")) {
|
||||
outputDirectory = args[++i];
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("analysisDir = " + outputDirectory);
|
||||
LOG.info("archiveDir = " + archiveDir);
|
||||
LOG.info("logListURI = " + logListURI);
|
||||
LOG.info("grepPattern = " + grepPattern);
|
||||
LOG.info("sortColumns = " + sortColumns);
|
||||
LOG.info("separator = " + columnSeparator);
|
||||
|
||||
try {
|
||||
Logalyzer logalyzer = new Logalyzer();
|
||||
|
||||
// Archive?
|
||||
if (archive) {
|
||||
logalyzer.doArchive(logListURI, archiveDir);
|
||||
}
|
||||
|
||||
// Analyze?
|
||||
if (grep || sort) {
|
||||
logalyzer.doAnalyze(archiveDir, outputDirectory, grepPattern, sortColumns, columnSeparator);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
ioe.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
} //main
|
||||
|
||||
} //class Logalyzer
|
File diff suppressed because it is too large
Load Diff
@ -1,133 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.tools;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.EnumSet;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.CreateFlag;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestLogalyzer {
|
||||
private static String EL = System.getProperty("line.separator");
|
||||
private static String TAB = "\t";
|
||||
private static final Log LOG = LogFactory.getLog(TestLogalyzer.class);
|
||||
|
||||
private static File workSpace = new File("target",
|
||||
TestLogalyzer.class.getName() + "-workSpace");
|
||||
private static File outdir = new File(workSpace.getAbsoluteFile()
|
||||
+ File.separator + "out");
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("deprecation")
|
||||
public void testLogalyzer() throws Exception {
|
||||
Path f = createLogFile();
|
||||
|
||||
String[] args = new String[10];
|
||||
|
||||
args[0] = "-archiveDir";
|
||||
args[1] = f.toString();
|
||||
args[2] = "-grep";
|
||||
args[3] = "44";
|
||||
args[4] = "-sort";
|
||||
args[5] = "0";
|
||||
args[6] = "-analysis";
|
||||
args[7] = outdir.getAbsolutePath();
|
||||
args[8] = "-separator";
|
||||
args[9] = " ";
|
||||
|
||||
Logalyzer.main(args);
|
||||
checkResult();
|
||||
|
||||
}
|
||||
|
||||
private void checkResult() throws Exception {
|
||||
File result = new File(outdir.getAbsolutePath() + File.separator
|
||||
+ "part-00000");
|
||||
File success = new File(outdir.getAbsolutePath() + File.separator
|
||||
+ "_SUCCESS");
|
||||
Assert.assertTrue(success.exists());
|
||||
|
||||
FileInputStream fis = new FileInputStream(result);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
|
||||
String line = br.readLine();
|
||||
Assert.assertTrue(("1 44" + TAB + "2").equals(line));
|
||||
line = br.readLine();
|
||||
|
||||
Assert.assertTrue(("3 44" + TAB + "1").equals(line));
|
||||
line = br.readLine();
|
||||
|
||||
Assert.assertTrue(("4 44" + TAB + "1").equals(line));
|
||||
|
||||
br.close();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create simple log file
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
|
||||
private Path createLogFile() throws IOException {
|
||||
|
||||
FileContext files = FileContext.getLocalFSFileContext();
|
||||
|
||||
Path ws = new Path(workSpace.getAbsoluteFile().getAbsolutePath());
|
||||
|
||||
files.delete(ws, true);
|
||||
Path workSpacePath = new Path(workSpace.getAbsolutePath(), "log");
|
||||
files.mkdir(workSpacePath, null, true);
|
||||
|
||||
LOG.info("create logfile.log");
|
||||
Path logfile1 = new Path(workSpacePath, "logfile.log");
|
||||
|
||||
FSDataOutputStream os = files.create(logfile1,
|
||||
EnumSet.of(CreateFlag.CREATE));
|
||||
os.writeBytes("4 3" + EL + "1 3" + EL + "4 44" + EL);
|
||||
os.writeBytes("2 3" + EL + "1 3" + EL + "0 45" + EL);
|
||||
os.writeBytes("4 3" + EL + "1 3" + EL + "1 44" + EL);
|
||||
|
||||
os.flush();
|
||||
os.close();
|
||||
LOG.info("create logfile1.log");
|
||||
|
||||
Path logfile2 = new Path(workSpacePath, "logfile1.log");
|
||||
|
||||
os = files.create(logfile2, EnumSet.of(CreateFlag.CREATE));
|
||||
os.writeBytes("4 3" + EL + "1 3" + EL + "3 44" + EL);
|
||||
os.writeBytes("2 3" + EL + "1 3" + EL + "0 45" + EL);
|
||||
os.writeBytes("4 3" + EL + "1 3" + EL + "1 44" + EL);
|
||||
|
||||
os.flush();
|
||||
os.close();
|
||||
|
||||
return workSpacePath;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user