YARN-4612. Fix rumen and scheduler load simulator to handle killed tasks
properly. Contributed by Ming Ma.
This commit is contained in:
parent
d323639686
commit
4efdf3a979
@ -473,9 +473,12 @@ private void processTaskFailedEvent(TaskFailedEvent event) {
|
||||
task.setTaskStatus(getPre21Value(event.getTaskStatus()));
|
||||
TaskFailed t = (TaskFailed)(event.getDatum());
|
||||
task.putDiagnosticInfo(t.error.toString());
|
||||
task.putFailedDueToAttemptId(t.failedDueToAttempt.toString());
|
||||
// A killed task has no failed attempt, so failedDueToAttempt may be null.
|
||||
if (t.getFailedDueToAttempt() != null) {
|
||||
task.putFailedDueToAttemptId(t.getFailedDueToAttempt().toString());
|
||||
}
|
||||
org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
|
||||
((TaskFailed) event.getDatum()).counters;
|
||||
((TaskFailed) event.getDatum()).getCounters();
|
||||
task.incorporateCounters(
|
||||
counters == null ? EMPTY_COUNTERS : counters);
|
||||
}
|
||||
@ -500,7 +503,7 @@ private void processTaskAttemptUnsuccessfulCompletionEvent(
|
||||
|
||||
attempt.setFinishTime(event.getFinishTime());
|
||||
org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
|
||||
((TaskAttemptUnsuccessfulCompletion) event.getDatum()).counters;
|
||||
((TaskAttemptUnsuccessfulCompletion) event.getDatum()).getCounters();
|
||||
attempt.incorporateCounters(
|
||||
counters == null ? EMPTY_COUNTERS : counters);
|
||||
attempt.arraySetClockSplits(event.getClockSplits());
|
||||
@ -509,7 +512,7 @@ private void processTaskAttemptUnsuccessfulCompletionEvent(
|
||||
attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes());
|
||||
TaskAttemptUnsuccessfulCompletion t =
|
||||
(TaskAttemptUnsuccessfulCompletion) (event.getDatum());
|
||||
attempt.putDiagnosticInfo(t.error.toString());
|
||||
attempt.putDiagnosticInfo(t.getError().toString());
|
||||
}
|
||||
|
||||
private void processTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {
|
||||
|
@ -10208,4 +10208,610 @@
|
||||
"clusterReduceMB" : -1,
|
||||
"jobMapMB" : 200,
|
||||
"jobReduceMB" : 200
|
||||
} {
|
||||
"priority" : "NORMAL",
|
||||
"jobID" : "job_1369942127770_1207",
|
||||
"user" : "jenkins",
|
||||
"jobName" : "TeraGen",
|
||||
"submitTime" : 1371223054499,
|
||||
"finishTime" : 1371223153874,
|
||||
"queue" : "sls_queue_1",
|
||||
"mapTasks" : [ {
|
||||
"startTime" : 1371223059053,
|
||||
"taskID" : "task_1369942127770_1207_m_000000",
|
||||
"taskType" : "MAP",
|
||||
"finishTime" : 1371223078206,
|
||||
"attempts" : [ ],
|
||||
"preferredLocations" : [ ],
|
||||
"taskStatus" : "KILLED",
|
||||
"inputBytes" : -1,
|
||||
"inputRecords" : -1,
|
||||
"outputBytes" : -1,
|
||||
"outputRecords" : -1
|
||||
} ],
|
||||
"reduceTasks" : [ ],
|
||||
"launchTime" : 1371223058937,
|
||||
"totalMaps" : 1,
|
||||
"totalReduces" : 0,
|
||||
"otherTasks" : [ ],
|
||||
"jobProperties" : {
|
||||
"mapreduce.job.ubertask.enable" : "false",
|
||||
"yarn.resourcemanager.max-completed-applications" : "10000",
|
||||
"yarn.resourcemanager.delayed.delegation-token.removal-interval-ms" : "30000",
|
||||
"mapreduce.client.submit.file.replication" : "2",
|
||||
"yarn.nodemanager.container-manager.thread-count" : "20",
|
||||
"mapred.queue.default.acl-administer-jobs" : "*",
|
||||
"dfs.image.transfer.bandwidthPerSec" : "0",
|
||||
"mapreduce.tasktracker.healthchecker.interval" : "60000",
|
||||
"mapreduce.jobtracker.staging.root.dir" : "/user",
|
||||
"yarn.resourcemanager.recovery.enabled" : "false",
|
||||
"yarn.resourcemanager.am.max-retries" : "1",
|
||||
"dfs.block.access.token.lifetime" : "600",
|
||||
"fs.AbstractFileSystem.file.impl" : "org.apache.hadoop.fs.local.LocalFs",
|
||||
"mapreduce.client.completion.pollinterval" : "5000",
|
||||
"mapreduce.job.ubertask.maxreduces" : "1",
|
||||
"mapreduce.reduce.shuffle.memory.limit.percent" : "0.25",
|
||||
"dfs.domain.socket.path" : "/var/run/hdfs-sockets/dn",
|
||||
"hadoop.ssl.keystores.factory.class" : "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory",
|
||||
"hadoop.http.authentication.kerberos.keytab" : "${user.home}/hadoop.keytab",
|
||||
"yarn.nodemanager.keytab" : "/etc/krb5.keytab",
|
||||
"io.seqfile.sorter.recordlimit" : "1000000",
|
||||
"s3.blocksize" : "67108864",
|
||||
"mapreduce.task.io.sort.factor" : "10",
|
||||
"yarn.nodemanager.disk-health-checker.interval-ms" : "120000",
|
||||
"mapreduce.job.working.dir" : "hdfs://a2115.smile.com:8020/user/jenkins",
|
||||
"yarn.admin.acl" : "*",
|
||||
"mapreduce.job.speculative.speculativecap" : "0.1",
|
||||
"dfs.namenode.num.checkpoints.retained" : "2",
|
||||
"dfs.namenode.delegation.token.renew-interval" : "86400000",
|
||||
"yarn.nodemanager.resource.memory-mb" : "8192",
|
||||
"io.map.index.interval" : "128",
|
||||
"s3.client-write-packet-size" : "65536",
|
||||
"mapreduce.task.files.preserve.failedtasks" : "false",
|
||||
"dfs.namenode.http-address" : "a2115.smile.com:20101",
|
||||
"ha.zookeeper.session-timeout.ms" : "5000",
|
||||
"hadoop.hdfs.configuration.version" : "1",
|
||||
"s3.replication" : "3",
|
||||
"dfs.datanode.balance.bandwidthPerSec" : "1048576",
|
||||
"mapreduce.reduce.shuffle.connect.timeout" : "180000",
|
||||
"hadoop.ssl.enabled" : "false",
|
||||
"dfs.journalnode.rpc-address" : "0.0.0.0:8485",
|
||||
"yarn.nodemanager.aux-services" : "mapreduce.shuffle",
|
||||
"mapreduce.job.counters.max" : "120",
|
||||
"dfs.datanode.readahead.bytes" : "4193404",
|
||||
"ipc.client.connect.max.retries.on.timeouts" : "45",
|
||||
"mapreduce.job.complete.cancel.delegation.tokens" : "true",
|
||||
"dfs.client.failover.max.attempts" : "15",
|
||||
"dfs.namenode.checkpoint.dir" : "file://${hadoop.tmp.dir}/dfs/namesecondary",
|
||||
"dfs.namenode.replication.work.multiplier.per.iteration" : "2",
|
||||
"fs.trash.interval" : "1",
|
||||
"yarn.resourcemanager.admin.address" : "a2115.smile.com:8033",
|
||||
"ha.health-monitor.check-interval.ms" : "1000",
|
||||
"mapreduce.job.outputformat.class" : "org.apache.hadoop.examples.terasort.TeraOutputFormat",
|
||||
"hadoop.jetty.logs.serve.aliases" : "true",
|
||||
"hadoop.http.authentication.kerberos.principal" : "HTTP/_HOST@LOCALHOST",
|
||||
"mapreduce.job.reduce.shuffle.consumer.plugin.class" : "org.apache.hadoop.mapreduce.task.reduce.Shuffle",
|
||||
"s3native.blocksize" : "67108864",
|
||||
"dfs.namenode.edits.dir" : "${dfs.namenode.name.dir}",
|
||||
"ha.health-monitor.sleep-after-disconnect.ms" : "1000",
|
||||
"dfs.encrypt.data.transfer" : "false",
|
||||
"dfs.datanode.http.address" : "0.0.0.0:50075",
|
||||
"mapreduce.terasort.num-rows" : "400000000",
|
||||
"mapreduce.job.map.class" : "org.apache.hadoop.examples.terasort.TeraGen$SortGenMapper",
|
||||
"mapreduce.jobtracker.jobhistory.task.numberprogresssplits" : "12",
|
||||
"dfs.namenode.write.stale.datanode.ratio" : "0.5f",
|
||||
"dfs.client.use.datanode.hostname" : "false",
|
||||
"yarn.acl.enable" : "true",
|
||||
"hadoop.security.instrumentation.requires.admin" : "false",
|
||||
"yarn.nodemanager.localizer.fetch.thread-count" : "4",
|
||||
"hadoop.security.authorization" : "false",
|
||||
"user.name" : "jenkins",
|
||||
"dfs.namenode.fs-limits.min-block-size" : "1048576",
|
||||
"dfs.client.failover.connection.retries.on.timeouts" : "0",
|
||||
"hadoop.security.group.mapping.ldap.search.filter.group" : "(objectClass=group)",
|
||||
"mapreduce.output.fileoutputformat.compress.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
|
||||
"dfs.namenode.safemode.extension" : "30000",
|
||||
"mapreduce.shuffle.port" : "8080",
|
||||
"mapreduce.reduce.log.level" : "INFO",
|
||||
"yarn.log-aggregation-enable" : "false",
|
||||
"dfs.datanode.sync.behind.writes" : "false",
|
||||
"mapreduce.jobtracker.instrumentation" : "org.apache.hadoop.mapred.JobTrackerMetricsInst",
|
||||
"dfs.https.server.keystore.resource" : "ssl-server.xml",
|
||||
"hadoop.security.group.mapping.ldap.search.attr.group.name" : "cn",
|
||||
"dfs.namenode.replication.min" : "1",
|
||||
"mapreduce.map.java.opts" : " -Xmx825955249",
|
||||
"yarn.scheduler.fair.allocation.file" : "/etc/yarn/fair-scheduler.xml",
|
||||
"s3native.bytes-per-checksum" : "512",
|
||||
"mapreduce.tasktracker.tasks.sleeptimebeforesigkill" : "5000",
|
||||
"tfile.fs.output.buffer.size" : "262144",
|
||||
"yarn.nodemanager.local-dirs" : "${hadoop.tmp.dir}/nm-local-dir",
|
||||
"mapreduce.jobtracker.persist.jobstatus.active" : "false",
|
||||
"fs.AbstractFileSystem.hdfs.impl" : "org.apache.hadoop.fs.Hdfs",
|
||||
"mapreduce.job.map.output.collector.class" : "org.apache.hadoop.mapred.MapTask$MapOutputBuffer",
|
||||
"mapreduce.tasktracker.local.dir.minspacestart" : "0",
|
||||
"dfs.namenode.safemode.min.datanodes" : "0",
|
||||
"hadoop.security.uid.cache.secs" : "14400",
|
||||
"dfs.client.https.need-auth" : "false",
|
||||
"dfs.client.write.exclude.nodes.cache.expiry.interval.millis" : "600000",
|
||||
"dfs.client.https.keystore.resource" : "ssl-client.xml",
|
||||
"dfs.namenode.max.objects" : "0",
|
||||
"hadoop.ssl.client.conf" : "ssl-client.xml",
|
||||
"dfs.namenode.safemode.threshold-pct" : "0.999f",
|
||||
"mapreduce.tasktracker.local.dir.minspacekill" : "0",
|
||||
"mapreduce.jobtracker.retiredjobs.cache.size" : "1000",
|
||||
"dfs.blocksize" : "134217728",
|
||||
"yarn.resourcemanager.scheduler.class" : "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler",
|
||||
"mapreduce.job.reduce.slowstart.completedmaps" : "0.8",
|
||||
"mapreduce.job.end-notification.retry.attempts" : "5",
|
||||
"mapreduce.job.inputformat.class" : "org.apache.hadoop.examples.terasort.TeraGen$RangeInputFormat",
|
||||
"mapreduce.map.memory.mb" : "1024",
|
||||
"mapreduce.job.user.name" : "jenkins",
|
||||
"mapreduce.tasktracker.outofband.heartbeat" : "false",
|
||||
"io.native.lib.available" : "true",
|
||||
"mapreduce.jobtracker.persist.jobstatus.hours" : "0",
|
||||
"dfs.client-write-packet-size" : "65536",
|
||||
"mapreduce.client.progressmonitor.pollinterval" : "1000",
|
||||
"dfs.namenode.name.dir" : "file://${hadoop.tmp.dir}/dfs/name",
|
||||
"dfs.ha.log-roll.period" : "120",
|
||||
"mapreduce.reduce.input.buffer.percent" : "0.0",
|
||||
"mapreduce.map.output.compress.codec" : "org.apache.hadoop.io.compress.SnappyCodec",
|
||||
"dfs.client.failover.sleep.base.millis" : "500",
|
||||
"dfs.datanode.directoryscan.threads" : "1",
|
||||
"mapreduce.jobtracker.address" : "neededForHive:999999",
|
||||
"mapreduce.cluster.local.dir" : "${hadoop.tmp.dir}/mapred/local",
|
||||
"yarn.scheduler.fair.user-as-default-queue" : "true",
|
||||
"mapreduce.job.application.attempt.id" : "1",
|
||||
"dfs.permissions.enabled" : "true",
|
||||
"mapreduce.tasktracker.taskcontroller" : "org.apache.hadoop.mapred.DefaultTaskController",
|
||||
"yarn.scheduler.fair.preemption" : "true",
|
||||
"mapreduce.reduce.shuffle.parallelcopies" : "5",
|
||||
"dfs.support.append" : "true",
|
||||
"yarn.nodemanager.env-whitelist" : "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME",
|
||||
"mapreduce.jobtracker.heartbeats.in.second" : "100",
|
||||
"mapreduce.job.maxtaskfailures.per.tracker" : "3",
|
||||
"ipc.client.connection.maxidletime" : "10000",
|
||||
"mapreduce.shuffle.ssl.enabled" : "false",
|
||||
"dfs.namenode.invalidate.work.pct.per.iteration" : "0.32f",
|
||||
"dfs.blockreport.intervalMsec" : "21600000",
|
||||
"fs.s3.sleepTimeSeconds" : "10",
|
||||
"dfs.namenode.replication.considerLoad" : "true",
|
||||
"dfs.client.block.write.retries" : "3",
|
||||
"hadoop.ssl.server.conf" : "ssl-server.xml",
|
||||
"dfs.namenode.name.dir.restore" : "false",
|
||||
"rpc.engine.org.apache.hadoop.mapreduce.v2.api.MRClientProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
|
||||
"dfs.datanode.hdfs-blocks-metadata.enabled" : "true",
|
||||
"ha.zookeeper.parent-znode" : "/hadoop-ha",
|
||||
"io.seqfile.lazydecompress" : "true",
|
||||
"mapreduce.reduce.merge.inmem.threshold" : "1000",
|
||||
"mapreduce.input.fileinputformat.split.minsize" : "0",
|
||||
"dfs.replication" : "3",
|
||||
"ipc.client.tcpnodelay" : "false",
|
||||
"dfs.namenode.accesstime.precision" : "3600000",
|
||||
"s3.stream-buffer-size" : "4096",
|
||||
"mapreduce.jobtracker.tasktracker.maxblacklists" : "4",
|
||||
"dfs.client.read.shortcircuit.skip.checksum" : "false",
|
||||
"mapreduce.job.jvm.numtasks" : "1",
|
||||
"mapreduce.task.io.sort.mb" : "100",
|
||||
"io.file.buffer.size" : "65536",
|
||||
"dfs.namenode.audit.loggers" : "default",
|
||||
"dfs.namenode.checkpoint.txns" : "1000000",
|
||||
"yarn.nodemanager.admin-env" : "MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX",
|
||||
"mapreduce.job.jar" : "/user/jenkins/.staging/job_1369942127770_1207/job.jar",
|
||||
"mapreduce.job.split.metainfo.maxsize" : "10000000",
|
||||
"kfs.replication" : "3",
|
||||
"rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
|
||||
"yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms" : "1000",
|
||||
"mapreduce.reduce.maxattempts" : "4",
|
||||
"kfs.stream-buffer-size" : "4096",
|
||||
"dfs.ha.tail-edits.period" : "60",
|
||||
"hadoop.security.authentication" : "simple",
|
||||
"fs.s3.buffer.dir" : "${hadoop.tmp.dir}/s3",
|
||||
"rpc.engine.org.apache.hadoop.yarn.api.AMRMProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
|
||||
"mapreduce.jobtracker.taskscheduler" : "org.apache.hadoop.mapred.JobQueueTaskScheduler",
|
||||
"yarn.app.mapreduce.am.job.task.listener.thread-count" : "30",
|
||||
"dfs.namenode.avoid.read.stale.datanode" : "false",
|
||||
"mapreduce.job.reduces" : "0",
|
||||
"mapreduce.map.sort.spill.percent" : "0.8",
|
||||
"dfs.client.file-block-storage-locations.timeout" : "60",
|
||||
"dfs.datanode.drop.cache.behind.writes" : "false",
|
||||
"mapreduce.job.end-notification.retry.interval" : "1",
|
||||
"mapreduce.job.maps" : "96",
|
||||
"mapreduce.job.speculative.slownodethreshold" : "1.0",
|
||||
"tfile.fs.input.buffer.size" : "262144",
|
||||
"mapreduce.map.speculative" : "false",
|
||||
"dfs.block.access.token.enable" : "false",
|
||||
"dfs.journalnode.http-address" : "0.0.0.0:8480",
|
||||
"mapreduce.job.acl-view-job" : " ",
|
||||
"mapreduce.reduce.shuffle.retry-delay.max.ms" : "60000",
|
||||
"mapreduce.job.end-notification.max.retry.interval" : "5",
|
||||
"ftp.blocksize" : "67108864",
|
||||
"mapreduce.tasktracker.http.threads" : "80",
|
||||
"mapreduce.reduce.java.opts" : " -Xmx825955249",
|
||||
"dfs.datanode.data.dir" : "file://${hadoop.tmp.dir}/dfs/data",
|
||||
"ha.failover-controller.cli-check.rpc-timeout.ms" : "20000",
|
||||
"dfs.namenode.max.extra.edits.segments.retained" : "10000",
|
||||
"dfs.https.port" : "20102",
|
||||
"dfs.namenode.replication.interval" : "3",
|
||||
"mapreduce.task.skip.start.attempts" : "2",
|
||||
"dfs.namenode.https-address" : "a2115.smile.com:20102",
|
||||
"mapreduce.jobtracker.persist.jobstatus.dir" : "/jobtracker/jobsInfo",
|
||||
"ipc.client.kill.max" : "10",
|
||||
"dfs.ha.automatic-failover.enabled" : "false",
|
||||
"mapreduce.jobhistory.keytab" : "/etc/security/keytab/jhs.service.keytab",
|
||||
"dfs.image.transfer.timeout" : "600000",
|
||||
"dfs.client.failover.sleep.max.millis" : "15000",
|
||||
"mapreduce.job.end-notification.max.attempts" : "5",
|
||||
"mapreduce.task.tmp.dir" : "./tmp",
|
||||
"dfs.default.chunk.view.size" : "32768",
|
||||
"kfs.bytes-per-checksum" : "512",
|
||||
"mapreduce.reduce.memory.mb" : "1024",
|
||||
"hadoop.http.filter.initializers" : "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer",
|
||||
"dfs.datanode.failed.volumes.tolerated" : "0",
|
||||
"hadoop.http.authentication.type" : "simple",
|
||||
"dfs.datanode.data.dir.perm" : "700",
|
||||
"yarn.resourcemanager.client.thread-count" : "50",
|
||||
"ipc.server.listen.queue.size" : "128",
|
||||
"mapreduce.reduce.skip.maxgroups" : "0",
|
||||
"file.stream-buffer-size" : "4096",
|
||||
"dfs.namenode.fs-limits.max-directory-items" : "0",
|
||||
"io.mapfile.bloom.size" : "1048576",
|
||||
"yarn.nodemanager.container-executor.class" : "org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor",
|
||||
"mapreduce.map.maxattempts" : "4",
|
||||
"mapreduce.jobtracker.jobhistory.block.size" : "3145728",
|
||||
"yarn.log-aggregation.retain-seconds" : "-1",
|
||||
"yarn.app.mapreduce.am.job.committer.cancel-timeout" : "60000",
|
||||
"ftp.replication" : "3",
|
||||
"mapreduce.jobtracker.http.address" : "0.0.0.0:50030",
|
||||
"yarn.nodemanager.health-checker.script.timeout-ms" : "1200000",
|
||||
"mapreduce.jobhistory.address" : "a2115.smile.com:10020",
|
||||
"mapreduce.jobtracker.taskcache.levels" : "2",
|
||||
"dfs.datanode.dns.nameserver" : "default",
|
||||
"mapreduce.application.classpath" : "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*",
|
||||
"yarn.nodemanager.log.retain-seconds" : "10800",
|
||||
"mapred.child.java.opts" : "-Xmx200m",
|
||||
"dfs.replication.max" : "512",
|
||||
"map.sort.class" : "org.apache.hadoop.util.QuickSort",
|
||||
"dfs.stream-buffer-size" : "4096",
|
||||
"dfs.namenode.backup.address" : "0.0.0.0:50100",
|
||||
"hadoop.util.hash.type" : "murmur",
|
||||
"dfs.block.access.key.update.interval" : "600",
|
||||
"dfs.datanode.dns.interface" : "default",
|
||||
"dfs.datanode.use.datanode.hostname" : "false",
|
||||
"mapreduce.job.output.key.class" : "org.apache.hadoop.io.Text",
|
||||
"dfs.client.read.shortcircuit" : "false",
|
||||
"dfs.namenode.backup.http-address" : "0.0.0.0:50105",
|
||||
"yarn.nodemanager.container-monitor.interval-ms" : "3000",
|
||||
"yarn.nodemanager.disk-health-checker.min-healthy-disks" : "0.25",
|
||||
"kfs.client-write-packet-size" : "65536",
|
||||
"ha.zookeeper.acl" : "world:anyone:rwcda",
|
||||
"yarn.nodemanager.sleep-delay-before-sigkill.ms" : "250",
|
||||
"mapreduce.job.dir" : "/user/jenkins/.staging/job_1369942127770_1207",
|
||||
"io.map.index.skip" : "0",
|
||||
"net.topology.node.switch.mapping.impl" : "org.apache.hadoop.net.ScriptBasedMapping",
|
||||
"fs.s3.maxRetries" : "4",
|
||||
"ha.failover-controller.new-active.rpc-timeout.ms" : "60000",
|
||||
"s3native.client-write-packet-size" : "65536",
|
||||
"yarn.resourcemanager.amliveliness-monitor.interval-ms" : "1000",
|
||||
"hadoop.http.staticuser.user" : "dr.who",
|
||||
"mapreduce.reduce.speculative" : "false",
|
||||
"mapreduce.client.output.filter" : "FAILED",
|
||||
"mapreduce.ifile.readahead.bytes" : "4194304",
|
||||
"mapreduce.tasktracker.report.address" : "127.0.0.1:0",
|
||||
"mapreduce.task.userlog.limit.kb" : "0",
|
||||
"mapreduce.tasktracker.map.tasks.maximum" : "2",
|
||||
"hadoop.http.authentication.simple.anonymous.allowed" : "true",
|
||||
"hadoop.fuse.timer.period" : "5",
|
||||
"dfs.namenode.num.extra.edits.retained" : "1000000",
|
||||
"hadoop.rpc.socket.factory.class.default" : "org.apache.hadoop.net.StandardSocketFactory",
|
||||
"mapreduce.job.submithostname" : "a2115.smile.com",
|
||||
"dfs.namenode.handler.count" : "10",
|
||||
"fs.automatic.close" : "false",
|
||||
"mapreduce.job.submithostaddress" : "10.20.206.115",
|
||||
"mapreduce.tasktracker.healthchecker.script.timeout" : "600000",
|
||||
"dfs.datanode.directoryscan.interval" : "21600",
|
||||
"yarn.resourcemanager.address" : "a2115.smile.com:8032",
|
||||
"yarn.nodemanager.health-checker.interval-ms" : "600000",
|
||||
"dfs.client.file-block-storage-locations.num-threads" : "10",
|
||||
"yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs" : "86400",
|
||||
"mapreduce.reduce.markreset.buffer.percent" : "0.0",
|
||||
"hadoop.security.group.mapping.ldap.directory.search.timeout" : "10000",
|
||||
"mapreduce.map.log.level" : "INFO",
|
||||
"dfs.bytes-per-checksum" : "512",
|
||||
"yarn.nodemanager.localizer.address" : "0.0.0.0:8040",
|
||||
"dfs.namenode.checkpoint.max-retries" : "3",
|
||||
"ha.health-monitor.rpc-timeout.ms" : "45000",
|
||||
"yarn.resourcemanager.keytab" : "/etc/krb5.keytab",
|
||||
"ftp.stream-buffer-size" : "4096",
|
||||
"dfs.namenode.avoid.write.stale.datanode" : "false",
|
||||
"hadoop.security.group.mapping.ldap.search.attr.member" : "member",
|
||||
"mapreduce.output.fileoutputformat.outputdir" : "hdfs://a2115.smile.com:8020/user/jenkins/tera-gen-1",
|
||||
"dfs.blockreport.initialDelay" : "0",
|
||||
"yarn.nm.liveness-monitor.expiry-interval-ms" : "600000",
|
||||
"hadoop.http.authentication.token.validity" : "36000",
|
||||
"dfs.namenode.delegation.token.max-lifetime" : "604800000",
|
||||
"mapreduce.job.hdfs-servers" : "${fs.defaultFS}",
|
||||
"s3native.replication" : "3",
|
||||
"yarn.nodemanager.localizer.client.thread-count" : "5",
|
||||
"dfs.heartbeat.interval" : "3",
|
||||
"rpc.engine.org.apache.hadoop.ipc.ProtocolMetaInfoPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
|
||||
"dfs.ha.fencing.ssh.connect-timeout" : "30000",
|
||||
"yarn.resourcemanager.container.liveness-monitor.interval-ms" : "600000",
|
||||
"yarn.am.liveness-monitor.expiry-interval-ms" : "600000",
|
||||
"mapreduce.task.profile" : "false",
|
||||
"mapreduce.tasktracker.http.address" : "0.0.0.0:50060",
|
||||
"mapreduce.tasktracker.instrumentation" : "org.apache.hadoop.mapred.TaskTrackerMetricsInst",
|
||||
"mapreduce.jobhistory.webapp.address" : "a2115.smile.com:19888",
|
||||
"ha.failover-controller.graceful-fence.rpc-timeout.ms" : "5000",
|
||||
"yarn.ipc.rpc.class" : "org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC",
|
||||
"mapreduce.job.name" : "TeraGen",
|
||||
"kfs.blocksize" : "67108864",
|
||||
"yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs" : "86400",
|
||||
"mapreduce.job.ubertask.maxmaps" : "9",
|
||||
"yarn.scheduler.maximum-allocation-mb" : "8192",
|
||||
"yarn.nodemanager.heartbeat.interval-ms" : "1000",
|
||||
"mapreduce.job.userlog.retain.hours" : "24",
|
||||
"dfs.namenode.secondary.http-address" : "0.0.0.0:50090",
|
||||
"mapreduce.task.timeout" : "600000",
|
||||
"mapreduce.framework.name" : "yarn",
|
||||
"ipc.client.idlethreshold" : "4000",
|
||||
"ftp.bytes-per-checksum" : "512",
|
||||
"ipc.server.tcpnodelay" : "false",
|
||||
"dfs.namenode.stale.datanode.interval" : "30000",
|
||||
"s3.bytes-per-checksum" : "512",
|
||||
"mapreduce.job.speculative.slowtaskthreshold" : "1.0",
|
||||
"yarn.nodemanager.localizer.cache.target-size-mb" : "10240",
|
||||
"yarn.nodemanager.remote-app-log-dir" : "/tmp/logs",
|
||||
"fs.s3.block.size" : "67108864",
|
||||
"mapreduce.job.queuename" : "sls_queue_1",
|
||||
"dfs.client.failover.connection.retries" : "0",
|
||||
"hadoop.rpc.protection" : "authentication",
|
||||
"yarn.scheduler.minimum-allocation-mb" : "1024",
|
||||
"yarn.app.mapreduce.client-am.ipc.max-retries" : "1",
|
||||
"hadoop.security.auth_to_local" : "DEFAULT",
|
||||
"dfs.secondary.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
|
||||
"ftp.client-write-packet-size" : "65536",
|
||||
"fs.defaultFS" : "hdfs://a2115.smile.com:8020",
|
||||
"yarn.nodemanager.address" : "0.0.0.0:0",
|
||||
"yarn.scheduler.fair.assignmultiple" : "true",
|
||||
"yarn.resourcemanager.scheduler.client.thread-count" : "50",
|
||||
"mapreduce.task.merge.progress.records" : "10000",
|
||||
"file.client-write-packet-size" : "65536",
|
||||
"yarn.nodemanager.delete.thread-count" : "4",
|
||||
"yarn.resourcemanager.scheduler.address" : "a2115.smile.com:8030",
|
||||
"fs.trash.checkpoint.interval" : "0",
|
||||
"hadoop.http.authentication.signature.secret.file" : "${user.home}/hadoop-http-auth-signature-secret",
|
||||
"s3native.stream-buffer-size" : "4096",
|
||||
"mapreduce.reduce.shuffle.read.timeout" : "180000",
|
||||
"mapreduce.admin.user.env" : "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native",
|
||||
"yarn.app.mapreduce.am.command-opts" : " -Xmx1238932873",
|
||||
"dfs.namenode.checkpoint.edits.dir" : "${dfs.namenode.checkpoint.dir}",
|
||||
"fs.permissions.umask-mode" : "022",
|
||||
"dfs.client.domain.socket.data.traffic" : "false",
|
||||
"hadoop.common.configuration.version" : "0.23.0",
|
||||
"mapreduce.tasktracker.dns.interface" : "default",
|
||||
"mapreduce.output.fileoutputformat.compress.type" : "BLOCK",
|
||||
"mapreduce.ifile.readahead" : "true",
|
||||
"hadoop.security.group.mapping.ldap.ssl" : "false",
|
||||
"io.serializations" : "org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization",
|
||||
"yarn.nodemanager.aux-services.mapreduce.shuffle.class" : "org.apache.hadoop.mapred.ShuffleHandler",
|
||||
"fs.df.interval" : "60000",
|
||||
"mapreduce.reduce.shuffle.input.buffer.percent" : "0.70",
|
||||
"io.seqfile.compress.blocksize" : "1000000",
|
||||
"hadoop.security.groups.cache.secs" : "300",
|
||||
"ipc.client.connect.max.retries" : "10",
|
||||
"dfs.namenode.delegation.key.update-interval" : "86400000",
|
||||
"yarn.nodemanager.process-kill-wait.ms" : "2000",
|
||||
"yarn.application.classpath" : "$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$YARN_HOME/*,$YARN_HOME/lib/*",
|
||||
"yarn.app.mapreduce.client.max-retries" : "3",
|
||||
"dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction" : "0.75f",
|
||||
"yarn.nodemanager.log-aggregation.compression-type" : "none",
|
||||
"hadoop.security.group.mapping.ldap.search.filter.user" : "(&(objectClass=user)(sAMAccountName={0}))",
|
||||
"yarn.nodemanager.localizer.cache.cleanup.interval-ms" : "600000",
|
||||
"dfs.image.compress" : "false",
|
||||
"mapred.mapper.new-api" : "true",
|
||||
"yarn.nodemanager.log-dirs" : "${yarn.log.dir}/userlogs",
|
||||
"dfs.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
|
||||
"fs.s3n.block.size" : "67108864",
|
||||
"fs.ftp.host" : "0.0.0.0",
|
||||
"hadoop.security.group.mapping" : "org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback",
|
||||
"dfs.datanode.address" : "0.0.0.0:50010",
|
||||
"mapreduce.map.skip.maxrecords" : "0",
|
||||
"dfs.datanode.https.address" : "0.0.0.0:50475",
|
||||
"file.replication" : "1",
|
||||
"yarn.resourcemanager.resource-tracker.address" : "a2115.smile.com:8031",
|
||||
"dfs.datanode.drop.cache.behind.reads" : "false",
|
||||
"hadoop.fuse.connection.timeout" : "300",
|
||||
"hadoop.work.around.non.threadsafe.getpwuid" : "false",
|
||||
"mapreduce.jobtracker.restart.recover" : "false",
|
||||
"hadoop.tmp.dir" : "/tmp/hadoop-${user.name}",
|
||||
"mapreduce.output.fileoutputformat.compress" : "false",
|
||||
"mapreduce.tasktracker.indexcache.mb" : "10",
|
||||
"mapreduce.client.genericoptionsparser.used" : "true",
|
||||
"dfs.client.block.write.replace-datanode-on-failure.policy" : "DEFAULT",
|
||||
"mapreduce.job.committer.setup.cleanup.needed" : "true",
|
||||
"hadoop.kerberos.kinit.command" : "kinit",
|
||||
"dfs.datanode.du.reserved" : "0",
|
||||
"dfs.namenode.fs-limits.max-blocks-per-file" : "1048576",
|
||||
"file.bytes-per-checksum" : "512",
|
||||
"mapreduce.task.profile.reduces" : "0-2",
|
||||
"mapreduce.jobtracker.handler.count" : "10",
|
||||
"dfs.client.block.write.replace-datanode-on-failure.enable" : "true",
|
||||
"mapreduce.job.output.value.class" : "org.apache.hadoop.io.Text",
|
||||
"yarn.dispatcher.exit-on-error" : "true",
|
||||
"net.topology.script.number.args" : "100",
|
||||
"mapreduce.task.profile.maps" : "0-2",
|
||||
"dfs.namenode.decommission.interval" : "30",
|
||||
"dfs.image.compression.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
|
||||
"yarn.resourcemanager.webapp.address" : "a2115.smile.com:8088",
|
||||
"mapreduce.jobtracker.system.dir" : "${hadoop.tmp.dir}/mapred/system",
|
||||
"hadoop.ssl.hostname.verifier" : "DEFAULT",
|
||||
"yarn.nodemanager.vmem-pmem-ratio" : "2.1",
|
||||
"dfs.namenode.support.allow.format" : "true",
|
||||
"mapreduce.jobhistory.principal" : "jhs/_HOST@REALM.TLD",
|
||||
"io.mapfile.bloom.error.rate" : "0.005",
|
||||
"mapreduce.shuffle.ssl.file.buffer.size" : "65536",
|
||||
"dfs.permissions.superusergroup" : "supergroup",
|
||||
"dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold" : "10737418240",
|
||||
"mapreduce.jobtracker.expire.trackers.interval" : "600000",
|
||||
"mapreduce.cluster.acls.enabled" : "false",
|
||||
"yarn.nodemanager.remote-app-log-dir-suffix" : "logs",
|
||||
"ha.failover-controller.graceful-fence.connection.retries" : "1",
|
||||
"ha.health-monitor.connect-retry-interval.ms" : "1000",
|
||||
"mapreduce.reduce.shuffle.merge.percent" : "0.66",
|
||||
"yarn.app.mapreduce.am.resource.mb" : "1536",
|
||||
"io.seqfile.local.dir" : "${hadoop.tmp.dir}/io/local",
|
||||
"dfs.namenode.checkpoint.check.period" : "60",
|
||||
"yarn.resourcemanager.nm.liveness-monitor.interval-ms" : "1000",
|
||||
"mapreduce.jobtracker.maxtasks.perjob" : "-1",
|
||||
"mapreduce.jobtracker.jobhistory.lru.cache.size" : "5",
|
||||
"file.blocksize" : "67108864",
|
||||
"tfile.io.chunk.size" : "1048576",
|
||||
"mapreduce.job.acl-modify-job" : " ",
|
||||
"yarn.nodemanager.webapp.address" : "0.0.0.0:8042",
|
||||
"mapreduce.tasktracker.reduce.tasks.maximum" : "2",
|
||||
"io.skip.checksum.errors" : "false",
|
||||
"mapreduce.cluster.temp.dir" : "${hadoop.tmp.dir}/mapred/temp",
|
||||
"yarn.app.mapreduce.am.staging-dir" : "/user",
|
||||
"dfs.namenode.edits.journal-plugin.qjournal" : "org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager",
|
||||
"dfs.datanode.handler.count" : "10",
|
||||
"fs.ftp.host.port" : "21",
|
||||
"dfs.namenode.decommission.nodes.per.interval" : "5",
|
||||
"yarn.resourcemanager.admin.client.thread-count" : "1",
|
||||
"dfs.namenode.fs-limits.max-component-length" : "0",
|
||||
"dfs.namenode.checkpoint.period" : "3600",
|
||||
"fs.AbstractFileSystem.viewfs.impl" : "org.apache.hadoop.fs.viewfs.ViewFs",
|
||||
"yarn.resourcemanager.resource-tracker.client.thread-count" : "50",
|
||||
"mapreduce.tasktracker.dns.nameserver" : "default",
|
||||
"mapreduce.map.output.compress" : "true",
|
||||
"dfs.datanode.ipc.address" : "0.0.0.0:50020",
|
||||
"hadoop.ssl.require.client.cert" : "false",
|
||||
"yarn.nodemanager.delete.debug-delay-sec" : "0",
|
||||
"dfs.datanode.max.transfer.threads" : "4096"
|
||||
},
|
||||
"computonsPerMapInputByte" : -1,
|
||||
"computonsPerMapOutputByte" : -1,
|
||||
"computonsPerReduceInputByte" : -1,
|
||||
"computonsPerReduceOutputByte" : -1,
|
||||
"heapMegabytes" : 200,
|
||||
"outcome" : "SUCCESS",
|
||||
"jobtype" : "JAVA",
|
||||
"directDependantJobs" : [ ],
|
||||
"successfulMapAttemptCDFs" : [ {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 47021,
|
||||
"minimum" : 11143,
|
||||
"rankings" : [ {
|
||||
"datum" : 13354,
|
||||
"relativeRanking" : 0.05
|
||||
}, {
|
||||
"datum" : 14101,
|
||||
"relativeRanking" : 0.1
|
||||
}, {
|
||||
"datum" : 15609,
|
||||
"relativeRanking" : 0.15
|
||||
}, {
|
||||
"datum" : 15919,
|
||||
"relativeRanking" : 0.2
|
||||
}, {
|
||||
"datum" : 17003,
|
||||
"relativeRanking" : 0.25
|
||||
}, {
|
||||
"datum" : 17109,
|
||||
"relativeRanking" : 0.3
|
||||
}, {
|
||||
"datum" : 18342,
|
||||
"relativeRanking" : 0.35
|
||||
}, {
|
||||
"datum" : 18870,
|
||||
"relativeRanking" : 0.4
|
||||
}, {
|
||||
"datum" : 19127,
|
||||
"relativeRanking" : 0.45
|
||||
}, {
|
||||
"datum" : 19221,
|
||||
"relativeRanking" : 0.5
|
||||
}, {
|
||||
"datum" : 19481,
|
||||
"relativeRanking" : 0.55
|
||||
}, {
|
||||
"datum" : 19896,
|
||||
"relativeRanking" : 0.6
|
||||
}, {
|
||||
"datum" : 20585,
|
||||
"relativeRanking" : 0.65
|
||||
}, {
|
||||
"datum" : 20784,
|
||||
"relativeRanking" : 0.7
|
||||
}, {
|
||||
"datum" : 21452,
|
||||
"relativeRanking" : 0.75
|
||||
}, {
|
||||
"datum" : 21853,
|
||||
"relativeRanking" : 0.8
|
||||
}, {
|
||||
"datum" : 22436,
|
||||
"relativeRanking" : 0.85
|
||||
}, {
|
||||
"datum" : 32646,
|
||||
"relativeRanking" : 0.9
|
||||
}, {
|
||||
"datum" : 41553,
|
||||
"relativeRanking" : 0.95
|
||||
} ],
|
||||
"numberValues" : 96
|
||||
} ],
|
||||
"failedMapAttemptCDFs" : [ {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
}, {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
} ],
|
||||
"successfulReduceAttemptCDF" : {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
},
|
||||
"failedReduceAttemptCDF" : {
|
||||
"maximum" : 9223372036854775807,
|
||||
"minimum" : -9223372036854775808,
|
||||
"rankings" : [ ],
|
||||
"numberValues" : 0
|
||||
},
|
||||
"mapperTriesToSucceed" : [ 1.0 ],
|
||||
"failedMapperFraction" : 0.0,
|
||||
"relativeTime" : 0,
|
||||
"clusterMapMB" : -1,
|
||||
"clusterReduceMB" : -1,
|
||||
"jobMapMB" : 200,
|
||||
"jobReduceMB" : 200
|
||||
}
|
||||
|
@ -389,6 +389,9 @@ private void startAMFromRumenTraces(Resource containerResource,
|
||||
new ArrayList<ContainerSimulator>();
|
||||
// map tasks
|
||||
for(LoggedTask mapTask : job.getMapTasks()) {
|
||||
if (mapTask.getAttempts().size() == 0) {
|
||||
continue;
|
||||
}
|
||||
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
|
||||
.get(mapTask.getAttempts().size() - 1);
|
||||
String hostname = taskAttempt.getHostName().getValue();
|
||||
@ -400,6 +403,9 @@ private void startAMFromRumenTraces(Resource containerResource,
|
||||
|
||||
// reduce tasks
|
||||
for(LoggedTask reduceTask : job.getReduceTasks()) {
|
||||
if (reduceTask.getAttempts().size() == 0) {
|
||||
continue;
|
||||
}
|
||||
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
|
||||
.get(reduceTask.getAttempts().size() - 1);
|
||||
String hostname = taskAttempt.getHostName().getValue();
|
||||
|
@ -72,11 +72,17 @@ public static Set<String> parseNodesFromRumenTrace(String jobTrace)
|
||||
while ((job = reader.getNext()) != null) {
|
||||
for(LoggedTask mapTask : job.getMapTasks()) {
|
||||
// select the last attempt
|
||||
if (mapTask.getAttempts().size() == 0) {
|
||||
continue;
|
||||
}
|
||||
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
|
||||
.get(mapTask.getAttempts().size() - 1);
|
||||
nodeSet.add(taskAttempt.getHostName().getValue());
|
||||
}
|
||||
for(LoggedTask reduceTask : job.getReduceTasks()) {
|
||||
if (reduceTask.getAttempts().size() == 0) {
|
||||
continue;
|
||||
}
|
||||
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
|
||||
.get(reduceTask.getAttempts().size() - 1);
|
||||
nodeSet.add(taskAttempt.getHostName().getValue());
|
||||
|
@ -159,6 +159,9 @@ Release 2.9.0 - UNRELEASED
|
||||
YARN-4613. Fix test failure in TestClientRMService#testGetClusterNodes.
|
||||
(Takashi Ohnishi via rohithsharmaks)
|
||||
|
||||
YARN-4612. Fix rumen and scheduler load simulator to handle killed tasks properly.
|
||||
(Ming Ma via xgong)
|
||||
|
||||
Release 2.8.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
Loading…
Reference in New Issue
Block a user