HADOOP-14207. "dfsadmin -refreshCallQueue" fails with DecayRpcScheduler. Contributed by Surendra Singh Lihore.
This commit is contained in:
parent
d11d280b8b
commit
4ea2778acb
@ -261,6 +261,7 @@ public class CallQueueManager<E> {
|
|||||||
Class<? extends BlockingQueue<E>> queueClassToUse, int maxSize,
|
Class<? extends BlockingQueue<E>> queueClassToUse, int maxSize,
|
||||||
String ns, Configuration conf) {
|
String ns, Configuration conf) {
|
||||||
int priorityLevels = parseNumLevels(ns, conf);
|
int priorityLevels = parseNumLevels(ns, conf);
|
||||||
|
this.scheduler.stop();
|
||||||
RpcScheduler newScheduler = createScheduler(schedulerClass, priorityLevels,
|
RpcScheduler newScheduler = createScheduler(schedulerClass, priorityLevels,
|
||||||
ns, conf);
|
ns, conf);
|
||||||
BlockingQueue<E> newQ = createCallQueueInstance(queueClassToUse,
|
BlockingQueue<E> newQ = createCallQueueInstance(queueClassToUse,
|
||||||
|
@ -33,6 +33,8 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||||||
import java.util.concurrent.atomic.AtomicLongArray;
|
import java.util.concurrent.atomic.AtomicLongArray;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
import javax.management.ObjectName;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.util.concurrent.AtomicDoubleArray;
|
import com.google.common.util.concurrent.AtomicDoubleArray;
|
||||||
@ -162,6 +164,7 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
private final String namespace;
|
private final String namespace;
|
||||||
private final int topUsersCount; // e.g., report top 10 users' metrics
|
private final int topUsersCount; // e.g., report top 10 users' metrics
|
||||||
private static final double PRECISION = 0.0001;
|
private static final double PRECISION = 0.0001;
|
||||||
|
private MetricsProxy metricsProxy;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This TimerTask will call decayCurrentCounts until
|
* This TimerTask will call decayCurrentCounts until
|
||||||
@ -230,9 +233,8 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
DecayTask task = new DecayTask(this, timer);
|
DecayTask task = new DecayTask(this, timer);
|
||||||
timer.scheduleAtFixedRate(task, decayPeriodMillis, decayPeriodMillis);
|
timer.scheduleAtFixedRate(task, decayPeriodMillis, decayPeriodMillis);
|
||||||
|
|
||||||
MetricsProxy prox = MetricsProxy.getInstance(ns, numLevels);
|
metricsProxy = MetricsProxy.getInstance(ns, numLevels);
|
||||||
prox.setDelegate(this);
|
metricsProxy.setDelegate(this);
|
||||||
prox.registerMetrics2Source(ns);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load configs
|
// Load configs
|
||||||
@ -671,11 +673,14 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
private WeakReference<DecayRpcScheduler> delegate;
|
private WeakReference<DecayRpcScheduler> delegate;
|
||||||
private double[] averageResponseTimeDefault;
|
private double[] averageResponseTimeDefault;
|
||||||
private long[] callCountInLastWindowDefault;
|
private long[] callCountInLastWindowDefault;
|
||||||
|
private ObjectName decayRpcSchedulerInfoBeanName;
|
||||||
|
|
||||||
private MetricsProxy(String namespace, int numLevels) {
|
private MetricsProxy(String namespace, int numLevels) {
|
||||||
averageResponseTimeDefault = new double[numLevels];
|
averageResponseTimeDefault = new double[numLevels];
|
||||||
callCountInLastWindowDefault = new long[numLevels];
|
callCountInLastWindowDefault = new long[numLevels];
|
||||||
|
decayRpcSchedulerInfoBeanName =
|
||||||
MBeans.register(namespace, "DecayRpcScheduler", this);
|
MBeans.register(namespace, "DecayRpcScheduler", this);
|
||||||
|
this.registerMetrics2Source(namespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static synchronized MetricsProxy getInstance(String namespace,
|
public static synchronized MetricsProxy getInstance(String namespace,
|
||||||
@ -689,6 +694,10 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
return mp;
|
return mp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static synchronized void removeInstance(String namespace) {
|
||||||
|
MetricsProxy.INSTANCES.remove(namespace);
|
||||||
|
}
|
||||||
|
|
||||||
public void setDelegate(DecayRpcScheduler obj) {
|
public void setDelegate(DecayRpcScheduler obj) {
|
||||||
this.delegate = new WeakReference<DecayRpcScheduler>(obj);
|
this.delegate = new WeakReference<DecayRpcScheduler>(obj);
|
||||||
}
|
}
|
||||||
@ -698,6 +707,14 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
DefaultMetricsSystem.instance().register(name, name, this);
|
DefaultMetricsSystem.instance().register(name, name, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void unregisterSource(String namespace) {
|
||||||
|
final String name = "DecayRpcSchedulerMetrics2." + namespace;
|
||||||
|
DefaultMetricsSystem.instance().unregisterSource(name);
|
||||||
|
if (decayRpcSchedulerInfoBeanName != null) {
|
||||||
|
MBeans.unregister(decayRpcSchedulerInfoBeanName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getSchedulingDecisionSummary() {
|
public String getSchedulingDecisionSummary() {
|
||||||
DecayRpcScheduler scheduler = delegate.get();
|
DecayRpcScheduler scheduler = delegate.get();
|
||||||
@ -921,4 +938,10 @@ public class DecayRpcScheduler implements RpcScheduler,
|
|||||||
}
|
}
|
||||||
return decayedCallCounts;
|
return decayedCallCounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
metricsProxy.unregisterSource(namespace);
|
||||||
|
MetricsProxy.removeInstance(namespace);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,4 +42,8 @@ public class DefaultRpcScheduler implements RpcScheduler {
|
|||||||
public DefaultRpcScheduler(int priorityLevels, String namespace,
|
public DefaultRpcScheduler(int priorityLevels, String namespace,
|
||||||
Configuration conf) {
|
Configuration conf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,4 +32,6 @@ public interface RpcScheduler {
|
|||||||
|
|
||||||
void addResponseTime(String name, int priorityLevel, int queueTime,
|
void addResponseTime(String name, int priorityLevel, int queueTime,
|
||||||
int processingTime);
|
int processingTime);
|
||||||
|
|
||||||
|
void stop();
|
||||||
}
|
}
|
||||||
|
@ -525,7 +525,7 @@ public class NameNodeRpcServer implements NamenodeProtocols {
|
|||||||
|
|
||||||
/** Allow access to the client RPC server for testing */
|
/** Allow access to the client RPC server for testing */
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
RPC.Server getClientRpcServer() {
|
public RPC.Server getClientRpcServer() {
|
||||||
return clientRpcServer;
|
return clientRpcServer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,49 +33,42 @@ import org.apache.hadoop.conf.Configuration;
|
|||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer;
|
||||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||||
|
import org.apache.hadoop.ipc.FairCallQueue;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsException;
|
||||||
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestRefreshCallQueue {
|
public class TestRefreshCallQueue {
|
||||||
private MiniDFSCluster cluster;
|
private MiniDFSCluster cluster;
|
||||||
private Configuration config;
|
private Configuration config;
|
||||||
private FileSystem fs;
|
|
||||||
static int mockQueueConstructions;
|
static int mockQueueConstructions;
|
||||||
static int mockQueuePuts;
|
static int mockQueuePuts;
|
||||||
private String callQueueConfigKey = "";
|
private int nnPort = 0;
|
||||||
private final Random rand = new Random();
|
|
||||||
|
|
||||||
@Before
|
private void setUp(Class<?> queueClass) throws IOException {
|
||||||
public void setUp() throws Exception {
|
|
||||||
// We want to count additional events, so we reset here
|
|
||||||
mockQueueConstructions = 0;
|
|
||||||
mockQueuePuts = 0;
|
|
||||||
int portRetries = 5;
|
int portRetries = 5;
|
||||||
int nnPort;
|
Random rand = new Random();
|
||||||
|
|
||||||
for (; portRetries > 0; --portRetries) {
|
for (; portRetries > 0; --portRetries) {
|
||||||
// Pick a random port in the range [30000,60000).
|
// Pick a random port in the range [30000,60000).
|
||||||
nnPort = 30000 + rand.nextInt(30000);
|
nnPort = 30000 + rand.nextInt(30000);
|
||||||
config = new Configuration();
|
config = new Configuration();
|
||||||
callQueueConfigKey = "ipc." + nnPort + ".callqueue.impl";
|
String callQueueConfigKey = "ipc." + nnPort + ".callqueue.impl";
|
||||||
config.setClass(callQueueConfigKey,
|
config.setClass(callQueueConfigKey, queueClass, BlockingQueue.class);
|
||||||
MockCallQueue.class, BlockingQueue.class);
|
|
||||||
config.set("hadoop.security.authorization", "true");
|
config.set("hadoop.security.authorization", "true");
|
||||||
|
|
||||||
FileSystem.setDefaultUri(config, "hdfs://localhost:" + nnPort);
|
FileSystem.setDefaultUri(config, "hdfs://localhost:" + nnPort);
|
||||||
fs = FileSystem.get(config);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
cluster = new MiniDFSCluster.Builder(config).nameNodePort(nnPort).build();
|
cluster = new MiniDFSCluster.Builder(config).nameNodePort(nnPort)
|
||||||
|
.build();
|
||||||
cluster.waitActive();
|
cluster.waitActive();
|
||||||
break;
|
break;
|
||||||
} catch (BindException be) {
|
} catch (BindException be) {
|
||||||
// Retry with a different port number.
|
// Retry with a different port number.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (portRetries == 0) {
|
if (portRetries == 0) {
|
||||||
// Bail if we get very unlucky with our choice of ports.
|
// Bail if we get very unlucky with our choice of ports.
|
||||||
fail("Failed to pick an ephemeral port for the NameNode RPC server.");
|
fail("Failed to pick an ephemeral port for the NameNode RPC server.");
|
||||||
@ -83,8 +76,8 @@ public class TestRefreshCallQueue {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void tearDown() throws Exception {
|
public void tearDown() throws IOException {
|
||||||
if(cluster!=null) {
|
if (cluster != null) {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
cluster = null;
|
cluster = null;
|
||||||
}
|
}
|
||||||
@ -105,29 +98,66 @@ public class TestRefreshCallQueue {
|
|||||||
|
|
||||||
// Returns true if mock queue was used for put
|
// Returns true if mock queue was used for put
|
||||||
public boolean canPutInMockQueue() throws IOException {
|
public boolean canPutInMockQueue() throws IOException {
|
||||||
|
FileSystem fs = FileSystem.get(config);
|
||||||
int putsBefore = mockQueuePuts;
|
int putsBefore = mockQueuePuts;
|
||||||
fs.exists(new Path("/")); // Make an RPC call
|
fs.exists(new Path("/")); // Make an RPC call
|
||||||
|
fs.close();
|
||||||
return mockQueuePuts > putsBefore;
|
return mockQueuePuts > putsBefore;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRefresh() throws Exception {
|
public void testRefresh() throws Exception {
|
||||||
assertTrue("Mock queue should have been constructed", mockQueueConstructions > 0);
|
// We want to count additional events, so we reset here
|
||||||
|
mockQueueConstructions = 0;
|
||||||
|
mockQueuePuts = 0;
|
||||||
|
setUp(MockCallQueue.class);
|
||||||
|
|
||||||
|
assertTrue("Mock queue should have been constructed",
|
||||||
|
mockQueueConstructions > 0);
|
||||||
assertTrue("Puts are routed through MockQueue", canPutInMockQueue());
|
assertTrue("Puts are routed through MockQueue", canPutInMockQueue());
|
||||||
int lastMockQueueConstructions = mockQueueConstructions;
|
int lastMockQueueConstructions = mockQueueConstructions;
|
||||||
|
|
||||||
// Replace queue with the queue specified in core-site.xml, which would be the LinkedBlockingQueue
|
// Replace queue with the queue specified in core-site.xml, which would be
|
||||||
|
// the LinkedBlockingQueue
|
||||||
DFSAdmin admin = new DFSAdmin(config);
|
DFSAdmin admin = new DFSAdmin(config);
|
||||||
String [] args = new String[]{"-refreshCallQueue"};
|
String [] args = new String[]{"-refreshCallQueue"};
|
||||||
int exitCode = admin.run(args);
|
int exitCode = admin.run(args);
|
||||||
assertEquals("DFSAdmin should return 0", 0, exitCode);
|
assertEquals("DFSAdmin should return 0", 0, exitCode);
|
||||||
|
|
||||||
assertEquals("Mock queue should have no additional constructions", lastMockQueueConstructions, mockQueueConstructions);
|
assertEquals("Mock queue should have no additional constructions",
|
||||||
|
lastMockQueueConstructions, mockQueueConstructions);
|
||||||
try {
|
try {
|
||||||
assertFalse("Puts are routed through LBQ instead of MockQueue", canPutInMockQueue());
|
assertFalse("Puts are routed through LBQ instead of MockQueue",
|
||||||
} catch (IOException ioe){
|
canPutInMockQueue());
|
||||||
|
} catch (IOException ioe) {
|
||||||
fail("Could not put into queue at all");
|
fail("Could not put into queue at all");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRefreshCallQueueWithFairCallQueue() throws Exception {
|
||||||
|
setUp(FairCallQueue.class);
|
||||||
|
boolean oldValue = DefaultMetricsSystem.inMiniClusterMode();
|
||||||
|
|
||||||
|
// throw an error when we double-initialize JvmMetrics
|
||||||
|
DefaultMetricsSystem.setMiniClusterMode(false);
|
||||||
|
|
||||||
|
NameNodeRpcServer rpcServer = (NameNodeRpcServer) cluster.getNameNodeRpc();
|
||||||
|
try {
|
||||||
|
rpcServer.getClientRpcServer().refreshCallQueue(config);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Throwable cause = e.getCause();
|
||||||
|
if ((cause instanceof MetricsException)
|
||||||
|
&& cause.getMessage().contains(
|
||||||
|
"Metrics source DecayRpcSchedulerMetrics2.ipc." + nnPort
|
||||||
|
+ " already exists!")) {
|
||||||
|
fail("DecayRpcScheduler metrics should be unregistered before"
|
||||||
|
+ " reregister");
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
DefaultMetricsSystem.setMiniClusterMode(oldValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user