From cb3add4f3355cf9c00f5c91ee48bb3322a0a0280 Mon Sep 17 00:00:00 2001 From: zeekling Date: Sat, 4 Nov 2023 22:05:21 +0800 Subject: [PATCH 1/3] =?UTF-8?q?namenode=E5=90=AF=E5=8A=A8=E6=BA=90?= =?UTF-8?q?=E7=A0=81=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hdfs/nameNode启动过程.md | 122 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 hdfs/nameNode启动过程.md diff --git a/hdfs/nameNode启动过程.md b/hdfs/nameNode启动过程.md new file mode 100644 index 0000000..7d61c09 --- /dev/null +++ b/hdfs/nameNode启动过程.md @@ -0,0 +1,122 @@ + +## 简介 + +本章详细介绍NameNode启动过程。主要是代码级别的解释。 + +nameNode的启动主要是由NameNode.java主导的,由main函数开始了解。 + +下面是main函数里面的主要内容,可以看到主要由createNameNode实现NameNode的启动。 +```java +NameNode namenode = createNameNode(argv, null); +if (namenode != null) { + namenode.join(); +} +``` + +在createNameNode函数里面主要是分为两部分: +- 参数解析:主要关心解析startOpt,startOpt可以控制具体操作,比如format、rollback等。主要操作如下,后续会详细介绍。 + ```java + FORMAT ("-format"), + CLUSTERID ("-clusterid"), + GENCLUSTERID ("-genclusterid"), + REGULAR ("-regular"), + BACKUP ("-backup"), + CHECKPOINT("-checkpoint"), + UPGRADE ("-upgrade"), + ROLLBACK("-rollback"), + ROLLINGUPGRADE("-rollingUpgrade"), + IMPORT ("-importCheckpoint"), + BOOTSTRAPSTANDBY("-bootstrapStandby"), + INITIALIZESHAREDEDITS("-initializeSharedEdits"), + RECOVER ("-recover"), + FORCE("-force"), + NONINTERACTIVE("-nonInteractive"), + SKIPSHAREDEDITSCHECK("-skipSharedEditsCheck"), + RENAMERESERVED("-renameReserved"), + METADATAVERSION("-metadataVersion"), + UPGRADEONLY("-upgradeOnly"), + HOTSWAP("-hotswap"), + OBSERVER("-observer"); + ``` + 模型情况下会走到启动的启动的流程里面。 +- 启动NameNode或者其他操作,比如format等。 + +### 启动 + +NameNode的核心主要在NameNode的构造函数里面。 + +```java +this.haEnabled = HAUtil.isHAEnabled(conf, nsId); +// 检查HA的状态,主要是判断当前启动的是主实例还是备实例 +state = createHAState(getStartupOption(conf)); +this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); +this.haContext = 
createHAContext(); +try { + initializeGenericKeys(conf, nsId, namenodeId); + // 启动NameNode + initialize(getConf()); + state.prepareToEnterState(haContext); + try { + haContext.writeLock(); + state.enterState(haContext); + } finally { + haContext.writeUnlock(); + } +} catch (IOException e) { + this.stopAtException(e); + throw e; +} catch (HadoopIllegalArgumentException e) { + this.stopAtException(e); + throw e; +} +``` + +initialize函数详解如下: + +```java +protected void initialize(Configuration conf) throws IOException { + // .... 省略 + //登录kerberos + UserGroupInformation.setConfiguration(conf); + loginAsNameNodeUser(conf); + + // 初始化监控信息 + NameNode.initMetrics(conf, this.getRole()); + StartupProgressMetrics.register(startupProgress); + + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(conf); + pauseMonitor.start(); + metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); + + // .... 省略 + + if (NamenodeRole.NAMENODE == role) { + startHttpServer(conf); + } + // 从本地加载FSImage,并且与Editlog合并产生新的FSImage + loadNamesystem(conf); + //TODO 待确认用途 + startAliasMapServerIfNecessary(conf); + + //创建rpcserver,封装了NameNodeRpcServer、ClientRPCServer + //支持ClientNameNodeProtocol、DataNodeProtocolPB等协议 + rpcServer = createRpcServer(conf); + + initReconfigurableBackoffKey(); + + // .... 
省略 + + if (NamenodeRole.NAMENODE == role) { + httpServer.setNameNodeAddress(getNameNodeAddress()); + httpServer.setFSImage(getFSImage()); + if (levelDBAliasMapServer != null) { + httpServer.setAliasMap(levelDBAliasMapServer.getAliasMap()); + } + } + + //启动执行多个重要的工作线程 + startCommonServices(conf); + startMetricsLogger(conf); +} +``` -- 2.45.2 From 22117d178d94f6931cc4ab50abc9133c57a80ec8 Mon Sep 17 00:00:00 2001 From: zeekling Date: Sat, 4 Nov 2023 23:56:35 +0800 Subject: [PATCH 2/3] =?UTF-8?q?namenode=E5=90=AF=E5=8A=A8=E6=BA=90?= =?UTF-8?q?=E7=A0=81=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hdfs/nameNode启动过程.md | 117 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/hdfs/nameNode启动过程.md b/hdfs/nameNode启动过程.md index 7d61c09..829bfdf 100644 --- a/hdfs/nameNode启动过程.md +++ b/hdfs/nameNode启动过程.md @@ -1,5 +1,5 @@ -## 简介 +# 简介 本章详细介绍NameNode启动过程。主要是代码级别的解释。 @@ -41,7 +41,7 @@ if (namenode != null) { 模型情况下会走到启动的启动的流程里面。 - 启动NameNode或者其他操作,比如format等。 -### 启动 +## 启动 NameNode的核心主要在NameNode的构造函数里面。 @@ -120,3 +120,116 @@ protected void initialize(Configuration conf) throws IOException { startMetricsLogger(conf); } ``` + +### startCommonServices函数详解 + +启动NameNode关键服务 + +```java +private void startCommonServices(Configuration conf) throws IOException { + // 创建NameNodeResourceChecker、激活BlockManager等 + namesystem.startCommonServices(conf, haContext); + registerNNSMXBean(); + if (NamenodeRole.NAMENODE != role) { + startHttpServer(conf); + httpServer.setNameNodeAddress(getNameNodeAddress()); + httpServer.setFSImage(getFSImage()); + if (levelDBAliasMapServer != null) { + httpServer.setAliasMap(levelDBAliasMapServer.getAliasMap()); + } + } + // 启动rpc服务 + rpcServer.start(); + try { + // 获取启动插件列表 + plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, + ServicePlugin.class); + } catch (RuntimeException e) { + String pluginsValue = conf.get(DFS_NAMENODE_PLUGINS_KEY); 
+ LOG.error("Unable to load NameNode plugins. Specified list of plugins: " + + pluginsValue, e); + throw e; + } + // 启动所有插件 + for (ServicePlugin p: plugins) { + try { + p.start(this); + } catch (Throwable t) { + LOG.warn("ServicePlugin " + p + " could not be started", t); + } + } + LOG.info(getRole() + " RPC up at: " + getNameNodeAddress()); + if (rpcServer.getServiceRpcAddress() != null) { + LOG.info(getRole() + " service RPC up at: " + + rpcServer.getServiceRpcAddress()); + } +} +``` + +### namesystem.startCommonServices + +在当前函数中启动blockManager和NameNodeResourceChecker,blockManager比较关键。 + +```java +void startCommonServices(Configuration conf, HAContext haContext) throws IOException { + this.registerMBean(); // register the MBean for the FSNamesystemState + writeLock(); + this.haContext = haContext; + try { + //创建NameNodeResourceChecker,并立即检查一次 + nnResourceChecker = new NameNodeResourceChecker(conf); + checkAvailableResources(); + assert !blockManager.isPopulatingReplQueues(); + StartupProgress prog = NameNode.getStartupProgress(); + prog.beginPhase(Phase.SAFEMODE); + //获取已完成的数据块总量 + long completeBlocksTotal = getCompleteBlocksTotal(); + prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS, + completeBlocksTotal); + // 激活blockManager,blockManager负责管理文件系统中文件的物理块与实际存储位置的映射关系, + // 是NameNode的核心功能之一。 + blockManager.activate(conf, completeBlocksTotal); + } finally { + writeUnlock("startCommonServices"); + } + + registerMXBean(); + DefaultMetricsSystem.instance().register(this); + if (inodeAttributeProvider != null) { + inodeAttributeProvider.start(); + dir.setINodeAttributeProvider(inodeAttributeProvider); + } + // 注册快照管理器 + snapshotManager.registerMXBean(); + InetSocketAddress serviceAddress = NameNode.getServiceAddress(conf, true); + this.nameNodeHostName = (serviceAddress != null) ? 
+ serviceAddress.getHostName() : ""; +} +``` + +### blockManager.activate + +启动blockManager.activate 主要是初始化blockManager。 + +主要包含下面几个方面: +- pendingReconstruction +- datanodeManager +- bmSafeMode + +```java +public void activate(Configuration conf, long blockTotal) { + pendingReconstruction.start(); + datanodeManager.activate(conf); + this.redundancyThread.setName("RedundancyMonitor"); + this.redundancyThread.start(); + this.markedDeleteBlockScrubberThread.setName("MarkedDeleteBlockScrubberThread"); + this.markedDeleteBlockScrubberThread.start(); + this.blockReportThread.start(); + mxBeanName = MBeans.register("NameNode", "BlockStats", this); + bmSafeMode.activate(blockTotal); +} +``` + + + + -- 2.45.2 From 027d9e5ef0a2bd10ff704baa30211234fd1af8e3 Mon Sep 17 00:00:00 2001 From: zeekling Date: Sun, 5 Nov 2023 11:59:29 +0800 Subject: [PATCH 3/3] =?UTF-8?q?namenode=E5=90=AF=E5=8A=A8=E6=BA=90?= =?UTF-8?q?=E7=A0=81=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hdfs/nameNode启动过程.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hdfs/nameNode启动过程.md b/hdfs/nameNode启动过程.md index 829bfdf..da873ef 100644 --- a/hdfs/nameNode启动过程.md +++ b/hdfs/nameNode启动过程.md @@ -153,6 +153,7 @@ private void startCommonServices(Configuration conf) throws IOException { // 启动所有插件 for (ServicePlugin p: plugins) { try { + // 调用插件的start接口,需要插件自己实现,需要实现接口ServicePlugin p.start(this); } catch (Throwable t) { LOG.warn("ServicePlugin " + p + " could not be started", t); @@ -202,8 +203,7 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep // 注册快照管理器 snapshotManager.registerMXBean(); InetSocketAddress serviceAddress = NameNode.getServiceAddress(conf, true); - this.nameNodeHostName = (serviceAddress != null) ? - serviceAddress.getHostName() : ""; + this.nameNodeHostName = (serviceAddress != null) ? 
serviceAddress.getHostName() : ""; } ``` @@ -219,6 +219,7 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep ```java public void activate(Configuration conf, long blockTotal) { pendingReconstruction.start(); + // 初始化datanodeManager datanodeManager.activate(conf); this.redundancyThread.setName("RedundancyMonitor"); this.redundancyThread.start(); @@ -231,5 +232,3 @@ public void activate(Configuration conf, long blockTotal) { ``` - - -- 2.45.2