@@ -973,6 +973,9 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
973973 // We shouldn't be calling saveNamespace if we've come up in standby state.
974974 MetaRecoveryContext recovery = startOpt .createRecoveryContext ();
975975 final boolean staleImage
976+ // NameNode startup:
977+ // fsimage + edit log = new fsimage
978+ // TODO (1) Merge the metadata
976979 = fsImage .recoverTransitionRead (startOpt , this , recovery );
977980 if (RollingUpgradeStartupOption .ROLLBACK .matches (startOpt ) ||
978981 RollingUpgradeStartupOption .DOWNGRADE .matches (startOpt )) {
@@ -983,6 +986,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
983986 + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
984987 + ", isRollingUpgrade=" + isRollingUpgrade () + ")" );
985988 if (needToSave ) {
989+ // TODO (2) Write the newly merged fsimage to disk
986990 fsImage .saveNamespace (this );
987991 } else {
988992 updateStorageVersionForRollingUpgrade (fsImage .getLayoutVersion (),
@@ -996,6 +1000,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
9961000 // we shouldn't do it when coming up in standby state
9971001 if (!haEnabled || (haEnabled && startOpt == StartupOption .UPGRADE )
9981002 || (haEnabled && startOpt == StartupOption .UPGRADEONLY )) {
1003+ // TODO (3) Open a new edit log and start writing log entries to it
9991004 fsImage .openEditLogForWrite ();
10001005 }
10011006 success = true ;
@@ -1049,19 +1054,24 @@ private void stopSecretManager() {
10491054
10501055 /**
10511056 * Start services common to both active and standby states
1057+ * 1) fsimage -> check whether its directory has enough disk space (100 MB)
1058+ * 2) edit log -> check whether its directory has enough disk space
10521059 */
10531060 void startCommonServices (Configuration conf , HAContext haContext ) throws IOException {
10541061 this .registerMBean (); // register the MBean for the FSNamesystemState
10551062 writeLock ();
10561063 this .haContext = haContext ;
10571064 try {
1065+ // NameNode resource check: locate the fsimage and edit log directories via the configuration files (hdfs-site.xml, core-site.xml)
10581066 nnResourceChecker = new NameNodeResourceChecker (conf );
1067+ // Check whether the NameNode storage paths have enough available space
10591068 checkAvailableResources ();
10601069 assert safeMode != null && !isPopulatingReplQueues ();
10611070 StartupProgress prog = NameNode .getStartupProgress ();
10621071 prog .beginPhase (Phase .SAFEMODE );
10631072 prog .setTotal (Phase .SAFEMODE , STEP_AWAITING_REPORTED_BLOCKS ,
10641073 getCompleteBlocksTotal ());
1074+ // TODO Set up safe mode
10651075 setBlockTotal ();
10661076 blockManager .activate (conf );
10671077 } finally {
@@ -5264,6 +5274,10 @@ private synchronized boolean canLeave() {
52645274 /**
52655275 * There is no need to enter safe mode
52665276 * if DFS is empty or {@link #threshold} == 0
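5277+ * TODO Three conditions that trigger safe mode (any one of them is sufficient):
5278+ * 1. the number of safe complete blocks is below the block threshold
5279+ * 2. the number of live datanodes is below DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY = "dfs.namenode.safemode.min.datanodes"
5280+ * 3. the free space in the directories holding the fsimage and edit log is below 100 MB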
52675281 */
52685282 private boolean needEnter () {
52695283 return (threshold != 0 && blockSafe < blockThreshold ) ||
@@ -5272,7 +5286,8 @@ private boolean needEnter() {
52725286 }
52735287
52745288 /**
5275- * Check and trigger safe mode if needed.
5289+ * Check and trigger safe mode if needed.
5290+ * TODO Check whether safe mode should be entered
52765291 */
52775292 private void checkMode () {
52785293 // Have to have write-lock since leaving safemode initializes
@@ -5283,6 +5298,7 @@ private void checkMode() {
52835298 }
52845299 // if smmthread is already running, the block threshold must have been
52855300 // reached before, there is no need to enter the safe mode again
5301+ // TODO needEnter() => decides whether safe mode must be entered
52865302 if (smmthread == null && needEnter ()) {
52875303 enter ();
52885304 // check if we are ready to initialize replication queues
@@ -5344,6 +5360,8 @@ private synchronized void setBlockTotal(int total) {
53445360 */
53455361 private synchronized void incrementSafeBlockCount (short replication ) {
53465362 if (replication == safeReplication ) {
5363+ // DataNodes report their blocks to the NameNode;
5364+ // blockSafe is incremented when the reported replication equals safeReplication
53475365 this .blockSafe ++;
53485366
53495367 // Report startup progress only if we haven't completed startup yet.
@@ -5679,6 +5697,7 @@ public void setBlockTotal() {
56795697 SafeModeInfo safeMode = this .safeMode ;
56805698 if (safeMode == null )
56815699 return ;
5700+ // getCompleteBlocksTotal() returns the number of COMPLETE blocks
56825701 safeMode .setBlockTotal ((int )getCompleteBlocksTotal ());
56835702 }
56845703
@@ -5694,6 +5713,10 @@ public long getBlocksTotal() {
56945713 /**
56955714 * Get the total number of COMPLETE blocks in the system.
56965715 * For safe mode only complete blocks are counted.
5716+ *
5717+ * TODO In an HDFS cluster, blocks fall into two broad states:
5718+ * 1) complete: normal, usable blocks
5719+ * 2) under construction: blocks that are still being written
56975720 */
56985721 private long getCompleteBlocksTotal () {
56995722 // Calculate number of blocks under construction
0 commit comments