001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.util.Time.monotonicNow; 021 022import java.io.DataInput; 023import java.io.DataInputStream; 024import java.io.DataOutputStream; 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.FileNotFoundException; 028import java.io.FileOutputStream; 029import java.io.IOException; 030import java.security.DigestInputStream; 031import java.security.DigestOutputStream; 032import java.security.MessageDigest; 033import java.util.ArrayList; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.HashMap; 037import java.util.List; 038import java.util.Map; 039import java.util.TreeMap; 040 041import org.apache.commons.logging.Log; 042import org.apache.hadoop.classification.InterfaceAudience; 043import org.apache.hadoop.classification.InterfaceStability; 044import org.apache.hadoop.conf.Configuration; 045import org.apache.hadoop.fs.FileSystem; 046import org.apache.hadoop.fs.Path; 047import org.apache.hadoop.fs.PathIsNotDirectoryException; 048import org.apache.hadoop.fs.UnresolvedLinkException; 049import 
org.apache.hadoop.fs.permission.PermissionStatus; 050import org.apache.hadoop.hdfs.DFSUtil; 051import org.apache.hadoop.hdfs.protocol.HdfsConstants; 052import org.apache.hadoop.hdfs.protocol.LayoutFlags; 053import org.apache.hadoop.hdfs.protocol.LayoutVersion; 054import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; 055import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; 056import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction; 057import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 058import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 059import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; 060import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; 061import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList; 062import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 063import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat; 064import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap; 065import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 066import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 067import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 068import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 069import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 070import org.apache.hadoop.hdfs.util.ReadOnlyList; 071import org.apache.hadoop.io.IOUtils; 072import org.apache.hadoop.io.MD5Hash; 073import org.apache.hadoop.io.Text; 074import org.apache.hadoop.util.StringUtils; 075 076import com.google.common.annotations.VisibleForTesting; 077import com.google.common.base.Preconditions; 078 079/** 080 * Contains inner classes for reading or writing the on-disk format for 081 * FSImages. 
082 * 083 * In particular, the format of the FSImage looks like: 084 * <pre> 085 * FSImage { 086 * layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long, 087 * namesystemGenerationStampV1: long, namesystemGenerationStampV2: long, 088 * generationStampAtBlockIdSwitch:long, lastAllocatedBlockId: 089 * long transactionID: long, snapshotCounter: int, numberOfSnapshots: int, 090 * numOfSnapshottableDirs: int, 091 * {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed) 092 * } 093 * 094 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) { 095 * INodeInfo of root, numberOfChildren of root: int 096 * [list of INodeInfo of root's children], 097 * [list of INodeDirectoryInfo of root's directory children] 098 * } 099 * 100 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){ 101 * [list of INodeInfo of INodes in topological order] 102 * } 103 * 104 * INodeInfo { 105 * { 106 * localName: short + byte[] 107 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported 108 * or 109 * { 110 * fullPath: byte[] 111 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported 112 * replicationFactor: short, modificationTime: long, 113 * accessTime: long, preferredBlockSize: long, 114 * numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink), 115 * { 116 * nsQuota: long, dsQuota: long, 117 * { 118 * isINodeSnapshottable: byte, 119 * isINodeWithSnapshot: byte (if isINodeSnapshottable is false) 120 * } (when {@link Feature#SNAPSHOT} is supported), 121 * fsPermission: short, PermissionStatus 122 * } for INodeDirectory 123 * or 124 * { 125 * symlinkString, fsPermission: short, PermissionStatus 126 * } for INodeSymlink 127 * or 128 * { 129 * [list of BlockInfo] 130 * [list of FileDiff] 131 * { 132 * isINodeFileUnderConstructionSnapshot: byte, 133 * {clientName: short + byte[], clientMachine: short + byte[]} (when 134 * isINodeFileUnderConstructionSnapshot is true), 135 * 
} (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode), 136 * fsPermission: short, PermissionStatus 137 * } for INodeFile 138 * } 139 * 140 * INodeDirectoryInfo { 141 * fullPath of the directory: short + byte[], 142 * numberOfChildren: int, [list of INodeInfo of children INode], 143 * { 144 * numberOfSnapshots: int, 145 * [list of Snapshot] (when NumberOfSnapshots is positive), 146 * numberOfDirectoryDiffs: int, 147 * [list of DirectoryDiff] (NumberOfDirectoryDiffs is positive), 148 * number of children that are directories, 149 * [list of INodeDirectoryInfo of the directory children] (includes 150 * snapshot copies of deleted sub-directories) 151 * } (when {@link Feature#SNAPSHOT} is supported), 152 * } 153 * 154 * Snapshot { 155 * snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is 156 * the name of the snapshot) 157 * } 158 * 159 * DirectoryDiff { 160 * full path of the root of the associated Snapshot: short + byte[], 161 * childrenSize: int, 162 * isSnapshotRoot: byte, 163 * snapshotINodeIsNotNull: byte (when isSnapshotRoot is false), 164 * snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff 165 * } 166 * 167 * Diff { 168 * createdListSize: int, [Local name of INode in created list], 169 * deletedListSize: int, [INode in deleted list: INodeInfo] 170 * } 171 * 172 * FileDiff { 173 * full path of the root of the associated Snapshot: short + byte[], 174 * fileSize: long, 175 * snapshotINodeIsNotNull: byte, 176 * snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff 177 * } 178 * </pre> 179 */ 180@InterfaceAudience.Private 181@InterfaceStability.Evolving 182public class FSImageFormat { 183 private static final Log LOG = FSImage.LOG; 184 185 // Static-only class 186 private FSImageFormat() {} 187 188 interface AbstractLoader { 189 MD5Hash getLoadedImageMd5(); 190 long getLoadedImageTxId(); 191 } 192 193 static class LoaderDelegator implements AbstractLoader { 194 private AbstractLoader impl; 
195 private final Configuration conf; 196 private final FSNamesystem fsn; 197 198 LoaderDelegator(Configuration conf, FSNamesystem fsn) { 199 this.conf = conf; 200 this.fsn = fsn; 201 } 202 203 @Override 204 public MD5Hash getLoadedImageMd5() { 205 return impl.getLoadedImageMd5(); 206 } 207 208 @Override 209 public long getLoadedImageTxId() { 210 return impl.getLoadedImageTxId(); 211 } 212 213 public void load(File file, boolean requireSameLayoutVersion) 214 throws IOException { 215 Preconditions.checkState(impl == null, "Image already loaded!"); 216 217 FileInputStream is = null; 218 try { 219 is = new FileInputStream(file); 220 byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length]; 221 IOUtils.readFully(is, magic, 0, magic.length); 222 if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) { 223 FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader( 224 conf, fsn, requireSameLayoutVersion); 225 impl = loader; 226 loader.load(file); 227 } else { 228 Loader loader = new Loader(conf, fsn); 229 impl = loader; 230 loader.load(file); 231 } 232 } finally { 233 IOUtils.cleanup(LOG, is); 234 } 235 } 236 } 237 238 /** 239 * Construct a loader class to load the image. It chooses the loader based on 240 * the layout version. 241 */ 242 public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) { 243 return new LoaderDelegator(conf, fsn); 244 } 245 246 /** 247 * A one-shot class responsible for loading an image. The load() function 248 * should be called once, after which the getter methods may be used to retrieve 249 * information about the image that was loaded, if loading was successful. 250 */ 251 public static class Loader implements AbstractLoader { 252 private final Configuration conf; 253 /** which namesystem this loader is working for */ 254 private final FSNamesystem namesystem; 255 256 /** Set to true once a file has been loaded using this loader. 
*/ 257 private boolean loaded = false; 258 259 /** The transaction ID of the last edit represented by the loaded file */ 260 private long imgTxId; 261 /** The MD5 sum of the loaded file */ 262 private MD5Hash imgDigest; 263 264 private Map<Integer, Snapshot> snapshotMap = null; 265 private final ReferenceMap referenceMap = new ReferenceMap(); 266 267 Loader(Configuration conf, FSNamesystem namesystem) { 268 this.conf = conf; 269 this.namesystem = namesystem; 270 } 271 272 /** 273 * Return the MD5 checksum of the image that has been loaded. 274 * @throws IllegalStateException if load() has not yet been called. 275 */ 276 @Override 277 public MD5Hash getLoadedImageMd5() { 278 checkLoaded(); 279 return imgDigest; 280 } 281 282 @Override 283 public long getLoadedImageTxId() { 284 checkLoaded(); 285 return imgTxId; 286 } 287 288 /** 289 * Throw IllegalStateException if load() has not yet been called. 290 */ 291 private void checkLoaded() { 292 if (!loaded) { 293 throw new IllegalStateException("Image not yet loaded!"); 294 } 295 } 296 297 /** 298 * Throw IllegalStateException if load() has already been called. 
299 */ 300 private void checkNotLoaded() { 301 if (loaded) { 302 throw new IllegalStateException("Image already loaded!"); 303 } 304 } 305 306 public void load(File curFile) throws IOException { 307 checkNotLoaded(); 308 assert curFile != null : "curFile is null"; 309 310 StartupProgress prog = NameNode.getStartupProgress(); 311 Step step = new Step(StepType.INODES); 312 prog.beginStep(Phase.LOADING_FSIMAGE, step); 313 long startTime = monotonicNow(); 314 315 // 316 // Load in bits 317 // 318 MessageDigest digester = MD5Hash.getDigester(); 319 DigestInputStream fin = new DigestInputStream( 320 new FileInputStream(curFile), digester); 321 322 DataInputStream in = new DataInputStream(fin); 323 try { 324 // read image version: first appeared in version -1 325 int imgVersion = in.readInt(); 326 if (getLayoutVersion() != imgVersion) { 327 throw new InconsistentFSStateException(curFile, 328 "imgVersion " + imgVersion + 329 " expected to be " + getLayoutVersion()); 330 } 331 boolean supportSnapshot = NameNodeLayoutVersion.supports( 332 LayoutVersion.Feature.SNAPSHOT, imgVersion); 333 if (NameNodeLayoutVersion.supports( 334 LayoutVersion.Feature.ADD_LAYOUT_FLAGS, imgVersion)) { 335 LayoutFlags.read(in); 336 } 337 338 // read namespaceID: first appeared in version -2 339 in.readInt(); 340 341 long numFiles = in.readLong(); 342 343 // read in the last generation stamp for legacy blocks. 344 long genstamp = in.readLong(); 345 namesystem.getBlockIdManager().setGenerationStampV1(genstamp); 346 347 if (NameNodeLayoutVersion.supports( 348 LayoutVersion.Feature.SEQUENTIAL_BLOCK_ID, imgVersion)) { 349 // read the starting generation stamp for sequential block IDs 350 genstamp = in.readLong(); 351 namesystem.getBlockIdManager().setGenerationStampV2(genstamp); 352 353 // read the last generation stamp for blocks created after 354 // the switch to sequential block IDs. 
355 long stampAtIdSwitch = in.readLong(); 356 namesystem.getBlockIdManager().setGenerationStampV1Limit(stampAtIdSwitch); 357 358 // read the max sequential block ID. 359 long maxSequentialBlockId = in.readLong(); 360 namesystem.getBlockIdManager().setLastAllocatedBlockId(maxSequentialBlockId); 361 } else { 362 363 long startingGenStamp = namesystem.getBlockIdManager() 364 .upgradeGenerationStampToV2(); 365 // This is an upgrade. 366 LOG.info("Upgrading to sequential block IDs. Generation stamp " + 367 "for new blocks set to " + startingGenStamp); 368 } 369 370 // read the transaction ID of the last edit represented by 371 // this image 372 if (NameNodeLayoutVersion.supports( 373 LayoutVersion.Feature.STORED_TXIDS, imgVersion)) { 374 imgTxId = in.readLong(); 375 } else { 376 imgTxId = 0; 377 } 378 379 // read the last allocated inode id in the fsimage 380 if (NameNodeLayoutVersion.supports( 381 LayoutVersion.Feature.ADD_INODE_ID, imgVersion)) { 382 long lastInodeId = in.readLong(); 383 namesystem.dir.resetLastInodeId(lastInodeId); 384 if (LOG.isDebugEnabled()) { 385 LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId); 386 } 387 } else { 388 if (LOG.isDebugEnabled()) { 389 LOG.debug("Old layout version doesn't have inode id." 
390 + " Will assign new id for each inode."); 391 } 392 } 393 394 if (supportSnapshot) { 395 snapshotMap = namesystem.getSnapshotManager().read(in, this); 396 } 397 398 // read compression related info 399 FSImageCompression compression; 400 if (NameNodeLayoutVersion.supports( 401 LayoutVersion.Feature.FSIMAGE_COMPRESSION, imgVersion)) { 402 compression = FSImageCompression.readCompressionHeader(conf, in); 403 } else { 404 compression = FSImageCompression.createNoopCompression(); 405 } 406 in = compression.unwrapInputStream(fin); 407 408 LOG.info("Loading image file " + curFile + " using " + compression); 409 410 // load all inodes 411 LOG.info("Number of files = " + numFiles); 412 prog.setTotal(Phase.LOADING_FSIMAGE, step, numFiles); 413 Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); 414 if (NameNodeLayoutVersion.supports( 415 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, imgVersion)) { 416 if (supportSnapshot) { 417 loadLocalNameINodesWithSnapshot(numFiles, in, counter); 418 } else { 419 loadLocalNameINodes(numFiles, in, counter); 420 } 421 } else { 422 loadFullNameINodes(numFiles, in, counter); 423 } 424 425 loadFilesUnderConstruction(in, supportSnapshot, counter); 426 prog.endStep(Phase.LOADING_FSIMAGE, step); 427 // Now that the step is finished, set counter equal to total to adjust 428 // for possible under-counting due to reference inodes. 
429 prog.setCount(Phase.LOADING_FSIMAGE, step, numFiles); 430 431 loadSecretManagerState(in); 432 433 loadCacheManagerState(in); 434 435 // make sure to read to the end of file 436 boolean eof = (in.read() == -1); 437 assert eof : "Should have reached the end of image file " + curFile; 438 } finally { 439 in.close(); 440 } 441 442 imgDigest = new MD5Hash(digester.digest()); 443 loaded = true; 444 445 LOG.info("Image file " + curFile + " of size " + curFile.length() 446 + " bytes loaded in " + (monotonicNow() - startTime) / 1000 447 + " seconds."); 448 } 449 450 /** Update the root node's attributes */ 451 private void updateRootAttr(INodeWithAdditionalFields root) { 452 final QuotaCounts q = root.getQuotaCounts(); 453 final long nsQuota = q.getNameSpace(); 454 final long dsQuota = q.getStorageSpace(); 455 FSDirectory fsDir = namesystem.dir; 456 if (nsQuota != -1 || dsQuota != -1) { 457 fsDir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota); 458 } 459 fsDir.rootDir.cloneModificationTime(root); 460 fsDir.rootDir.clonePermissionStatus(root); 461 } 462 463 /** 464 * Load fsimage files when 1) only local names are stored, 465 * and 2) snapshot is supported. 466 * 467 * @param numFiles number of files expected to be read 468 * @param in Image input stream 469 * @param counter Counter to increment for namenode startup progress 470 */ 471 private void loadLocalNameINodesWithSnapshot(long numFiles, DataInput in, 472 Counter counter) throws IOException { 473 assert NameNodeLayoutVersion.supports( 474 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); 475 assert NameNodeLayoutVersion.supports( 476 LayoutVersion.Feature.SNAPSHOT, getLayoutVersion()); 477 478 // load root 479 loadRoot(in, counter); 480 // load rest of the nodes recursively 481 loadDirectoryWithSnapshot(in, counter); 482 } 483 484 /** 485 * load fsimage files assuming only local names are stored. Used when 486 * snapshots are not supported by the layout version. 
487 * 488 * @param numFiles number of files expected to be read 489 * @param in image input stream 490 * @param counter Counter to increment for namenode startup progress 491 * @throws IOException 492 */ 493 private void loadLocalNameINodes(long numFiles, DataInput in, Counter counter) 494 throws IOException { 495 assert NameNodeLayoutVersion.supports( 496 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); 497 assert numFiles > 0; 498 499 // load root 500 loadRoot(in, counter); 501 // have loaded the first file (the root) 502 numFiles--; 503 504 // load rest of the nodes directory by directory 505 while (numFiles > 0) { 506 numFiles -= loadDirectory(in, counter); 507 } 508 if (numFiles != 0) { 509 throw new IOException("Read unexpect number of files: " + -numFiles); 510 } 511 } 512 513 /** 514 * Load information about root, and use the information to update the root 515 * directory of NameSystem. 516 * @param in The {@link DataInput} instance to read. 517 * @param counter Counter to increment for namenode startup progress 518 */ 519 private void loadRoot(DataInput in, Counter counter) 520 throws IOException { 521 // load root 522 if (in.readShort() != 0) { 523 throw new IOException("First node is not root"); 524 } 525 final INodeDirectory root = loadINode(null, false, in, counter) 526 .asDirectory(); 527 // update the root's attributes 528 updateRootAttr(root); 529 } 530 531 /** Load children nodes for the parent directory. */ 532 private int loadChildren(INodeDirectory parent, DataInput in, 533 Counter counter) throws IOException { 534 int numChildren = in.readInt(); 535 for (int i = 0; i < numChildren; i++) { 536 // load single inode 537 INode newNode = loadINodeWithLocalName(false, in, true, counter); 538 addToParent(parent, newNode); 539 } 540 return numChildren; 541 } 542 543 /** 544 * Load a directory when snapshot is supported. 545 * @param in The {@link DataInput} instance to read. 
546 * @param counter Counter to increment for namenode startup progress 547 */ 548 private void loadDirectoryWithSnapshot(DataInput in, Counter counter) 549 throws IOException { 550 // Step 1. Identify the parent INode 551 long inodeId = in.readLong(); 552 final INodeDirectory parent = this.namesystem.dir.getInode(inodeId) 553 .asDirectory(); 554 555 // Check if the whole subtree has been saved (for reference nodes) 556 boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId()); 557 if (!toLoadSubtree) { 558 return; 559 } 560 561 // Step 2. Load snapshots if parent is snapshottable 562 int numSnapshots = in.readInt(); 563 if (numSnapshots >= 0) { 564 // load snapshots and snapshotQuota 565 SnapshotFSImageFormat.loadSnapshotList(parent, numSnapshots, in, this); 566 if (parent.getDirectorySnapshottableFeature().getSnapshotQuota() > 0) { 567 // add the directory to the snapshottable directory list in 568 // SnapshotManager. Note that we only add root when its snapshot quota 569 // is positive. 570 this.namesystem.getSnapshotManager().addSnapshottable(parent); 571 } 572 } 573 574 // Step 3. Load children nodes under parent 575 loadChildren(parent, in, counter); 576 577 // Step 4. 
load Directory Diff List 578 SnapshotFSImageFormat.loadDirectoryDiffList(parent, in, this); 579 580 // Recursively load sub-directories, including snapshot copies of deleted 581 // directories 582 int numSubTree = in.readInt(); 583 for (int i = 0; i < numSubTree; i++) { 584 loadDirectoryWithSnapshot(in, counter); 585 } 586 } 587 588 /** 589 * Load all children of a directory 590 * 591 * @param in input to load from 592 * @param counter Counter to increment for namenode startup progress 593 * @return number of child inodes read 594 * @throws IOException 595 */ 596 private int loadDirectory(DataInput in, Counter counter) throws IOException { 597 String parentPath = FSImageSerialization.readString(in); 598 // Rename .snapshot paths if we're doing an upgrade 599 parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion()); 600 final INodeDirectory parent = INodeDirectory.valueOf( 601 namesystem.dir.getINode(parentPath, true), parentPath); 602 return loadChildren(parent, in, counter); 603 } 604 605 /** 606 * load fsimage files assuming full path names are stored 607 * 608 * @param numFiles total number of files to load 609 * @param in data input stream 610 * @param counter Counter to increment for namenode startup progress 611 * @throws IOException if any error occurs 612 */ 613 private void loadFullNameINodes(long numFiles, DataInput in, Counter counter) 614 throws IOException { 615 byte[][] pathComponents; 616 byte[][] parentPath = {{}}; 617 FSDirectory fsDir = namesystem.dir; 618 INodeDirectory parentINode = fsDir.rootDir; 619 for (long i = 0; i < numFiles; i++) { 620 pathComponents = FSImageSerialization.readPathComponents(in); 621 for (int j=0; j < pathComponents.length; j++) { 622 byte[] newComponent = renameReservedComponentOnUpgrade 623 (pathComponents[j], getLayoutVersion()); 624 if (!Arrays.equals(newComponent, pathComponents[j])) { 625 String oldPath = DFSUtil.byteArray2PathString(pathComponents); 626 pathComponents[j] = newComponent; 627 String 
newPath = DFSUtil.byteArray2PathString(pathComponents); 628 LOG.info("Renaming reserved path " + oldPath + " to " + newPath); 629 } 630 } 631 final INode newNode = loadINode( 632 pathComponents[pathComponents.length-1], false, in, counter); 633 634 if (isRoot(pathComponents)) { // it is the root 635 // update the root's attributes 636 updateRootAttr(newNode.asDirectory()); 637 continue; 638 } 639 640 namesystem.dir.addToInodeMap(newNode); 641 // check if the new inode belongs to the same parent 642 if(!isParent(pathComponents, parentPath)) { 643 parentINode = getParentINodeDirectory(pathComponents); 644 parentPath = getParent(pathComponents); 645 } 646 647 // add new inode 648 addToParent(parentINode, newNode); 649 } 650 } 651 652 private INodeDirectory getParentINodeDirectory(byte[][] pathComponents 653 ) throws FileNotFoundException, PathIsNotDirectoryException, 654 UnresolvedLinkException { 655 if (pathComponents.length < 2) { // root 656 return null; 657 } 658 // Gets the parent INode 659 final INodesInPath inodes = namesystem.dir.getExistingPathINodes( 660 pathComponents); 661 return INodeDirectory.valueOf(inodes.getINode(-2), pathComponents); 662 } 663 664 /** 665 * Add the child node to parent and, if child is a file, update block map. 666 * This method is only used for image loading so that synchronization, 667 * modification time update and space count update are not needed. 
668 */ 669 private void addToParent(INodeDirectory parent, INode child) { 670 FSDirectory fsDir = namesystem.dir; 671 if (parent == fsDir.rootDir) { 672 child.setLocalName(renameReservedRootComponentOnUpgrade( 673 child.getLocalNameBytes(), getLayoutVersion())); 674 } 675 // NOTE: This does not update space counts for parents 676 if (!parent.addChild(child)) { 677 return; 678 } 679 namesystem.dir.cacheName(child); 680 681 if (child.isFile()) { 682 updateBlocksMap(child.asFile()); 683 } 684 } 685 686 public void updateBlocksMap(INodeFile file) { 687 // Add file->block mapping 688 final BlockInfoContiguous[] blocks = file.getBlocks(); 689 if (blocks != null) { 690 final BlockManager bm = namesystem.getBlockManager(); 691 for (int i = 0; i < blocks.length; i++) { 692 file.setBlock(i, bm.addBlockCollection(blocks[i], file)); 693 } 694 } 695 } 696 697 /** @return The FSDirectory of the namesystem where the fsimage is loaded */ 698 public FSDirectory getFSDirectoryInLoading() { 699 return namesystem.dir; 700 } 701 702 public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in, 703 boolean updateINodeMap) throws IOException { 704 return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null); 705 } 706 707 public INode loadINodeWithLocalName(boolean isSnapshotINode, 708 DataInput in, boolean updateINodeMap, Counter counter) 709 throws IOException { 710 byte[] localName = FSImageSerialization.readLocalName(in); 711 localName = 712 renameReservedComponentOnUpgrade(localName, getLayoutVersion()); 713 INode inode = loadINode(localName, isSnapshotINode, in, counter); 714 if (updateINodeMap) { 715 namesystem.dir.addToInodeMap(inode); 716 } 717 return inode; 718 } 719 720 /** 721 * load an inode from fsimage except for its name 722 * 723 * @param in data input stream from which image is read 724 * @param counter Counter to increment for namenode startup progress 725 * @return an inode 726 */ 727 @SuppressWarnings("deprecation") 728 INode loadINode(final 
byte[] localName, boolean isSnapshotINode, 729 DataInput in, Counter counter) throws IOException { 730 final int imgVersion = getLayoutVersion(); 731 if (NameNodeLayoutVersion.supports( 732 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 733 namesystem.getFSDirectory().verifyINodeName(localName); 734 } 735 736 long inodeId = NameNodeLayoutVersion.supports( 737 LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong() 738 : namesystem.dir.allocateNewInodeId(); 739 740 final short replication = namesystem.getBlockManager().adjustReplication( 741 in.readShort()); 742 final long modificationTime = in.readLong(); 743 long atime = 0; 744 if (NameNodeLayoutVersion.supports( 745 LayoutVersion.Feature.FILE_ACCESS_TIME, imgVersion)) { 746 atime = in.readLong(); 747 } 748 final long blockSize = in.readLong(); 749 final int numBlocks = in.readInt(); 750 751 if (numBlocks >= 0) { 752 // file 753 754 // read blocks 755 BlockInfoContiguous[] blocks = new BlockInfoContiguous[numBlocks]; 756 for (int j = 0; j < numBlocks; j++) { 757 blocks[j] = new BlockInfoContiguous(replication); 758 blocks[j].readFields(in); 759 } 760 761 String clientName = ""; 762 String clientMachine = ""; 763 boolean underConstruction = false; 764 FileDiffList fileDiffs = null; 765 if (NameNodeLayoutVersion.supports( 766 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 767 // read diffs 768 fileDiffs = SnapshotFSImageFormat.loadFileDiffList(in, this); 769 770 if (isSnapshotINode) { 771 underConstruction = in.readBoolean(); 772 if (underConstruction) { 773 clientName = FSImageSerialization.readString(in); 774 clientMachine = FSImageSerialization.readString(in); 775 // convert the last block to BlockUC 776 if (blocks.length > 0) { 777 BlockInfoContiguous lastBlk = blocks[blocks.length - 1]; 778 blocks[blocks.length - 1] = new BlockInfoContiguousUnderConstruction( 779 lastBlk, replication); 780 } 781 } 782 } 783 } 784 785 final PermissionStatus permissions = PermissionStatus.read(in); 786 787 // return 788 
if (counter != null) { 789 counter.increment(); 790 } 791 792 final INodeFile file = new INodeFile(inodeId, localName, permissions, 793 modificationTime, atime, blocks, replication, blockSize, (byte)0); 794 if (underConstruction) { 795 file.toUnderConstruction(clientName, clientMachine); 796 } 797 return fileDiffs == null ? file : new INodeFile(file, fileDiffs); 798 } else if (numBlocks == -1) { 799 //directory 800 801 //read quotas 802 final long nsQuota = in.readLong(); 803 long dsQuota = -1L; 804 if (NameNodeLayoutVersion.supports( 805 LayoutVersion.Feature.DISKSPACE_QUOTA, imgVersion)) { 806 dsQuota = in.readLong(); 807 } 808 809 //read snapshot info 810 boolean snapshottable = false; 811 boolean withSnapshot = false; 812 if (NameNodeLayoutVersion.supports( 813 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 814 snapshottable = in.readBoolean(); 815 if (!snapshottable) { 816 withSnapshot = in.readBoolean(); 817 } 818 } 819 820 final PermissionStatus permissions = PermissionStatus.read(in); 821 822 //return 823 if (counter != null) { 824 counter.increment(); 825 } 826 final INodeDirectory dir = new INodeDirectory(inodeId, localName, 827 permissions, modificationTime); 828 if (nsQuota >= 0 || dsQuota >= 0) { 829 dir.addDirectoryWithQuotaFeature(new DirectoryWithQuotaFeature.Builder(). 
830 nameSpaceQuota(nsQuota).storageSpaceQuota(dsQuota).build()); 831 } 832 if (withSnapshot) { 833 dir.addSnapshotFeature(null); 834 } 835 if (snapshottable) { 836 dir.addSnapshottableFeature(); 837 } 838 return dir; 839 } else if (numBlocks == -2) { 840 //symlink 841 if (!FileSystem.areSymlinksEnabled()) { 842 throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS"); 843 } 844 845 final String symlink = Text.readString(in); 846 final PermissionStatus permissions = PermissionStatus.read(in); 847 if (counter != null) { 848 counter.increment(); 849 } 850 return new INodeSymlink(inodeId, localName, permissions, 851 modificationTime, atime, symlink); 852 } else if (numBlocks == -3) { 853 //reference 854 // Intentionally do not increment counter, because it is too difficult at 855 // this point to assess whether or not this is a reference that counts 856 // toward quota. 857 858 final boolean isWithName = in.readBoolean(); 859 // lastSnapshotId for WithName node, dstSnapshotId for DstReference node 860 int snapshotId = in.readInt(); 861 862 final INodeReference.WithCount withCount 863 = referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this); 864 865 if (isWithName) { 866 return new INodeReference.WithName(null, withCount, localName, 867 snapshotId); 868 } else { 869 final INodeReference ref = new INodeReference.DstReference(null, 870 withCount, snapshotId); 871 return ref; 872 } 873 } 874 875 throw new IOException("Unknown inode type: numBlocks=" + numBlocks); 876 } 877 878 /** Load {@link INodeFileAttributes}. 
* Reads either a compact attribute copy (new layouts) or a full inode
* (old layouts), depending on the image's layout version.
*/
    public INodeFileAttributes loadINodeFileAttributes(DataInput in)
        throws IOException {
      final int layoutVersion = getLayoutVersion();

      // Images older than OPTIMIZE_SNAPSHOT_INODES serialize a complete
      // inode (with local name) here instead of the compact attribute copy;
      // delegate to the full inode loader for those.
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
        return loadINodeWithLocalName(true, in, false).asFile();
      }

      // Field order below mirrors the on-disk format and must not change.
      final byte[] name = FSImageSerialization.readLocalName(in);
      final PermissionStatus permissions = PermissionStatus.read(in);
      final long modificationTime = in.readLong();
      final long accessTime = in.readLong();

      // Let the BlockManager adjust the stored replication factor
      // (e.g. to honor configured bounds).
      final short replication = namesystem.getBlockManager().adjustReplication(
          in.readShort());
      final long preferredBlockSize = in.readLong();

      return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime,
          accessTime, replication, preferredBlockSize, (byte) 0, null);
    }

    /**
     * Load {@link INodeDirectoryAttributes} stored for a directory in a
     * snapshot copy of the legacy fsimage.
     */
    public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in)
        throws IOException {
      final int layoutVersion = getLayoutVersion();

      // Pre-OPTIMIZE_SNAPSHOT_INODES images store a full inode here.
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
        return loadINodeWithLocalName(true, in, false).asDirectory();
      }

      final byte[] name = FSImageSerialization.readLocalName(in);
      final PermissionStatus permissions = PermissionStatus.read(in);
      final long modificationTime = in.readLong();

      // Read quotas: quota by storage type does not need to be processed below.
      // It is handled only in protobuf based FsImagePBINode class for newer
      // fsImages. Tools using this class such as legacy-mode of offline image viewer
      // should only load legacy FSImages without newer features.
      final long nsQuota = in.readLong();
      final long dsQuota = in.readLong();

      // -1 for both quotas means "no quota set"; use the lighter copy then.
      return nsQuota == -1L && dsQuota == -1L ?
          new INodeDirectoryAttributes.SnapshotCopy(
              name, permissions, null, modificationTime, null)
          : new INodeDirectoryAttributes.CopyWithQuota(name, permissions,
              null, modificationTime, nsQuota, dsQuota, null, null);
    }

    /**
     * Load the files that were under construction (leases held) when the
     * image was saved, and re-open them on top of the already-loaded
     * namespace tree.
     *
     * @param in image stream positioned at the under-construction section
     * @param supportSnapshot whether the layout supports snapshots
     *                        (currently unused by this method)
     * @param counter progress counter incremented once per loaded file
     */
    private void loadFilesUnderConstruction(DataInput in,
        boolean supportSnapshot, Counter counter) throws IOException {
      FSDirectory fsDir = namesystem.dir;
      int size = in.readInt();

      LOG.info("Number of files under construction = " + size);

      for (int i = 0; i < size; i++) {
        INodeFile cons = FSImageSerialization.readINodeUnderConstruction(in,
            namesystem, getLayoutVersion());
        counter.increment();

        // verify that file exists in namespace
        String path = cons.getLocalName();
        INodeFile oldnode = null;
        boolean inSnapshot = false;
        if (path != null && FSDirectory.isReservedName(path) &&
            NameNodeLayoutVersion.supports(
                LayoutVersion.Feature.ADD_INODE_ID, getLayoutVersion())) {
          // TODO: for HDFS-5428, we use reserved path for those INodeFileUC in
          // snapshot. If we support INode ID in the layout version, we can use
          // the inode id to find the oldnode.
          oldnode = namesystem.dir.getInode(cons.getId()).asFile();
          inSnapshot = true;
        } else {
          // The path may contain components that are reserved in the new
          // layout; rename them before resolving.
          path = renameReservedPathsOnUpgrade(path, getLayoutVersion());
          final INodesInPath iip = fsDir.getINodesInPath(path, true);
          oldnode = INodeFile.valueOf(iip.getLastINode(), path);
        }

        FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
        oldnode.toUnderConstruction(uc.getClientName(), uc.getClientMachine());
        if (oldnode.numBlocks() > 0) {
          BlockInfoContiguous ucBlock = cons.getLastBlock();
          // we do not replace the inode, just replace the last block of oldnode
          BlockInfoContiguous info = namesystem.getBlockManager().addBlockCollection(
              ucBlock, oldnode);
          oldnode.setBlock(oldnode.numBlocks() - 1, info);
        }

        // Snapshot-only UC files get no lease; they are not writable.
        if (!inSnapshot) {
          namesystem.leaseManager.addLease(cons
              .getFileUnderConstructionFeature().getClientName(), path);
        }
      }
    }

    /**
     * Load the delegation-token secret manager state, if the image's
     * layout version includes it.
     */
    private void loadSecretManagerState(DataInput in)
        throws IOException {
      int imgVersion = getLayoutVersion();

      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.DELEGATION_TOKEN, imgVersion)) {
        //SecretManagerState is not available.
        //This must not happen if security is turned on.
        return;
      }
      namesystem.loadSecretManagerStateCompat(in);
    }

    /** Load the cache manager state, if the image's layout supports caching. */
    private void loadCacheManagerState(DataInput in) throws IOException {
      int imgVersion = getLayoutVersion();
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.CACHING, imgVersion)) {
        return;
      }
      namesystem.getCacheManager().loadStateCompat(in);
    }

    /** Layout version of the image being loaded, as recorded in storage. */
    private int getLayoutVersion() {
      return namesystem.getFSImage().getStorage().getLayoutVersion();
    }

    /**
     * True if the path array represents the root, i.e. a single null
     * component. (Note this is a reference comparison against null, not an
     * empty-array check.)
     */
    private boolean isRoot(byte[][] path) {
      return path.length == 1 &&
          path[0] == null;
    }

    /**
     * True if {@code parent} is the immediate parent path of {@code path}:
     * {@code path} must be exactly one component longer and share all of
     * {@code parent}'s components.
     */
    private boolean isParent(byte[][] path, byte[][] parent) {
      if (path == null || parent == null)
        return false;
      if (parent.length == 0 || path.length != parent.length + 1)
        return false;
      boolean isParent = true;
      for (int i = 0; i < parent.length; i++) {
        isParent = isParent && Arrays.equals(path[i], parent[i]);
      }
      return isParent;
    }

    /**
     * Return string representing the parent of the given path.
     * For a top-level path such as "/a" this yields the empty string.
     */
    String getParent(String path) {
      return path.substring(0, path.lastIndexOf(Path.SEPARATOR));
    }

    /**
     * Return a deep copy of all but the last component of the given path.
     */
    byte[][] getParent(byte[][] path) {
      byte[][] result = new byte[path.length - 1][];
      for (int i = 0; i < result.length; i++) {
        result[i] = new byte[path[i].length];
        System.arraycopy(path[i], 0, result[i], 0, path[i].length);
      }
      return result;
    }

    /** Resolve a snapshot by the id read from the stream. */
    public Snapshot getSnapshot(DataInput in) throws IOException {
      return snapshotMap.get(in.readInt());
    }
  }

  /**
   * Mapping from reserved path component names to the replacement names used
   * when renaming them during an upgrade; mutable so tests can override it.
   */
  @VisibleForTesting
  public static final TreeMap<String, String> renameReservedMap =
      new TreeMap<String, String>();

  /**
   * Use the default key-value pairs that will be used to determine how to
   * rename reserved paths on upgrade.
1045 */ 1046 @VisibleForTesting 1047 public static void useDefaultRenameReservedPairs() { 1048 renameReservedMap.clear(); 1049 for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) { 1050 renameReservedMap.put( 1051 key, 1052 key + "." + HdfsConstants.NAMENODE_LAYOUT_VERSION + "." 1053 + "UPGRADE_RENAMED"); 1054 } 1055 } 1056 1057 /** 1058 * Set the key-value pairs that will be used to determine how to rename 1059 * reserved paths on upgrade. 1060 */ 1061 @VisibleForTesting 1062 public static void setRenameReservedPairs(String renameReserved) { 1063 // Clear and set the default values 1064 useDefaultRenameReservedPairs(); 1065 // Overwrite with provided values 1066 setRenameReservedMapInternal(renameReserved); 1067 } 1068 1069 private static void setRenameReservedMapInternal(String renameReserved) { 1070 Collection<String> pairs = 1071 StringUtils.getTrimmedStringCollection(renameReserved); 1072 for (String p : pairs) { 1073 String[] pair = StringUtils.split(p, '/', '='); 1074 Preconditions.checkArgument(pair.length == 2, 1075 "Could not parse key-value pair " + p); 1076 String key = pair[0]; 1077 String value = pair[1]; 1078 Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key), 1079 "Unknown reserved path " + key); 1080 Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value), 1081 "Invalid rename path for " + key + ": " + value); 1082 LOG.info("Will rename reserved path " + key + " to " + value); 1083 renameReservedMap.put(key, value); 1084 } 1085 } 1086 1087 /** 1088 * When upgrading from an old version, the filesystem could contain paths 1089 * that are now reserved in the new version (e.g. .snapshot). This renames 1090 * these new reserved paths to a user-specified value to avoid collisions 1091 * with the reserved name. 
1092 * 1093 * @param path Old path potentially containing a reserved path 1094 * @return New path with reserved path components renamed to user value 1095 */ 1096 static String renameReservedPathsOnUpgrade(String path, 1097 final int layoutVersion) { 1098 final String oldPath = path; 1099 // If any known LVs aren't supported, we're doing an upgrade 1100 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 1101 String[] components = INode.getPathNames(path); 1102 // Only need to worry about the root directory 1103 if (components.length > 1) { 1104 components[1] = DFSUtil.bytes2String( 1105 renameReservedRootComponentOnUpgrade( 1106 DFSUtil.string2Bytes(components[1]), 1107 layoutVersion)); 1108 path = DFSUtil.strings2PathString(components); 1109 } 1110 } 1111 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 1112 String[] components = INode.getPathNames(path); 1113 // Special case the root path 1114 if (components.length == 0) { 1115 return path; 1116 } 1117 for (int i=0; i<components.length; i++) { 1118 components[i] = DFSUtil.bytes2String( 1119 renameReservedComponentOnUpgrade( 1120 DFSUtil.string2Bytes(components[i]), 1121 layoutVersion)); 1122 } 1123 path = DFSUtil.strings2PathString(components); 1124 } 1125 1126 if (!path.equals(oldPath)) { 1127 LOG.info("Upgrade process renamed reserved path " + oldPath + " to " 1128 + path); 1129 } 1130 return path; 1131 } 1132 1133 private final static String RESERVED_ERROR_MSG = 1134 FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and " 1135 + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in" 1136 + " this version of HDFS. 
Please rollback and delete or rename" 1137 + " this path, or upgrade with the " 1138 + StartupOption.RENAMERESERVED.getName() 1139 + " [key-value pairs]" 1140 + " option to automatically rename these paths during upgrade."; 1141 1142 /** 1143 * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single 1144 * byte array path component. 1145 */ 1146 private static byte[] renameReservedComponentOnUpgrade(byte[] component, 1147 final int layoutVersion) { 1148 // If the LV doesn't support snapshots, we're doing an upgrade 1149 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 1150 if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) { 1151 Preconditions.checkArgument( 1152 renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR), 1153 RESERVED_ERROR_MSG); 1154 component = 1155 DFSUtil.string2Bytes(renameReservedMap 1156 .get(HdfsConstants.DOT_SNAPSHOT_DIR)); 1157 } 1158 } 1159 return component; 1160 } 1161 1162 /** 1163 * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single 1164 * byte array path component. 1165 */ 1166 private static byte[] renameReservedRootComponentOnUpgrade(byte[] component, 1167 final int layoutVersion) { 1168 // If the LV doesn't support inode IDs, we're doing an upgrade 1169 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 1170 if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) { 1171 Preconditions.checkArgument( 1172 renameReservedMap.containsKey(FSDirectory.DOT_RESERVED_STRING), 1173 RESERVED_ERROR_MSG); 1174 final String renameString = renameReservedMap 1175 .get(FSDirectory.DOT_RESERVED_STRING); 1176 component = 1177 DFSUtil.string2Bytes(renameString); 1178 LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING 1179 + " to " + renameString); 1180 } 1181 } 1182 return component; 1183 } 1184 1185 /** 1186 * A one-shot class responsible for writing an image file. 
* The write() function should be called once, after which the getter
* functions may be used to retrieve information about the file that was written.
*
* This is replaced by the PB-based FSImage. The class is to maintain
* compatibility for the external fsimage tool.
*/
  @Deprecated
  static class Saver {
    /** Layout version written into the legacy image header. */
    private static final int LAYOUT_VERSION = -51;
    /** Number of inodes written between checks for a cancelled save. */
    public static final int CHECK_CANCEL_INTERVAL = 4096;
    private final SaveNamespaceContext context;
    /** Set to true once an image has been written */
    private boolean saved = false;
    private long checkCancelCounter = 0;

    /** The MD5 checksum of the file that was written */
    private MD5Hash savedDigest;
    private final ReferenceMap referenceMap = new ReferenceMap();

    // Under-construction files that exist only in snapshots; collected while
    // walking the tree and written out in the under-construction section.
    private final Map<Long, INodeFile> snapshotUCMap =
        new HashMap<Long, INodeFile>();

    /** @throws IllegalStateException if the instance has not yet saved an image */
    private void checkSaved() {
      if (!saved) {
        throw new IllegalStateException("FSImageSaver has not saved an image");
      }
    }

    /** @throws IllegalStateException if the instance has already saved an image */
    private void checkNotSaved() {
      if (saved) {
        throw new IllegalStateException("FSImageSaver has already saved an image");
      }
    }

    Saver(SaveNamespaceContext context) {
      this.context = context;
    }

    /**
     * Return the MD5 checksum of the image file that was saved.
     */
    MD5Hash getSavedDigest() {
      checkSaved();
      return savedDigest;
    }

    /**
     * Write the namespace image to the given file. May be called only once;
     * afterwards {@link #getSavedDigest()} returns the checksum of the file.
     * The write order below defines the legacy on-disk image format and must
     * not be changed.
     *
     * @param newFile destination image file
     * @param compression compression to apply after the uncompressed header
     * @throws IOException on write failure or if the save is cancelled
     */
    void save(File newFile, FSImageCompression compression) throws IOException {
      checkNotSaved();

      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
      // Total inode count comes from the root's quota feature usage.
      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
          .getSpaceConsumed().getNameSpace();
      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
      Step step = new Step(StepType.INODES, sdPath);
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
      long startTime = monotonicNow();
      //
      // Write out data
      //
      MessageDigest digester = MD5Hash.getDigester();
      FileOutputStream fout = new FileOutputStream(newFile);
      DigestOutputStream fos = new DigestOutputStream(fout, digester);
      DataOutputStream out = new DataOutputStream(fos);
      try {
        // Header is written uncompressed so readers can parse it before
        // knowing the compression codec.
        out.writeInt(LAYOUT_VERSION);
        LayoutFlags.write(out);
        // We use the non-locked version of getNamespaceInfo here since
        // the coordinating thread of saveNamespace already has read-locked
        // the namespace for us. If we attempt to take another readlock
        // from the actual saver thread, there's a potential of a
        // fairness-related deadlock. See the comments on HDFS-2223.
        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
            .getNamespaceID());
        out.writeLong(numINodes);
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampV1());
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampV2());
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampAtblockIdSwitch());
        out.writeLong(sourceNamesystem.getBlockIdManager().getLastAllocatedBlockId());
        out.writeLong(context.getTxId());
        out.writeLong(sourceNamesystem.dir.getLastInodeId());

        sourceNamesystem.getSnapshotManager().write(out);

        // write compression info and set up compressed stream
        // NOTE(review): 'out' is re-assigned to the compression-wrapped
        // stream here; the finally block closes whichever stream 'out'
        // references at that point.
        out = compression.writeHeaderAndWrapStream(fos);
        LOG.info("Saving image file " + newFile +
            " using " + compression);

        // save the root
        saveINode2Image(rootDir, out, false, referenceMap, counter);
        // save the rest of the nodes
        saveImage(rootDir, out, true, false, counter);
        prog.endStep(Phase.SAVING_CHECKPOINT, step);
        // Now that the step is finished, set counter equal to total to adjust
        // for possible under-counting due to reference inodes.
        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
        // save files under construction
        // TODO: for HDFS-5428, since we cannot break the compatibility of
        // fsimage, we store part of the under-construction files that are only
        // in snapshots in this "under-construction-file" section. As a
        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
        // paths, so that when loading fsimage we do not put them into the lease
        // map. In the future, we can remove this hack when we can bump the
        // layout version.
        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);

        context.checkCancelled();
        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
        context.checkCancelled();
        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
        context.checkCancelled();
        out.flush();
        context.checkCancelled();
        // Force the data to disk before declaring the image saved.
        fout.getChannel().force(true);
      } finally {
        out.close();
      }

      saved = true;
      // set md5 of the saved image
      savedDigest = new MD5Hash(digester.digest());

      LOG.info("Image file " + newFile + " of size " + newFile.length()
          + " bytes saved in " + (monotonicNow() - startTime) / 1000
          + " seconds.");
    }

    /**
     * Save children INodes.
     * @param children The list of children INodes
     * @param out The DataOutputStream to write
     * @param inSnapshot Whether the parent directory or its ancestor is in
     *                   the deleted list of some snapshot (caused by rename or
     *                   deletion)
     * @param counter Counter to increment for namenode startup progress
     * @return Number of children that are directory
     */
    private int saveChildren(ReadOnlyList<INode> children,
        DataOutputStream out, boolean inSnapshot, Counter counter)
        throws IOException {
      // Write normal children INode.
      out.writeInt(children.size());
      int dirNum = 0;
      for (INode child : children) {
        // print all children first
        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
        // here, thus even if the parent directory is in snapshot, we still
        // do not handle INodeUC as those stored in deleted list
        saveINode2Image(child, out, false, referenceMap, counter);
        if (child.isDirectory()) {
          dirNum++;
        } else if (inSnapshot && child.isFile()
            && child.asFile().isUnderConstruction()) {
          // Remember snapshot-only UC files for the under-construction section.
          this.snapshotUCMap.put(child.getId(), child.asFile());
        }
        // Periodically check whether the save has been cancelled.
        if (checkCancelCounter++ % CHECK_CANCEL_INTERVAL == 0) {
          context.checkCancelled();
        }
      }
      return dirNum;
    }

    /**
     * Save file tree image starting from the given root.
     * This is a recursive procedure, which first saves all children and
     * snapshot diffs of a current directory and then moves inside the
     * sub-directories.
     *
     * @param current The current node
     * @param out The DataoutputStream to write the image
     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
     *                      reference node, its subtree may already have been
     *                      saved before.
     * @param inSnapshot Whether the current directory is in snapshot
     * @param counter Counter to increment for namenode startup progress
     */
    private void saveImage(INodeDirectory current, DataOutputStream out,
        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
        throws IOException {
      // 1. Write the inode id of the directory.
      out.writeLong(current.getId());

      // A subtree below a reference node is only written once.
      if (!toSaveSubtree) {
        return;
      }

      final ReadOnlyList<INode> children = current
          .getChildrenList(Snapshot.CURRENT_STATE_ID);
      int dirNum = 0;
      List<INodeDirectory> snapshotDirs = null;
      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
      if (sf != null) {
        // Deleted-but-snapshotted sub-directories also need their subtrees
        // written, so count them toward dirNum.
        snapshotDirs = new ArrayList<INodeDirectory>();
        sf.getSnapshotDirectory(snapshotDirs);
        dirNum += snapshotDirs.size();
      }

      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
      // Snapshots
      if (current.isDirectory() && current.asDirectory().isSnapshottable()) {
        SnapshotFSImageFormat.saveSnapshots(current.asDirectory(), out);
      } else {
        out.writeInt(-1); // # of snapshots
      }

      // 3. Write children INode
      dirNum += saveChildren(children, out, inSnapshot, counter);

      // 4. Write DirectoryDiff lists, if there is any.
      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);

      // Write sub-tree of sub-directories, including possible snapshots of
      // deleted sub-directories
      out.writeInt(dirNum); // the number of sub-directories
      for (INode child : children) {
        if (!child.isDirectory()) {
          continue;
        }
        // make sure we only save the subtree under a reference node once
        boolean toSave = child.isReference() ?
            referenceMap.toProcessSubtree(child.getId()) : true;
        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
      }
      if (snapshotDirs != null) {
        for (INodeDirectory subDir : snapshotDirs) {
          // make sure we only save the subtree under a reference node once
          boolean toSave = subDir.getParentReference() != null ?
              referenceMap.toProcessSubtree(subDir.getId()) : true;
          saveImage(subDir, out, toSave, true, counter);
        }
      }
    }

    /**
     * Saves inode and increments progress counter.
     *
     * @param inode INode to save
     * @param out DataOutputStream to receive inode
     * @param writeUnderConstruction boolean true if this is under construction
     * @param referenceMap ReferenceMap containing reference inodes
     * @param counter Counter to increment for namenode startup progress
     * @throws IOException thrown if there is an I/O error
     */
    private void saveINode2Image(INode inode, DataOutputStream out,
        boolean writeUnderConstruction, ReferenceMap referenceMap,
        Counter counter) throws IOException {
      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
          referenceMap);
      // Intentionally do not increment counter for reference inodes, because it
      // is too difficult at this point to assess whether or not this is a
      // reference that counts toward quota.
      if (!(inode instanceof INodeReference)) {
        counter.increment();
      }
    }
  }
}