001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import java.io.Closeable; 021import java.io.File; 022import java.io.IOException; 023import java.io.RandomAccessFile; 024import java.net.URI; 025import java.net.UnknownHostException; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.EnumSet; 029import java.util.HashMap; 030import java.util.Iterator; 031import java.util.List; 032import java.util.Properties; 033import java.util.UUID; 034import java.util.concurrent.CopyOnWriteArrayList; 035 036import org.apache.hadoop.classification.InterfaceAudience; 037import org.apache.hadoop.conf.Configuration; 038import org.apache.hadoop.fs.FileUtil; 039import org.apache.hadoop.hdfs.DFSUtil; 040import org.apache.hadoop.hdfs.protocol.HdfsConstants; 041import org.apache.hadoop.hdfs.protocol.LayoutVersion; 042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; 043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; 044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 045import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; 046import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; 047import org.apache.hadoop.hdfs.server.common.Storage; 048import org.apache.hadoop.hdfs.server.common.StorageErrorReporter; 049import org.apache.hadoop.hdfs.server.common.Util; 050import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 051import org.apache.hadoop.hdfs.util.PersistentLongFile; 052import org.apache.hadoop.io.IOUtils; 053import org.apache.hadoop.net.DNS; 054import org.apache.hadoop.util.Time; 055 056import com.google.common.annotations.VisibleForTesting; 057import com.google.common.base.Preconditions; 058import com.google.common.collect.Lists; 059 060/** 061 * NNStorage is responsible for management of the StorageDirectories used by 062 * the NameNode. 063 */ 064@InterfaceAudience.Private 065public class NNStorage extends Storage implements Closeable, 066 StorageErrorReporter { 067 static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest"; 068 static final String LOCAL_URI_SCHEME = "file"; 069 070 // 071 // The filenames used for storing the images 072 // 073 public enum NameNodeFile { 074 IMAGE ("fsimage"), 075 TIME ("fstime"), // from "old" pre-HDFS-1073 format 076 SEEN_TXID ("seen_txid"), 077 EDITS ("edits"), 078 IMAGE_NEW ("fsimage.ckpt"), 079 IMAGE_ROLLBACK("fsimage_rollback"), 080 EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format 081 EDITS_INPROGRESS ("edits_inprogress"), 082 EDITS_TMP ("edits_tmp"), 083 IMAGE_LEGACY_OIV ("fsimage_legacy_oiv"); // For pre-PB format 084 085 private String fileName = null; 086 private NameNodeFile(String name) { this.fileName = name; } 087 @VisibleForTesting 088 public String getName() { return fileName; } 089 } 090 091 /** 092 * Implementation of StorageDirType specific to namenode storage 093 * A Storage directory could be of type IMAGE which stores only fsimage, 094 * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which 095 * stores both fsimage and edits. 096 */ 097 @VisibleForTesting 098 public static enum NameNodeDirType implements StorageDirType { 099 UNDEFINED, 100 IMAGE, 101 EDITS, 102 IMAGE_AND_EDITS; 103 104 @Override 105 public StorageDirType getStorageDirType() { 106 return this; 107 } 108 109 @Override 110 public boolean isOfType(StorageDirType type) { 111 if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS)) 112 return true; 113 return this == type; 114 } 115 } 116 117 protected String blockpoolID = ""; // id of the block pool 118 119 /** 120 * flag that controls if we try to restore failed storages 121 */ 122 private boolean restoreFailedStorage = false; 123 private final Object restorationLock = new Object(); 124 private boolean disablePreUpgradableLayoutCheck = false; 125 126 127 /** 128 * TxId of the last transaction that was included in the most 129 * recent fsimage file. This does not include any transactions 130 * that have since been written to the edit log. 131 */ 132 protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID; 133 134 /** 135 * Time of the last checkpoint, in milliseconds since the epoch. 136 */ 137 private long mostRecentCheckpointTime = 0; 138 139 /** 140 * list of failed (and thus removed) storages 141 */ 142 final protected List<StorageDirectory> removedStorageDirs 143 = new CopyOnWriteArrayList<StorageDirectory>(); 144 145 /** 146 * Properties from old layout versions that may be needed 147 * during upgrade only. 148 */ 149 private HashMap<String, String> deprecatedProperties; 150 151 /** 152 * Construct the NNStorage. 153 * @param conf Namenode configuration. 154 * @param imageDirs Directories the image can be stored in. 155 * @param editsDirs Directories the editlog can be stored in. 156 * @throws IOException if any directories are inaccessible. 157 */ 158 public NNStorage(Configuration conf, 159 Collection<URI> imageDirs, Collection<URI> editsDirs) 160 throws IOException { 161 super(NodeType.NAME_NODE); 162 163 storageDirs = new CopyOnWriteArrayList<StorageDirectory>(); 164 165 // this may modify the editsDirs, so copy before passing in 166 setStorageDirectories(imageDirs, 167 Lists.newArrayList(editsDirs), 168 FSNamesystem.getSharedEditsDirs(conf)); 169 } 170 171 @Override // Storage 172 public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException { 173 if (disablePreUpgradableLayoutCheck) { 174 return false; 175 } 176 177 File oldImageDir = new File(sd.getRoot(), "image"); 178 if (!oldImageDir.exists()) { 179 return false; 180 } 181 // check the layout version inside the image file 182 File oldF = new File(oldImageDir, "fsimage"); 183 RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws"); 184 try { 185 oldFile.seek(0); 186 int oldVersion = oldFile.readInt(); 187 oldFile.close(); 188 oldFile = null; 189 if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION) 190 return false; 191 } finally { 192 IOUtils.cleanup(LOG, oldFile); 193 } 194 return true; 195 } 196 197 @Override // Closeable 198 public void close() throws IOException { 199 unlockAll(); 200 storageDirs.clear(); 201 } 202 203 /** 204 * Set flag whether an attempt should be made to restore failed storage 205 * directories at the next available oppurtuinity. 206 * 207 * @param val Whether restoration attempt should be made. 208 */ 209 void setRestoreFailedStorage(boolean val) { 210 LOG.warn("set restore failed storage to " + val); 211 restoreFailedStorage=val; 212 } 213 214 /** 215 * @return Whether failed storage directories are to be restored. 216 */ 217 boolean getRestoreFailedStorage() { 218 return restoreFailedStorage; 219 } 220 221 /** 222 * See if any of removed storages is "writable" again, and can be returned 223 * into service. 224 */ 225 void attemptRestoreRemovedStorage() { 226 // if directory is "alive" - copy the images there... 227 if(!restoreFailedStorage || removedStorageDirs.size() == 0) 228 return; //nothing to restore 229 230 /* We don't want more than one thread trying to restore at a time */ 231 synchronized (this.restorationLock) { 232 LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+ 233 "storarge. removedStorages size = " + removedStorageDirs.size()); 234 for(Iterator<StorageDirectory> it 235 = this.removedStorageDirs.iterator(); it.hasNext();) { 236 StorageDirectory sd = it.next(); 237 File root = sd.getRoot(); 238 LOG.info("currently disabled dir " + root.getAbsolutePath() + 239 "; type="+sd.getStorageDirType() 240 + ";canwrite="+FileUtil.canWrite(root)); 241 if(root.exists() && FileUtil.canWrite(root)) { 242 LOG.info("restoring dir " + sd.getRoot().getAbsolutePath()); 243 this.addStorageDir(sd); // restore 244 this.removedStorageDirs.remove(sd); 245 } 246 } 247 } 248 } 249 250 /** 251 * @return A list of storage directories which are in the errored state. 252 */ 253 List<StorageDirectory> getRemovedStorageDirs() { 254 return this.removedStorageDirs; 255 } 256 257 /** 258 * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)} 259 */ 260 @VisibleForTesting 261 synchronized void setStorageDirectories(Collection<URI> fsNameDirs, 262 Collection<URI> fsEditsDirs) 263 throws IOException { 264 setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>()); 265 } 266 267 /** 268 * Set the storage directories which will be used. This should only ever be 269 * called from inside NNStorage. However, it needs to remain package private 270 * for testing, as StorageDirectories need to be reinitialised after using 271 * Mockito.spy() on this class, as Mockito doesn't work well with inner 272 * classes, such as StorageDirectory in this case. 273 * 274 * Synchronized due to initialization of storageDirs and removedStorageDirs. 275 * 276 * @param fsNameDirs Locations to store images. 277 * @param fsEditsDirs Locations to store edit logs. 278 * @throws IOException 279 */ 280 @VisibleForTesting 281 synchronized void setStorageDirectories(Collection<URI> fsNameDirs, 282 Collection<URI> fsEditsDirs, 283 Collection<URI> sharedEditsDirs) 284 throws IOException { 285 this.storageDirs.clear(); 286 this.removedStorageDirs.clear(); 287 288 // Add all name dirs with appropriate NameNodeDirType 289 for (URI dirName : fsNameDirs) { 290 checkSchemeConsistency(dirName); 291 boolean isAlsoEdits = false; 292 for (URI editsDirName : fsEditsDirs) { 293 if (editsDirName.compareTo(dirName) == 0) { 294 isAlsoEdits = true; 295 fsEditsDirs.remove(editsDirName); 296 break; 297 } 298 } 299 NameNodeDirType dirType = (isAlsoEdits) ? 300 NameNodeDirType.IMAGE_AND_EDITS : 301 NameNodeDirType.IMAGE; 302 // Add to the list of storage directories, only if the 303 // URI is of type file:// 304 if(dirName.getScheme().compareTo("file") == 0) { 305 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), 306 dirType, 307 sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared. 308 } 309 } 310 311 // Add edits dirs if they are different from name dirs 312 for (URI dirName : fsEditsDirs) { 313 checkSchemeConsistency(dirName); 314 // Add to the list of storage directories, only if the 315 // URI is of type file:// 316 if(dirName.getScheme().compareTo("file") == 0) 317 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), 318 NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName))); 319 } 320 } 321 322 /** 323 * Return the storage directory corresponding to the passed URI 324 * @param uri URI of a storage directory 325 * @return The matching storage directory or null if none found 326 */ 327 StorageDirectory getStorageDirectory(URI uri) { 328 try { 329 uri = Util.fileAsURI(new File(uri)); 330 Iterator<StorageDirectory> it = dirIterator(); 331 for (; it.hasNext(); ) { 332 StorageDirectory sd = it.next(); 333 if (Util.fileAsURI(sd.getRoot()).equals(uri)) { 334 return sd; 335 } 336 } 337 } catch (IOException ioe) { 338 LOG.warn("Error converting file to URI", ioe); 339 } 340 return null; 341 } 342 343 /** 344 * Checks the consistency of a URI, in particular if the scheme 345 * is specified 346 * @param u URI whose consistency is being checked. 347 */ 348 private static void checkSchemeConsistency(URI u) throws IOException { 349 String scheme = u.getScheme(); 350 // the URI should have a proper scheme 351 if(scheme == null) { 352 throw new IOException("Undefined scheme for " + u); 353 } 354 } 355 356 /** 357 * Retrieve current directories of type IMAGE 358 * @return Collection of URI representing image directories 359 * @throws IOException in case of URI processing error 360 */ 361 Collection<URI> getImageDirectories() throws IOException { 362 return getDirectories(NameNodeDirType.IMAGE); 363 } 364 365 /** 366 * Retrieve current directories of type EDITS 367 * @return Collection of URI representing edits directories 368 * @throws IOException in case of URI processing error 369 */ 370 Collection<URI> getEditsDirectories() throws IOException { 371 return getDirectories(NameNodeDirType.EDITS); 372 } 373 374 /** 375 * Return number of storage directories of the given type. 376 * @param dirType directory type 377 * @return number of storage directories of type dirType 378 */ 379 int getNumStorageDirs(NameNodeDirType dirType) { 380 if(dirType == null) 381 return getNumStorageDirs(); 382 Iterator<StorageDirectory> it = dirIterator(dirType); 383 int numDirs = 0; 384 for(; it.hasNext(); it.next()) 385 numDirs++; 386 return numDirs; 387 } 388 389 /** 390 * Return the list of locations being used for a specific purpose. 391 * i.e. Image or edit log storage. 392 * 393 * @param dirType Purpose of locations requested. 394 * @throws IOException 395 */ 396 Collection<URI> getDirectories(NameNodeDirType dirType) 397 throws IOException { 398 ArrayList<URI> list = new ArrayList<URI>(); 399 Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() : 400 dirIterator(dirType); 401 for ( ;it.hasNext(); ) { 402 StorageDirectory sd = it.next(); 403 try { 404 list.add(Util.fileAsURI(sd.getRoot())); 405 } catch (IOException e) { 406 throw new IOException("Exception while processing " + 407 "StorageDirectory " + sd.getRoot(), e); 408 } 409 } 410 return list; 411 } 412 413 /** 414 * Determine the last transaction ID noted in this storage directory. 415 * This txid is stored in a special seen_txid file since it might not 416 * correspond to the latest image or edit log. For example, an image-only 417 * directory will have this txid incremented when edits logs roll, even 418 * though the edits logs are in a different directory. 419 * 420 * @param sd StorageDirectory to check 421 * @return If file exists and can be read, last recorded txid. If not, 0L. 422 * @throws IOException On errors processing file pointed to by sd 423 */ 424 static long readTransactionIdFile(StorageDirectory sd) throws IOException { 425 File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); 426 return PersistentLongFile.readFile(txidFile, 0); 427 } 428 429 /** 430 * Write last checkpoint time into a separate file. 431 * @param sd storage directory 432 * @throws IOException 433 */ 434 void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException { 435 Preconditions.checkArgument(txid >= 0, "bad txid: " + txid); 436 437 File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); 438 PersistentLongFile.writeFile(txIdFile, txid); 439 } 440 441 /** 442 * Set the transaction ID and time of the last checkpoint 443 * 444 * @param txid transaction id of the last checkpoint 445 * @param time time of the last checkpoint, in millis since the epoch 446 */ 447 void setMostRecentCheckpointInfo(long txid, long time) { 448 this.mostRecentCheckpointTxId = txid; 449 this.mostRecentCheckpointTime = time; 450 } 451 452 /** 453 * @return the transaction ID of the last checkpoint. 454 */ 455 public long getMostRecentCheckpointTxId() { 456 return mostRecentCheckpointTxId; 457 } 458 459 /** 460 * @return the time of the most recent checkpoint in millis since the epoch. 461 */ 462 long getMostRecentCheckpointTime() { 463 return mostRecentCheckpointTime; 464 } 465 466 /** 467 * Write a small file in all available storage directories that 468 * indicates that the namespace has reached some given transaction ID. 469 * 470 * This is used when the image is loaded to avoid accidental rollbacks 471 * in the case where an edit log is fully deleted but there is no 472 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure() 473 * @param txid the txid that has been reached 474 */ 475 public void writeTransactionIdFileToStorage(long txid) { 476 // Write txid marker in all storage directories 477 for (StorageDirectory sd : storageDirs) { 478 try { 479 writeTransactionIdFile(sd, txid); 480 } catch(IOException e) { 481 // Close any edits stream associated with this dir and remove directory 482 LOG.warn("writeTransactionIdToStorage failed on " + sd, 483 e); 484 reportErrorsOnDirectory(sd); 485 } 486 } 487 } 488 489 /** 490 * Return the name of the image file that is uploaded by periodic 491 * checkpointing 492 * 493 * @return List of filenames to save checkpoints to. 494 */ 495 public File[] getFsImageNameCheckpoint(long txid) { 496 ArrayList<File> list = new ArrayList<File>(); 497 for (Iterator<StorageDirectory> it = 498 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { 499 list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid)); 500 } 501 return list.toArray(new File[list.size()]); 502 } 503 504 /** 505 * @return The first image file with the given txid and image type. 506 */ 507 public File getFsImageName(long txid, NameNodeFile nnf) { 508 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); 509 it.hasNext();) { 510 StorageDirectory sd = it.next(); 511 File fsImage = getStorageFile(sd, nnf, txid); 512 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) { 513 return fsImage; 514 } 515 } 516 return null; 517 } 518 519 /** 520 * @return The first image file whose txid is the same with the given txid and 521 * image type is one of the given types. 522 */ 523 public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) { 524 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); 525 it.hasNext();) { 526 StorageDirectory sd = it.next(); 527 for (NameNodeFile nnf : nnfs) { 528 File fsImage = getStorageFile(sd, nnf, txid); 529 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) { 530 return fsImage; 531 } 532 } 533 } 534 return null; 535 } 536 537 public File getFsImageName(long txid) { 538 return getFsImageName(txid, NameNodeFile.IMAGE); 539 } 540 541 public File getHighestFsImageName() { 542 return getFsImageName(getMostRecentCheckpointTxId()); 543 } 544 545 /** Create new dfs name directory. Caution: this destroys all files 546 * in this filesystem. */ 547 private void format(StorageDirectory sd) throws IOException { 548 sd.clearDirectory(); // create currrent dir 549 writeProperties(sd); 550 writeTransactionIdFile(sd, 0); 551 552 LOG.info("Storage directory " + sd.getRoot() 553 + " has been successfully formatted."); 554 } 555 556 /** 557 * Format all available storage directories. 558 */ 559 public void format(NamespaceInfo nsInfo) throws IOException { 560 Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 || 561 nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION, 562 "Bad layout version: %s", nsInfo.getLayoutVersion()); 563 564 this.setStorageInfo(nsInfo); 565 this.blockpoolID = nsInfo.getBlockPoolID(); 566 for (Iterator<StorageDirectory> it = 567 dirIterator(); it.hasNext();) { 568 StorageDirectory sd = it.next(); 569 format(sd); 570 } 571 } 572 573 public static NamespaceInfo newNamespaceInfo() 574 throws UnknownHostException { 575 return new NamespaceInfo(newNamespaceID(), newClusterID(), 576 newBlockPoolID(), 0L); 577 } 578 579 public void format() throws IOException { 580 this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION; 581 for (Iterator<StorageDirectory> it = 582 dirIterator(); it.hasNext();) { 583 StorageDirectory sd = it.next(); 584 format(sd); 585 } 586 } 587 588 /** 589 * Generate new namespaceID. 590 * 591 * namespaceID is a persistent attribute of the namespace. 592 * It is generated when the namenode is formatted and remains the same 593 * during the life cycle of the namenode. 594 * When a datanodes register they receive it as the registrationID, 595 * which is checked every time the datanode is communicating with the 596 * namenode. Datanodes that do not 'know' the namespaceID are rejected. 597 * 598 * @return new namespaceID 599 */ 600 private static int newNamespaceID() { 601 int newID = 0; 602 while(newID == 0) 603 newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF); // use 31 bits only 604 return newID; 605 } 606 607 @Override // Storage 608 protected void setFieldsFromProperties( 609 Properties props, StorageDirectory sd) throws IOException { 610 super.setFieldsFromProperties(props, sd); 611 if (layoutVersion == 0) { 612 throw new IOException("NameNode directory " 613 + sd.getRoot() + " is not formatted."); 614 } 615 616 // Set Block pool ID in version with federation support 617 if (NameNodeLayoutVersion.supports( 618 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) { 619 String sbpid = props.getProperty("blockpoolID"); 620 setBlockPoolID(sd.getRoot(), sbpid); 621 } 622 setDeprecatedPropertiesForUpgrade(props); 623 } 624 625 void readProperties(StorageDirectory sd, StartupOption startupOption) 626 throws IOException { 627 Properties props = readPropertiesFile(sd.getVersionFile()); 628 if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches 629 (startupOption)) { 630 int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion")); 631 if (lv > getServiceLayoutVersion()) { 632 // we should not use a newer version for rollingUpgrade rollback 633 throw new IncorrectVersionException(getServiceLayoutVersion(), lv, 634 "storage directory " + sd.getRoot().getAbsolutePath()); 635 } 636 props.setProperty("layoutVersion", 637 Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION)); 638 } 639 setFieldsFromProperties(props, sd); 640 } 641 642 /** 643 * Pull any properties out of the VERSION file that are from older 644 * versions of HDFS and only necessary during upgrade. 645 */ 646 private void setDeprecatedPropertiesForUpgrade(Properties props) { 647 deprecatedProperties = new HashMap<String, String>(); 648 String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY); 649 if (md5 != null) { 650 deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5); 651 } 652 } 653 654 /** 655 * Return a property that was stored in an earlier version of HDFS. 656 * 657 * This should only be used during upgrades. 658 */ 659 String getDeprecatedProperty(String prop) { 660 assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION : 661 "getDeprecatedProperty should only be done when loading " + 662 "storage from past versions during upgrade."; 663 return deprecatedProperties.get(prop); 664 } 665 666 /** 667 * Write version file into the storage directory. 668 * 669 * The version file should always be written last. 670 * Missing or corrupted version file indicates that 671 * the checkpoint is not valid. 672 * 673 * @param sd storage directory 674 * @throws IOException 675 */ 676 @Override // Storage 677 protected void setPropertiesFromFields(Properties props, 678 StorageDirectory sd 679 ) throws IOException { 680 super.setPropertiesFromFields(props, sd); 681 // Set blockpoolID in version with federation support 682 if (NameNodeLayoutVersion.supports( 683 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) { 684 props.setProperty("blockpoolID", blockpoolID); 685 } 686 } 687 688 static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) { 689 return new File(sd.getCurrentDir(), 690 String.format("%s_%019d", type.getName(), imageTxId)); 691 } 692 693 /** 694 * Get a storage file for one of the files that doesn't need a txid associated 695 * (e.g version, seen_txid) 696 */ 697 static File getStorageFile(StorageDirectory sd, NameNodeFile type) { 698 return new File(sd.getCurrentDir(), type.getName()); 699 } 700 701 @VisibleForTesting 702 public static String getCheckpointImageFileName(long txid) { 703 return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid); 704 } 705 706 @VisibleForTesting 707 public static String getImageFileName(long txid) { 708 return getNameNodeFileName(NameNodeFile.IMAGE, txid); 709 } 710 711 @VisibleForTesting 712 public static String getRollbackImageFileName(long txid) { 713 return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid); 714 } 715 716 public static String getLegacyOIVImageFileName(long txid) { 717 return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid); 718 } 719 720 private static String getNameNodeFileName(NameNodeFile nnf, long txid) { 721 return String.format("%s_%019d", nnf.getName(), txid); 722 } 723 724 @VisibleForTesting 725 public static String getInProgressEditsFileName(long startTxId) { 726 return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId); 727 } 728 729 static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { 730 return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId)); 731 } 732 733 static File getFinalizedEditsFile(StorageDirectory sd, 734 long startTxId, long endTxId) { 735 return new File(sd.getCurrentDir(), 736 getFinalizedEditsFileName(startTxId, endTxId)); 737 } 738 739 static File getTemporaryEditsFile(StorageDirectory sd, 740 long startTxId, long endTxId, long timestamp) { 741 return new File(sd.getCurrentDir(), 742 getTemporaryEditsFileName(startTxId, endTxId, timestamp)); 743 } 744 745 static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) { 746 return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid)); 747 } 748 749 @VisibleForTesting 750 public static String getFinalizedEditsFileName(long startTxId, long endTxId) { 751 return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(), 752 startTxId, endTxId); 753 } 754 755 public static String getTemporaryEditsFileName(long startTxId, long endTxId, 756 long timestamp) { 757 return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(), 758 startTxId, endTxId, timestamp); 759 } 760 761 /** 762 * Return the first readable finalized edits file for the given txid. 763 */ 764 File findFinalizedEditsFile(long startTxId, long endTxId) 765 throws IOException { 766 File ret = findFile(NameNodeDirType.EDITS, 767 getFinalizedEditsFileName(startTxId, endTxId)); 768 if (ret == null) { 769 throw new IOException( 770 "No edits file for txid " + startTxId + "-" + endTxId + " exists!"); 771 } 772 return ret; 773 } 774 775 /** 776 * Return the first readable image file for the given txid and image type, or 777 * null if no such image can be found 778 */ 779 File findImageFile(NameNodeFile nnf, long txid) { 780 return findFile(NameNodeDirType.IMAGE, 781 getNameNodeFileName(nnf, txid)); 782 } 783 784 /** 785 * Return the first readable storage file of the given name 786 * across any of the 'current' directories in SDs of the 787 * given type, or null if no such file exists. 788 */ 789 private File findFile(NameNodeDirType dirType, String name) { 790 for (StorageDirectory sd : dirIterable(dirType)) { 791 File candidate = new File(sd.getCurrentDir(), name); 792 if (FileUtil.canRead(sd.getCurrentDir()) && 793 candidate.exists()) { 794 return candidate; 795 } 796 } 797 return null; 798 } 799 800 /** 801 * Disable the check for pre-upgradable layouts. Needed for BackupImage. 802 * @param val Whether to disable the preupgradeable layout check. 803 */ 804 void setDisablePreUpgradableLayoutCheck(boolean val) { 805 disablePreUpgradableLayoutCheck = val; 806 } 807 808 /** 809 * Marks a list of directories as having experienced an error. 810 * 811 * @param sds A list of storage directories to mark as errored. 812 */ 813 void reportErrorsOnDirectories(List<StorageDirectory> sds) { 814 for (StorageDirectory sd : sds) { 815 reportErrorsOnDirectory(sd); 816 } 817 } 818 819 /** 820 * Reports that a directory has experienced an error. 821 * Notifies listeners that the directory is no longer 822 * available. 823 * 824 * @param sd A storage directory to mark as errored. 825 */ 826 private void reportErrorsOnDirectory(StorageDirectory sd) { 827 LOG.error("Error reported on storage directory " + sd); 828 829 String lsd = listStorageDirectories(); 830 LOG.debug("current list of storage dirs:" + lsd); 831 832 LOG.warn("About to remove corresponding storage: " 833 + sd.getRoot().getAbsolutePath()); 834 try { 835 sd.unlock(); 836 } catch (Exception e) { 837 LOG.warn("Unable to unlock bad storage directory: " 838 + sd.getRoot().getPath(), e); 839 } 840 841 if (this.storageDirs.remove(sd)) { 842 this.removedStorageDirs.add(sd); 843 } 844 845 lsd = listStorageDirectories(); 846 LOG.debug("at the end current list of storage dirs:" + lsd); 847 } 848 849 /** 850 * Processes the startup options for the clusterid and blockpoolid 851 * for the upgrade. 852 * @param startOpt Startup options 853 * @param layoutVersion Layout version for the upgrade 854 * @throws IOException 855 */ 856 void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion) 857 throws IOException { 858 if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) { 859 // If upgrade from a release that does not support federation, 860 // if clusterId is provided in the startupOptions use it. 861 // Else generate a new cluster ID 862 if (!NameNodeLayoutVersion.supports( 863 LayoutVersion.Feature.FEDERATION, layoutVersion)) { 864 if (startOpt.getClusterId() == null) { 865 startOpt.setClusterId(newClusterID()); 866 } 867 setClusterID(startOpt.getClusterId()); 868 setBlockPoolID(newBlockPoolID()); 869 } else { 870 // Upgrade from one version of federation to another supported 871 // version of federation doesn't require clusterID. 872 // Warn the user if the current clusterid didn't match with the input 873 // clusterid. 874 if (startOpt.getClusterId() != null 875 && !startOpt.getClusterId().equals(getClusterID())) { 876 LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID() 877 + ", Ignoring given clusterid: " + startOpt.getClusterId()); 878 } 879 } 880 LOG.info("Using clusterid: " + getClusterID()); 881 } 882 } 883 884 /** 885 * Report that an IOE has occurred on some file which may 886 * or may not be within one of the NN image storage directories. 887 */ 888 @Override 889 public void reportErrorOnFile(File f) { 890 // We use getAbsolutePath here instead of getCanonicalPath since we know 891 // that there is some IO problem on that drive. 892 // getCanonicalPath may need to call stat() or readlink() and it's likely 893 // those calls would fail due to the same underlying IO problem. 894 String absPath = f.getAbsolutePath(); 895 for (StorageDirectory sd : storageDirs) { 896 String dirPath = sd.getRoot().getAbsolutePath(); 897 if (!dirPath.endsWith(File.separator)) { 898 dirPath += File.separator; 899 } 900 if (absPath.startsWith(dirPath)) { 901 reportErrorsOnDirectory(sd); 902 return; 903 } 904 } 905 906 } 907 908 /** 909 * Generate new clusterID. 910 * 911 * clusterID is a persistent attribute of the cluster. 912 * It is generated when the cluster is created and remains the same 913 * during the life cycle of the cluster. When a new name node is formated, if 914 * this is a new cluster, a new clusterID is geneated and stored. Subsequent 915 * name node must be given the same ClusterID during its format to be in the 916 * same cluster. 917 * When a datanode register it receive the clusterID and stick with it. 918 * If at any point, name node or data node tries to join another cluster, it 919 * will be rejected. 920 * 921 * @return new clusterID 922 */ 923 public static String newClusterID() { 924 return "CID-" + UUID.randomUUID().toString(); 925 } 926 927 void setClusterID(String cid) { 928 clusterID = cid; 929 } 930 931 /** 932 * try to find current cluster id in the VERSION files 933 * returns first cluster id found in any VERSION file 934 * null in case none found 935 * @return clusterId or null in case no cluster id found 936 */ 937 public String determineClusterId() { 938 String cid = null; 939 Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE); 940 while(sdit.hasNext()) { 941 StorageDirectory sd = sdit.next(); 942 try { 943 Properties props = readPropertiesFile(sd.getVersionFile()); 944 cid = props.getProperty("clusterID"); 945 LOG.info("current cluster id for sd="+sd.getCurrentDir() + 946 ";lv=" + layoutVersion + ";cid=" + cid); 947 948 if(cid != null && !cid.equals("")) 949 return cid; 950 } catch (Exception e) { 951 LOG.warn("this sd not available: " + e.getLocalizedMessage()); 952 } //ignore 953 } 954 LOG.warn("couldn't find any VERSION file containing valid ClusterId"); 955 return null; 956 } 957 958 /** 959 * Generate new blockpoolID. 960 * 961 * @return new blockpoolID 962 */ 963 static String newBlockPoolID() throws UnknownHostException{ 964 String ip = "unknownIP"; 965 try { 966 ip = DNS.getDefaultIP("default"); 967 } catch (UnknownHostException e) { 968 LOG.warn("Could not find ip address of \"default\" inteface."); 969 throw e; 970 } 971 972 int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE); 973 String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now(); 974 return bpid; 975 } 976 977 /** Validate and set block pool ID */ 978 public void setBlockPoolID(String bpid) { 979 blockpoolID = bpid; 980 } 981 982 /** Validate and set block pool ID */ 983 private void setBlockPoolID(File storage, String bpid) 984 throws InconsistentFSStateException { 985 if (bpid == null || bpid.equals("")) { 986 throw new InconsistentFSStateException(storage, "file " 987 + Storage.STORAGE_FILE_VERSION + " has no block pool Id."); 988 } 989 990 if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) { 991 throw new InconsistentFSStateException(storage, 992 "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID); 993 } 994 setBlockPoolID(bpid); 995 } 996 997 public String getBlockPoolID() { 998 return blockpoolID; 999 } 1000 1001 /** 1002 * Iterate over all current storage directories, inspecting them 1003 * with the given inspector. 1004 */ 1005 void inspectStorageDirs(FSImageStorageInspector inspector) 1006 throws IOException { 1007 1008 // Process each of the storage directories to find the pair of 1009 // newest image file and edit file 1010 for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) { 1011 StorageDirectory sd = it.next(); 1012 inspector.inspectDirectory(sd); 1013 } 1014 } 1015 1016 /** 1017 * Iterate over all of the storage dirs, reading their contents to determine 1018 * their layout versions. Returns an FSImageStorageInspector which has 1019 * inspected each directory. 1020 * 1021 * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc). 1022 * @throws IOException if no valid storage dirs are found or no valid layout version 1023 */ 1024 FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes, 1025 StartupOption startupOption) throws IOException { 1026 Integer layoutVersion = null; 1027 boolean multipleLV = false; 1028 StringBuilder layoutVersions = new StringBuilder(); 1029 1030 // First determine what range of layout versions we're going to inspect 1031 for (Iterator<StorageDirectory> it = dirIterator(false); 1032 it.hasNext();) { 1033 StorageDirectory sd = it.next(); 1034 if (!sd.getVersionFile().exists()) { 1035 FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping..."); 1036 continue; 1037 } 1038 readProperties(sd, startupOption); // sets layoutVersion 1039 int lv = getLayoutVersion(); 1040 if (layoutVersion == null) { 1041 layoutVersion = Integer.valueOf(lv); 1042 } else if (!layoutVersion.equals(lv)) { 1043 multipleLV = true; 1044 } 1045 layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") "); 1046 } 1047 1048 if (layoutVersion == null) { 1049 throw new IOException("No storage directories contained VERSION information"); 1050 } 1051 if (multipleLV) { 1052 throw new IOException( 1053 "Storage directories contain multiple layout versions: " 1054 + layoutVersions); 1055 } 1056 // If the storage directories are with the new layout version 1057 // (ie edits_<txnid>) then use the new inspector, which will ignore 1058 // the old format dirs. 1059 FSImageStorageInspector inspector; 1060 if (NameNodeLayoutVersion.supports( 1061 LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { 1062 inspector = new FSImageTransactionalStorageInspector(fileTypes); 1063 } else { 1064 inspector = new FSImagePreTransactionalStorageInspector(); 1065 } 1066 1067 inspectStorageDirs(inspector); 1068 return inspector; 1069 } 1070 1071 public NamespaceInfo getNamespaceInfo() { 1072 return new NamespaceInfo( 1073 getNamespaceID(), 1074 getClusterID(), 1075 getBlockPoolID(), 1076 getCTime()); 1077 } 1078}