001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.common; 019 020import java.io.File; 021import java.io.FileOutputStream; 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.io.RandomAccessFile; 025import java.lang.management.ManagementFactory; 026import java.nio.channels.FileLock; 027import java.nio.channels.OverlappingFileLockException; 028import java.util.ArrayList; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Properties; 032 033import org.apache.commons.logging.Log; 034import org.apache.commons.logging.LogFactory; 035import org.apache.hadoop.classification.InterfaceAudience; 036import org.apache.hadoop.fs.FileUtil; 037import org.apache.hadoop.fs.Path; 038import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; 039import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 040import org.apache.hadoop.io.nativeio.NativeIO; 041import org.apache.hadoop.io.nativeio.NativeIOException; 042import org.apache.hadoop.util.ToolRunner; 043import org.apache.hadoop.util.VersionInfo; 044 045import com.google.common.base.Charsets; 046import 
com.google.common.base.Preconditions; 047 048 049 050/** 051 * Storage information file. 052 * <p> 053 * Local storage information is stored in a separate file VERSION. 054 * It contains type of the node, 055 * the storage layout version, the namespace id, and 056 * the fs state creation time. 057 * <p> 058 * Local storage can reside in multiple directories. 059 * Each directory should contain the same VERSION file as the others. 060 * During startup Hadoop servers (name-node and data-nodes) read their local 061 * storage information from them. 062 * <p> 063 * The servers hold a lock for each storage directory while they run so that 064 * other nodes were not able to startup sharing the same storage. 065 * The locks are released when the servers stop (normally or abnormally). 066 * 067 */ 068@InterfaceAudience.Private 069public abstract class Storage extends StorageInfo { 070 public static final Log LOG = LogFactory.getLog(Storage.class.getName()); 071 072 // last layout version that did not support upgrades 073 public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3; 074 075 // this corresponds to Hadoop-0.18 076 public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16; 077 protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18"; 078 079 /** Layout versions of 0.20.203 release */ 080 public static final int[] LAYOUT_VERSIONS_203 = {-19, -31}; 081 082 public static final String STORAGE_FILE_LOCK = "in_use.lock"; 083 public static final String STORAGE_DIR_CURRENT = "current"; 084 public static final String STORAGE_DIR_PREVIOUS = "previous"; 085 public static final String STORAGE_TMP_REMOVED = "removed.tmp"; 086 public static final String STORAGE_TMP_PREVIOUS = "previous.tmp"; 087 public static final String STORAGE_TMP_FINALIZED = "finalized.tmp"; 088 public static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp"; 089 public static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint"; 090 091 /** 092 * The 
blocksBeingWritten directory which was used in some 1.x and earlier 093 * releases. 094 */ 095 public static final String STORAGE_1_BBW = "blocksBeingWritten"; 096 097 public enum StorageState { 098 NON_EXISTENT, 099 NOT_FORMATTED, 100 COMPLETE_UPGRADE, 101 RECOVER_UPGRADE, 102 COMPLETE_FINALIZE, 103 COMPLETE_ROLLBACK, 104 RECOVER_ROLLBACK, 105 COMPLETE_CHECKPOINT, 106 RECOVER_CHECKPOINT, 107 NORMAL; 108 } 109 110 /** 111 * An interface to denote storage directory type 112 * Implementations can define a type for storage directory by implementing 113 * this interface. 114 */ 115 @InterfaceAudience.Private 116 public interface StorageDirType { 117 public StorageDirType getStorageDirType(); 118 public boolean isOfType(StorageDirType type); 119 } 120 121 protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>(); 122 123 private class DirIterator implements Iterator<StorageDirectory> { 124 final StorageDirType dirType; 125 final boolean includeShared; 126 int prevIndex; // for remove() 127 int nextIndex; // for next() 128 129 DirIterator(StorageDirType dirType, boolean includeShared) { 130 this.dirType = dirType; 131 this.nextIndex = 0; 132 this.prevIndex = 0; 133 this.includeShared = includeShared; 134 } 135 136 @Override 137 public boolean hasNext() { 138 if (storageDirs.isEmpty() || nextIndex >= storageDirs.size()) 139 return false; 140 if (dirType != null || !includeShared) { 141 while (nextIndex < storageDirs.size()) { 142 if (shouldReturnNextDir()) 143 break; 144 nextIndex++; 145 } 146 if (nextIndex >= storageDirs.size()) 147 return false; 148 } 149 return true; 150 } 151 152 @Override 153 public StorageDirectory next() { 154 StorageDirectory sd = getStorageDir(nextIndex); 155 prevIndex = nextIndex; 156 nextIndex++; 157 if (dirType != null || !includeShared) { 158 while (nextIndex < storageDirs.size()) { 159 if (shouldReturnNextDir()) 160 break; 161 nextIndex++; 162 } 163 } 164 return sd; 165 } 166 167 @Override 168 public void remove() { 
169 nextIndex = prevIndex; // restore previous state 170 storageDirs.remove(prevIndex); // remove last returned element 171 hasNext(); // reset nextIndex to correct place 172 } 173 174 private boolean shouldReturnNextDir() { 175 StorageDirectory sd = getStorageDir(nextIndex); 176 return (dirType == null || sd.getStorageDirType().isOfType(dirType)) && 177 (includeShared || !sd.isShared()); 178 } 179 } 180 181 /** 182 * @return A list of the given File in every available storage directory, 183 * regardless of whether it might exist. 184 */ 185 public List<File> getFiles(StorageDirType dirType, String fileName) { 186 ArrayList<File> list = new ArrayList<File>(); 187 Iterator<StorageDirectory> it = 188 (dirType == null) ? dirIterator() : dirIterator(dirType); 189 for ( ;it.hasNext(); ) { 190 list.add(new File(it.next().getCurrentDir(), fileName)); 191 } 192 return list; 193 } 194 195 196 /** 197 * Return default iterator 198 * This iterator returns all entries in storageDirs 199 */ 200 public Iterator<StorageDirectory> dirIterator() { 201 return dirIterator(null); 202 } 203 204 /** 205 * Return iterator based on Storage Directory Type 206 * This iterator selects entries in storageDirs of type dirType and returns 207 * them via the Iterator 208 */ 209 public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) { 210 return dirIterator(dirType, true); 211 } 212 213 /** 214 * Return all entries in storageDirs, potentially excluding shared dirs. 215 * @param includeShared whether or not to include shared dirs. 216 * @return an iterator over the configured storage dirs. 217 */ 218 public Iterator<StorageDirectory> dirIterator(boolean includeShared) { 219 return dirIterator(null, includeShared); 220 } 221 222 /** 223 * @param dirType all entries will be of this type of dir 224 * @param includeShared true to include any shared directories, 225 * false otherwise 226 * @return an iterator over the configured storage dirs. 
227 */ 228 public Iterator<StorageDirectory> dirIterator(StorageDirType dirType, 229 boolean includeShared) { 230 return new DirIterator(dirType, includeShared); 231 } 232 233 public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) { 234 return new Iterable<StorageDirectory>() { 235 @Override 236 public Iterator<StorageDirectory> iterator() { 237 return dirIterator(dirType); 238 } 239 }; 240 } 241 242 243 /** 244 * generate storage list (debug line) 245 */ 246 public String listStorageDirectories() { 247 StringBuilder buf = new StringBuilder(); 248 for (StorageDirectory sd : storageDirs) { 249 buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");"); 250 } 251 return buf.toString(); 252 } 253 254 /** 255 * One of the storage directories. 256 */ 257 @InterfaceAudience.Private 258 public static class StorageDirectory implements FormatConfirmable { 259 final File root; // root directory 260 // whether or not this dir is shared between two separate NNs for HA, or 261 // between multiple block pools in the case of federation. 262 final boolean isShared; 263 final StorageDirType dirType; // storage dir type 264 FileLock lock; // storage lock 265 266 private String storageUuid = null; // Storage directory identifier. 267 268 public StorageDirectory(File dir) { 269 // default dirType is null 270 this(dir, null, false); 271 } 272 273 public StorageDirectory(File dir, StorageDirType dirType) { 274 this(dir, dirType, false); 275 } 276 277 public void setStorageUuid(String storageUuid) { 278 this.storageUuid = storageUuid; 279 } 280 281 public String getStorageUuid() { 282 return storageUuid; 283 } 284 285 /** 286 * Constructor 287 * @param dir directory corresponding to the storage 288 * @param dirType storage directory type 289 * @param isShared whether or not this dir is shared between two NNs. 
true 290 * disables locking on the storage directory, false enables locking 291 */ 292 public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) { 293 this.root = dir; 294 this.lock = null; 295 this.dirType = dirType; 296 this.isShared = isShared; 297 } 298 299 /** 300 * Get root directory of this storage 301 */ 302 public File getRoot() { 303 return root; 304 } 305 306 /** 307 * Get storage directory type 308 */ 309 public StorageDirType getStorageDirType() { 310 return dirType; 311 } 312 313 public void read(File from, Storage storage) throws IOException { 314 Properties props = readPropertiesFile(from); 315 storage.setFieldsFromProperties(props, this); 316 } 317 318 /** 319 * Clear and re-create storage directory. 320 * <p> 321 * Removes contents of the current directory and creates an empty directory. 322 * 323 * This does not fully format storage directory. 324 * It cannot write the version file since it should be written last after 325 * all other storage type dependent files are written. 326 * Derived storage is responsible for setting specific storage values and 327 * writing the version file to disk. 328 * 329 * @throws IOException 330 */ 331 public void clearDirectory() throws IOException { 332 File curDir = this.getCurrentDir(); 333 if (curDir.exists()) 334 if (!(FileUtil.fullyDelete(curDir))) 335 throw new IOException("Cannot remove current directory: " + curDir); 336 if (!curDir.mkdirs()) 337 throw new IOException("Cannot create directory " + curDir); 338 } 339 340 /** 341 * Directory {@code current} contains latest files defining 342 * the file system meta-data. 
343 * 344 * @return the directory path 345 */ 346 public File getCurrentDir() { 347 return new File(root, STORAGE_DIR_CURRENT); 348 } 349 350 /** 351 * File {@code VERSION} contains the following fields: 352 * <ol> 353 * <li>node type</li> 354 * <li>layout version</li> 355 * <li>namespaceID</li> 356 * <li>fs state creation time</li> 357 * <li>other fields specific for this node type</li> 358 * </ol> 359 * The version file is always written last during storage directory updates. 360 * The existence of the version file indicates that all other files have 361 * been successfully written in the storage directory, the storage is valid 362 * and does not need to be recovered. 363 * 364 * @return the version file path 365 */ 366 public File getVersionFile() { 367 return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION); 368 } 369 370 /** 371 * File {@code VERSION} from the {@code previous} directory. 372 * 373 * @return the previous version file path 374 */ 375 public File getPreviousVersionFile() { 376 return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION); 377 } 378 379 /** 380 * Directory {@code previous} contains the previous file system state, 381 * which the system can be rolled back to. 382 * 383 * @return the directory path 384 */ 385 public File getPreviousDir() { 386 return new File(root, STORAGE_DIR_PREVIOUS); 387 } 388 389 /** 390 * {@code previous.tmp} is a transient directory, which holds 391 * current file system state while the new state is saved into the new 392 * {@code current} during upgrade. 393 * If the saving succeeds {@code previous.tmp} will be moved to 394 * {@code previous}, otherwise it will be renamed back to 395 * {@code current} by the recovery procedure during startup. 
396 * 397 * @return the directory path 398 */ 399 public File getPreviousTmp() { 400 return new File(root, STORAGE_TMP_PREVIOUS); 401 } 402 403 /** 404 * {@code removed.tmp} is a transient directory, which holds 405 * current file system state while the previous state is moved into 406 * {@code current} during rollback. 407 * If the moving succeeds {@code removed.tmp} will be removed, 408 * otherwise it will be renamed back to 409 * {@code current} by the recovery procedure during startup. 410 * 411 * @return the directory path 412 */ 413 public File getRemovedTmp() { 414 return new File(root, STORAGE_TMP_REMOVED); 415 } 416 417 /** 418 * {@code finalized.tmp} is a transient directory, which holds 419 * the {@code previous} file system state while it is being removed 420 * in response to the finalize request. 421 * Finalize operation will remove {@code finalized.tmp} when completed, 422 * otherwise the removal will resume upon the system startup. 423 * 424 * @return the directory path 425 */ 426 public File getFinalizedTmp() { 427 return new File(root, STORAGE_TMP_FINALIZED); 428 } 429 430 /** 431 * {@code lastcheckpoint.tmp} is a transient directory, which holds 432 * current file system state while the new state is saved into the new 433 * {@code current} during regular namespace updates. 434 * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to 435 * {@code previous.checkpoint}, otherwise it will be renamed back to 436 * {@code current} by the recovery procedure during startup. 437 * 438 * @return the directory path 439 */ 440 public File getLastCheckpointTmp() { 441 return new File(root, STORAGE_TMP_LAST_CKPT); 442 } 443 444 /** 445 * {@code previous.checkpoint} is a directory, which holds the previous 446 * (before the last save) state of the storage directory. 
447 * The directory is created as a reference only, it does not play role 448 * in state recovery procedures, and is recycled automatically, 449 * but it may be useful for manual recovery of a stale state of the system. 450 * 451 * @return the directory path 452 */ 453 public File getPreviousCheckpoint() { 454 return new File(root, STORAGE_PREVIOUS_CKPT); 455 } 456 457 /** 458 * Check consistency of the storage directory 459 * 460 * @param startOpt a startup option. 461 * 462 * @return state {@link StorageState} of the storage directory 463 * @throws InconsistentFSStateException if directory state is not 464 * consistent and cannot be recovered. 465 * @throws IOException 466 */ 467 public StorageState analyzeStorage(StartupOption startOpt, Storage storage) 468 throws IOException { 469 assert root != null : "root is null"; 470 boolean hadMkdirs = false; 471 String rootPath = root.getCanonicalPath(); 472 try { // check that storage exists 473 if (!root.exists()) { 474 // storage directory does not exist 475 if (startOpt != StartupOption.FORMAT && 476 startOpt != StartupOption.HOTSWAP) { 477 LOG.warn("Storage directory " + rootPath + " does not exist"); 478 return StorageState.NON_EXISTENT; 479 } 480 LOG.info(rootPath + " does not exist. 
Creating ..."); 481 if (!root.mkdirs()) 482 throw new IOException("Cannot create directory " + rootPath); 483 hadMkdirs = true; 484 } 485 // or is inaccessible 486 if (!root.isDirectory()) { 487 LOG.warn(rootPath + "is not a directory"); 488 return StorageState.NON_EXISTENT; 489 } 490 if (!FileUtil.canWrite(root)) { 491 LOG.warn("Cannot access storage directory " + rootPath); 492 return StorageState.NON_EXISTENT; 493 } 494 } catch(SecurityException ex) { 495 LOG.warn("Cannot access storage directory " + rootPath, ex); 496 return StorageState.NON_EXISTENT; 497 } 498 499 this.lock(); // lock storage if it exists 500 501 // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory, 502 // while it also checks the layout version. 503 if (startOpt == HdfsServerConstants.StartupOption.FORMAT || 504 (startOpt == StartupOption.HOTSWAP && hadMkdirs)) 505 return StorageState.NOT_FORMATTED; 506 507 if (startOpt != HdfsServerConstants.StartupOption.IMPORT) { 508 storage.checkOldLayoutStorage(this); 509 } 510 511 // check whether current directory is valid 512 File versionFile = getVersionFile(); 513 boolean hasCurrent = versionFile.exists(); 514 515 // check which directories exist 516 boolean hasPrevious = getPreviousDir().exists(); 517 boolean hasPreviousTmp = getPreviousTmp().exists(); 518 boolean hasRemovedTmp = getRemovedTmp().exists(); 519 boolean hasFinalizedTmp = getFinalizedTmp().exists(); 520 boolean hasCheckpointTmp = getLastCheckpointTmp().exists(); 521 522 if (!(hasPreviousTmp || hasRemovedTmp 523 || hasFinalizedTmp || hasCheckpointTmp)) { 524 // no temp dirs - no recovery 525 if (hasCurrent) 526 return StorageState.NORMAL; 527 if (hasPrevious) 528 throw new InconsistentFSStateException(root, 529 "version file in current directory is missing."); 530 return StorageState.NOT_FORMATTED; 531 } 532 533 if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0) 534 + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1) 535 // more than one temp dirs 536 throw new 
InconsistentFSStateException(root, 537 "too many temporary directories."); 538 539 // # of temp dirs == 1 should either recover or complete a transition 540 if (hasCheckpointTmp) { 541 return hasCurrent ? StorageState.COMPLETE_CHECKPOINT 542 : StorageState.RECOVER_CHECKPOINT; 543 } 544 545 if (hasFinalizedTmp) { 546 if (hasPrevious) 547 throw new InconsistentFSStateException(root, 548 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED 549 + "cannot exist together."); 550 return StorageState.COMPLETE_FINALIZE; 551 } 552 553 if (hasPreviousTmp) { 554 if (hasPrevious) 555 throw new InconsistentFSStateException(root, 556 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS 557 + " cannot exist together."); 558 if (hasCurrent) 559 return StorageState.COMPLETE_UPGRADE; 560 return StorageState.RECOVER_UPGRADE; 561 } 562 563 assert hasRemovedTmp : "hasRemovedTmp must be true"; 564 if (!(hasCurrent ^ hasPrevious)) 565 throw new InconsistentFSStateException(root, 566 "one and only one directory " + STORAGE_DIR_CURRENT 567 + " or " + STORAGE_DIR_PREVIOUS 568 + " must be present when " + STORAGE_TMP_REMOVED 569 + " exists."); 570 if (hasCurrent) 571 return StorageState.COMPLETE_ROLLBACK; 572 return StorageState.RECOVER_ROLLBACK; 573 } 574 575 /** 576 * Complete or recover storage state from previously failed transition. 
577 * 578 * @param curState specifies what/how the state should be recovered 579 * @throws IOException 580 */ 581 public void doRecover(StorageState curState) throws IOException { 582 File curDir = getCurrentDir(); 583 String rootPath = root.getCanonicalPath(); 584 switch(curState) { 585 case COMPLETE_UPGRADE: // mv previous.tmp -> previous 586 LOG.info("Completing previous upgrade for storage directory " 587 + rootPath); 588 rename(getPreviousTmp(), getPreviousDir()); 589 return; 590 case RECOVER_UPGRADE: // mv previous.tmp -> current 591 LOG.info("Recovering storage directory " + rootPath 592 + " from previous upgrade"); 593 if (curDir.exists()) 594 deleteDir(curDir); 595 rename(getPreviousTmp(), curDir); 596 return; 597 case COMPLETE_ROLLBACK: // rm removed.tmp 598 LOG.info("Completing previous rollback for storage directory " 599 + rootPath); 600 deleteDir(getRemovedTmp()); 601 return; 602 case RECOVER_ROLLBACK: // mv removed.tmp -> current 603 LOG.info("Recovering storage directory " + rootPath 604 + " from previous rollback"); 605 rename(getRemovedTmp(), curDir); 606 return; 607 case COMPLETE_FINALIZE: // rm finalized.tmp 608 LOG.info("Completing previous finalize for storage directory " 609 + rootPath); 610 deleteDir(getFinalizedTmp()); 611 return; 612 case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint 613 LOG.info("Completing previous checkpoint for storage directory " 614 + rootPath); 615 File prevCkptDir = getPreviousCheckpoint(); 616 if (prevCkptDir.exists()) 617 deleteDir(prevCkptDir); 618 rename(getLastCheckpointTmp(), prevCkptDir); 619 return; 620 case RECOVER_CHECKPOINT: // mv lastcheckpoint.tmp -> current 621 LOG.info("Recovering storage directory " + rootPath 622 + " from failed checkpoint"); 623 if (curDir.exists()) 624 deleteDir(curDir); 625 rename(getLastCheckpointTmp(), curDir); 626 return; 627 default: 628 throw new IOException("Unexpected FS state: " + curState); 629 } 630 } 631 632 /** 633 * @return true if the storage 
directory should prompt the user prior 634 * to formatting (i.e if the directory appears to contain some data) 635 * @throws IOException if the SD cannot be accessed due to an IO error 636 */ 637 @Override 638 public boolean hasSomeData() throws IOException { 639 // Its alright for a dir not to exist, or to exist (properly accessible) 640 // and be completely empty. 641 if (!root.exists()) return false; 642 643 if (!root.isDirectory()) { 644 // a file where you expect a directory should not cause silent 645 // formatting 646 return true; 647 } 648 649 if (FileUtil.listFiles(root).length == 0) { 650 // Empty dir can format without prompt. 651 return false; 652 } 653 654 return true; 655 } 656 657 public boolean isShared() { 658 return isShared; 659 } 660 661 662 /** 663 * Lock storage to provide exclusive access. 664 * 665 * <p> Locking is not supported by all file systems. 666 * E.g., NFS does not consistently support exclusive locks. 667 * 668 * <p> If locking is supported we guarantee exclusive access to the 669 * storage directory. Otherwise, no guarantee is given. 670 * 671 * @throws IOException if locking fails 672 */ 673 public void lock() throws IOException { 674 if (isShared()) { 675 LOG.info("Locking is disabled for " + this.root); 676 return; 677 } 678 FileLock newLock = tryLock(); 679 if (newLock == null) { 680 String msg = "Cannot lock storage " + this.root 681 + ". The directory is already locked"; 682 LOG.info(msg); 683 throw new IOException(msg); 684 } 685 // Don't overwrite lock until success - this way if we accidentally 686 // call lock twice, the internal state won't be cleared by the second 687 // (failed) lock attempt 688 lock = newLock; 689 } 690 691 /** 692 * Attempts to acquire an exclusive lock on the storage. 693 * 694 * @return A lock object representing the newly-acquired lock or 695 * <code>null</code> if storage is already locked. 696 * @throws IOException if locking fails. 
697 */ 698 @SuppressWarnings("resource") 699 FileLock tryLock() throws IOException { 700 boolean deletionHookAdded = false; 701 File lockF = new File(root, STORAGE_FILE_LOCK); 702 if (!lockF.exists()) { 703 lockF.deleteOnExit(); 704 deletionHookAdded = true; 705 } 706 RandomAccessFile file = new RandomAccessFile(lockF, "rws"); 707 String jvmName = ManagementFactory.getRuntimeMXBean().getName(); 708 FileLock res = null; 709 try { 710 res = file.getChannel().tryLock(); 711 if (null == res) { 712 throw new OverlappingFileLockException(); 713 } 714 file.write(jvmName.getBytes(Charsets.UTF_8)); 715 LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName); 716 } catch(OverlappingFileLockException oe) { 717 // Cannot read from the locked file on Windows. 718 String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine()); 719 LOG.error("It appears that another node " + lockingJvmName 720 + " has already locked the storage directory: " + root, oe); 721 file.close(); 722 return null; 723 } catch(IOException e) { 724 LOG.error("Failed to acquire lock on " + lockF 725 + ". If this storage directory is mounted via NFS, " 726 + "ensure that the appropriate nfs lock services are running.", e); 727 file.close(); 728 throw e; 729 } 730 if (!deletionHookAdded) { 731 // If the file existed prior to our startup, we didn't 732 // call deleteOnExit above. But since we successfully locked 733 // the dir, we can take care of cleaning it up. 734 lockF.deleteOnExit(); 735 } 736 return res; 737 } 738 739 /** 740 * Unlock storage. 741 * 742 * @throws IOException 743 */ 744 public void unlock() throws IOException { 745 if (this.lock == null) 746 return; 747 this.lock.release(); 748 lock.channel().close(); 749 lock = null; 750 } 751 752 @Override 753 public String toString() { 754 return "Storage Directory " + this.root; 755 } 756 757 /** 758 * Check whether underlying file system supports file locking. 
759 * 760 * @return <code>true</code> if exclusive locks are supported or 761 * <code>false</code> otherwise. 762 * @throws IOException 763 * @see StorageDirectory#lock() 764 */ 765 public boolean isLockSupported() throws IOException { 766 FileLock firstLock = null; 767 FileLock secondLock = null; 768 try { 769 firstLock = lock; 770 if(firstLock == null) { 771 firstLock = tryLock(); 772 if(firstLock == null) 773 return true; 774 } 775 secondLock = tryLock(); 776 if(secondLock == null) 777 return true; 778 } finally { 779 if(firstLock != null && firstLock != lock) { 780 firstLock.release(); 781 firstLock.channel().close(); 782 } 783 if(secondLock != null) { 784 secondLock.release(); 785 secondLock.channel().close(); 786 } 787 } 788 return false; 789 } 790 } 791 792 /** 793 * Create empty storage info of the specified type 794 */ 795 protected Storage(NodeType type) { 796 super(type); 797 } 798 799 protected Storage(StorageInfo storageInfo) { 800 super(storageInfo); 801 } 802 803 public int getNumStorageDirs() { 804 return storageDirs.size(); 805 } 806 807 public StorageDirectory getStorageDir(int idx) { 808 return storageDirs.get(idx); 809 } 810 811 /** 812 * @return the storage directory, with the precondition that this storage 813 * has exactly one storage directory 814 */ 815 public StorageDirectory getSingularStorageDir() { 816 Preconditions.checkState(storageDirs.size() == 1); 817 return storageDirs.get(0); 818 } 819 820 protected void addStorageDir(StorageDirectory sd) { 821 storageDirs.add(sd); 822 } 823 824 /** 825 * Returns true if the storage directory on the given directory is already 826 * loaded. 827 * @param root the root directory of a {@link StorageDirectory} 828 * @throws IOException if failed to get canonical path. 
829 */ 830 protected boolean containsStorageDir(File root) throws IOException { 831 for (StorageDirectory sd : storageDirs) { 832 if (sd.getRoot().getCanonicalPath().equals(root.getCanonicalPath())) { 833 return true; 834 } 835 } 836 return false; 837 } 838 839 /** 840 * Return true if the layout of the given storage directory is from a version 841 * of Hadoop prior to the introduction of the "current" and "previous" 842 * directories which allow upgrade and rollback. 843 */ 844 public abstract boolean isPreUpgradableLayout(StorageDirectory sd) 845 throws IOException; 846 847 /** 848 * Check if the given storage directory comes from a version of Hadoop 849 * prior to when the directory layout changed (ie 0.13). If this is 850 * the case, this method throws an IOException. 851 */ 852 private void checkOldLayoutStorage(StorageDirectory sd) throws IOException { 853 if (isPreUpgradableLayout(sd)) { 854 checkVersionUpgradable(0); 855 } 856 } 857 858 /** 859 * Checks if the upgrade from {@code oldVersion} is supported. 860 * @param oldVersion the version of the metadata to check with the current 861 * version 862 * @throws IOException if upgrade is not supported 863 */ 864 public static void checkVersionUpgradable(int oldVersion) 865 throws IOException { 866 if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) { 867 String msg = "*********** Upgrade is not supported from this " + 868 " older version " + oldVersion + 869 " of storage to the current version." + 870 " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION + 871 " or a later version and then upgrade to current" + 872 " version. Old layout version is " + 873 (oldVersion == 0 ? "'too old'" : (""+oldVersion)) + 874 " and latest layout version this software version can" + 875 " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION + 876 ". 
************";
      LOG.error(msg);
      throw new IOException(msg);
    }

  }

  /**
   * Iterate over each of the {@link FormatConfirmable} objects,
   * potentially checking with the user whether it should be formatted.
   *
   * If running in interactive mode, will prompt the user for each
   * directory to allow them to format anyway. Otherwise, returns
   * false, unless 'force' is specified.
   *
   * @param items the storage items whose formatting must be confirmed
   * @param force format regardless of whether dirs exist
   * @param interactive prompt the user when a dir exists
   * @return true if formatting should proceed
   * @throws IOException if some storage cannot be accessed
   */
  public static boolean confirmFormat(
      Iterable<? extends FormatConfirmable> items,
      boolean force, boolean interactive) throws IOException {
    for (FormatConfirmable item : items) {
      if (!item.hasSomeData()) {
        continue;
      }
      if (force) { // Don't confirm, always format.
        System.err.println(
            "Data exists in " + item + ". Formatting anyway.");
        continue;
      }
      if (!interactive) { // Don't ask - always don't format
        System.err.println(
            "Running in non-interactive mode, and data appears to exist in " +
            item + ". Not formatting.");
        return false;
      }
      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
        System.err.println("Format aborted in " + item);
        return false;
      }
    }

    return true;
  }

  /**
   * Interface for classes which need to have the user confirm their
   * formatting during NameNode -format and other similar operations.
   *
   * This is currently a storage directory or journal manager.
   */
  @InterfaceAudience.Private
  public interface FormatConfirmable {
    /**
     * @return true if the storage seems to have some valid data in it,
     * and the user should be required to confirm the format. Otherwise,
     * false.
     * @throws IOException if the storage cannot be accessed at all.
     */
    public boolean hasSomeData() throws IOException;

    /**
     * @return a string representation of the formattable item, suitable
     * for display to the user inside a prompt
     */
    public String toString();
  }

  /**
   * Set common storage fields into the given properties object.
   * Should be overloaded if additional fields need to be set.
   *
   * @param props the Properties object to write into
   * @param sd the storage directory whose VERSION file the properties describe
   * @throws IOException if a field cannot be written
   */
  protected void setPropertiesFromFields(Properties props,
                                         StorageDirectory sd)
      throws IOException {
    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
    props.setProperty("storageType", storageType.toString());
    props.setProperty("namespaceID", String.valueOf(namespaceID));
    // Set clusterID in version with federation support
    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
      props.setProperty("clusterID", clusterID);
    }
    props.setProperty("cTime", String.valueOf(cTime));
  }

  /**
   * Write properties to the VERSION file in the given storage directory.
   *
   * @param sd the storage directory to write into
   * @throws IOException if the VERSION file cannot be written
   */
  public void writeProperties(StorageDirectory sd) throws IOException {
    writeProperties(sd.getVersionFile(), sd);
  }

  /**
   * Write this storage's fields as properties into the given file.
   *
   * @param to the file to write (normally a VERSION file)
   * @param sd the storage directory the properties describe
   * @throws IOException if the file cannot be written
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, sd, props);
  }

  /**
   * Atomically-enough persist the given properties into the file: the
   * properties are written at the head of the file and the file is then
   * truncated to the written length.
   *
   * @param to the file to write (normally a VERSION file)
   * @param sd the storage directory the properties describe (unused here,
   *           kept for subclass/caller symmetry)
   * @param props the properties to persist
   * @throws IOException if the file cannot be written
   */
  public static void writeProperties(File to, StorageDirectory sd,
      Properties props) throws IOException {
    // try-with-resources closes in reverse declaration order (out, then
    // file), matching the explicit finally block this replaces.
    try (RandomAccessFile file = new RandomAccessFile(to, "rws");
         FileOutputStream out = new FileOutputStream(file.getFD())) {
      file.seek(0);
      /*
       * If server is interrupted before this line,
       * the version file will remain unchanged.
       */
      props.store(out, null);
      /*
       * Now the new fields are flushed to the head of the file, but file
       * length can still be larger then required and therefore the file can
       * contain whole or corrupted fields from its old contents in the end.
       * If server is interrupted here and restarted later these extra fields
       * either should not effect server behavior or should be handled
       * by the server correctly.
       */
      file.setLength(out.getChannel().position());
    }
  }

  /**
   * Rename a file using native IO, translating a native failure into a
   * plain IOException that names both paths.
   *
   * @param from source file
   * @param to destination file
   * @throws IOException if the native rename fails
   */
  public static void rename(File from, File to) throws IOException {
    try {
      NativeIO.renameTo(from, to);
    } catch (NativeIOException e) {
      throw new IOException("Failed to rename " + from.getCanonicalPath()
          + " to " + to.getCanonicalPath() + " due to failure in native rename. "
          + e.toString());
    }
  }

  /**
   * Copies a file (usually large) to a new location using native unbuffered IO.
   * <p>
   * This method copies the contents of the specified source file
   * to the specified destination file using OS specific unbuffered IO.
   * The goal is to avoid churning the file system buffer cache when copying
   * large files.
   *
   * We can't use FileUtils#copyFile from apache-commons-io because it
   * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
   * internally.
   *
   * The directory holding the destination file is created if it does not exist.
   * If the destination file exists, then this method will delete it first.
   * <p>
   * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
   * {@code true} tries to preserve the file's last modified
   * date/times using {@link File#setLastModified(long)}, however it is
   * not guaranteed that the operation will succeed.
   * If the modification operation fails, no indication is provided.
   *
   * @param srcFile an existing file to copy, must not be {@code null}
   * @param destFile the new file, must not be {@code null}
   * @param preserveFileDate true if the file date of the copy
   * should be the same as the original
   *
   * @throws NullPointerException if source or destination is {@code null}
   * @throws IOException if source or destination is invalid
   * @throws IOException if an IO error occurs during copying
   */
  public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
      boolean preserveFileDate) throws IOException {
    if (srcFile == null) {
      throw new NullPointerException("Source must not be null");
    }
    if (destFile == null) {
      throw new NullPointerException("Destination must not be null");
    }
    if (!srcFile.exists()) {
      throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
    }
    if (srcFile.isDirectory()) {
      throw new IOException("Source '" + srcFile + "' exists but is a directory");
    }
    if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
      throw new IOException("Source '" + srcFile + "' and destination '" +
          destFile + "' are the same");
    }
    File parentFile = destFile.getParentFile();
    if (parentFile != null) {
      if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
        throw new IOException("Destination '" + parentFile
            + "' directory cannot be created");
      }
    }
    if (destFile.exists()) {
      if (!FileUtil.canWrite(destFile)) {
        throw new IOException("Destination '" + destFile
            + "' exists but is read-only");
      } else {
        if (!destFile.delete()) {
          throw new IOException("Destination '" + destFile
              + "' exists but cannot be deleted");
        }
      }
    }
    try {
      NativeIO.copyFileUnbuffered(srcFile, destFile);
    } catch (NativeIOException e) {
      throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
          + " to " + destFile.getCanonicalPath()
          + " due to failure in NativeIO#copyFileUnbuffered(). "
          + e.toString());
    }
    if (srcFile.length() != destFile.length()) {
      throw new IOException("Failed to copy full contents from '" + srcFile
          + "' to '" + destFile + "'");
    }
    if (preserveFileDate) {
      // Best effort only: setLastModified may fail silently on some
      // platforms; we just log at debug level.
      if (!destFile.setLastModified(srcFile.lastModified())) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Failed to preserve last modified date from '" + srcFile
              + "' to '" + destFile + "'");
        }
      }
    }
  }

  /**
   * Recursively delete all the content of the directory first and then
   * the directory itself from the local filesystem.
   * @param dir The directory to delete
   * @throws IOException if the directory cannot be fully deleted
   */
  public static void deleteDir(File dir) throws IOException {
    if (!FileUtil.fullyDelete(dir)) {
      throw new IOException("Failed to delete " + dir.getCanonicalPath());
    }
  }

  /**
   * Write all data storage files.
   * @throws IOException if any VERSION file cannot be written
   */
  public void writeAll() throws IOException {
    this.layoutVersion = getServiceLayoutVersion();
    for (StorageDirectory sd : storageDirs) {
      writeProperties(sd);
    }
  }

  /**
   * Unlock all storage directories.
   * @throws IOException if any directory cannot be unlocked
   */
  public void unlockAll() throws IOException {
    for (StorageDirectory sd : storageDirs) {
      sd.unlock();
    }
  }

  /**
   * @return the build revision this storage code was compiled from
   */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }

  /**
   * Build the registration ID string for the given storage:
   * "NS-&lt;namespaceID&gt;-&lt;clusterID&gt;-&lt;cTime&gt;".
   *
   * @param storage the storage info to derive the ID from
   * @return the registration ID
   */
  public static String getRegistrationID(StorageInfo storage) {
    return "NS-" + Integer.toString(storage.getNamespaceID())
        + "-" + storage.getClusterID()
        + "-" + Long.toString(storage.getCTime());
  }

  /**
   * @param layoutVersion the layout version to check
   * @return true if the given layout version is one of the 0.20.203
   *         release layout versions
   */
  public static boolean is203LayoutVersion(int layoutVersion) {
    for (int lv203 : LAYOUT_VERSIONS_203) {
      if (lv203 == layoutVersion) {
        return true;
      }
    }
    return false;
  }
}