001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.Closeable;
021import java.io.File;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.net.URI;
025import java.net.UnknownHostException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Properties;
033import java.util.UUID;
034import java.util.concurrent.CopyOnWriteArrayList;
035
036import org.apache.hadoop.classification.InterfaceAudience;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileUtil;
039import org.apache.hadoop.hdfs.DFSUtil;
040import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
045import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
046import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
047import org.apache.hadoop.hdfs.server.common.Storage;
048import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
049import org.apache.hadoop.hdfs.server.common.Util;
050import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
051import org.apache.hadoop.hdfs.util.PersistentLongFile;
052import org.apache.hadoop.io.IOUtils;
053import org.apache.hadoop.net.DNS;
054import org.apache.hadoop.util.Time;
055
056import com.google.common.annotations.VisibleForTesting;
057import com.google.common.base.Preconditions;
058import com.google.common.collect.Lists;
059
060/**
061 * NNStorage is responsible for management of the StorageDirectories used by
062 * the NameNode.
063 */
064@InterfaceAudience.Private
065public class NNStorage extends Storage implements Closeable,
066    StorageErrorReporter {
  /**
   * Key of a property from older VERSION files; it is copied into the
   * deprecated-properties map when a storage directory's properties are
   * loaded (presumably the fsimage MD5 digest, judging by the name).
   */
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  /** URI scheme name denoting a local file ("file"). */
  static final String LOCAL_URI_SCHEME = "file";
069
070  //
071  // The filenames used for storing the images
072  //
073  public enum NameNodeFile {
074    IMAGE     ("fsimage"),
075    TIME      ("fstime"), // from "old" pre-HDFS-1073 format
076    SEEN_TXID ("seen_txid"),
077    EDITS     ("edits"),
078    IMAGE_NEW ("fsimage.ckpt"),
079    IMAGE_ROLLBACK("fsimage_rollback"),
080    EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
081    EDITS_INPROGRESS ("edits_inprogress"),
082    EDITS_TMP ("edits_tmp"),
083    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
084
085    private String fileName = null;
086    private NameNodeFile(String name) { this.fileName = name; }
087    @VisibleForTesting
088    public String getName() { return fileName; }
089  }
090
091  /**
092   * Implementation of StorageDirType specific to namenode storage
093   * A Storage directory could be of type IMAGE which stores only fsimage,
094   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
095   * stores both fsimage and edits.
096   */
097  @VisibleForTesting
098  public static enum NameNodeDirType implements StorageDirType {
099    UNDEFINED,
100    IMAGE,
101    EDITS,
102    IMAGE_AND_EDITS;
103
104    @Override
105    public StorageDirType getStorageDirType() {
106      return this;
107    }
108
109    @Override
110    public boolean isOfType(StorageDirType type) {
111      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
112        return true;
113      return this == type;
114    }
115  }
116
  protected String blockpoolID = ""; // id of the block pool
  
  /**
   * Flag that controls whether we try to restore failed storages.
   */
  private boolean restoreFailedStorage = false;
  // Monitor held while removed storage directories are being restored, so
  // that only one thread attempts restoration at a time.
  private final Object restorationLock = new Object();
  // When true, isPreUpgradableLayout() returns false without inspecting
  // the directory.
  private boolean disablePreUpgradableLayoutCheck = false;


  /**
   * TxId of the last transaction that was included in the most
   * recent fsimage file. This does not include any transactions
   * that have since been written to the edit log.
   */
  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
  
  /**
   * Time of the last checkpoint, in milliseconds since the epoch.
   */
  private long mostRecentCheckpointTime = 0;

  /**
   * List of failed (and thus removed) storage directories.
   */
  final protected List<StorageDirectory> removedStorageDirs
    = new CopyOnWriteArrayList<StorageDirectory>();

  /**
   * Properties from old layout versions that may be needed
   * during upgrade only. Assigned each time the fields are loaded from a
   * directory's properties; remains unset (null) until then.
   */
  private HashMap<String, String> deprecatedProperties;
150
151  /**
152   * Construct the NNStorage.
153   * @param conf Namenode configuration.
154   * @param imageDirs Directories the image can be stored in.
155   * @param editsDirs Directories the editlog can be stored in.
156   * @throws IOException if any directories are inaccessible.
157   */
158  public NNStorage(Configuration conf, 
159                   Collection<URI> imageDirs, Collection<URI> editsDirs) 
160      throws IOException {
161    super(NodeType.NAME_NODE);
162
163    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
164    
165    // this may modify the editsDirs, so copy before passing in
166    setStorageDirectories(imageDirs, 
167                          Lists.newArrayList(editsDirs),
168                          FSNamesystem.getSharedEditsDirs(conf));
169  }
170
171  @Override // Storage
172  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
173    if (disablePreUpgradableLayoutCheck) {
174      return false;
175    }
176
177    File oldImageDir = new File(sd.getRoot(), "image");
178    if (!oldImageDir.exists()) {
179      return false;
180    }
181    // check the layout version inside the image file
182    File oldF = new File(oldImageDir, "fsimage");
183    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
184    try {
185      oldFile.seek(0);
186      int oldVersion = oldFile.readInt();
187      oldFile.close();
188      oldFile = null;
189      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
190        return false;
191    } finally {
192      IOUtils.cleanup(LOG, oldFile);
193    }
194    return true;
195  }
196
197  @Override // Closeable
198  public void close() throws IOException {
199    unlockAll();
200    storageDirs.clear();
201  }
202
203  /**
204   * Set flag whether an attempt should be made to restore failed storage
205   * directories at the next available oppurtuinity.
206   *
207   * @param val Whether restoration attempt should be made.
208   */
209  void setRestoreFailedStorage(boolean val) {
210    LOG.warn("set restore failed storage to " + val);
211    restoreFailedStorage=val;
212  }
213
214  /**
215   * @return Whether failed storage directories are to be restored.
216   */
217  boolean getRestoreFailedStorage() {
218    return restoreFailedStorage;
219  }
220
221  /**
222   * See if any of removed storages is "writable" again, and can be returned
223   * into service.
224   */
225  void attemptRestoreRemovedStorage() {
226    // if directory is "alive" - copy the images there...
227    if(!restoreFailedStorage || removedStorageDirs.size() == 0)
228      return; //nothing to restore
229
230    /* We don't want more than one thread trying to restore at a time */
231    synchronized (this.restorationLock) {
232      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
233               "storarge. removedStorages size = " + removedStorageDirs.size());
234      for(Iterator<StorageDirectory> it
235            = this.removedStorageDirs.iterator(); it.hasNext();) {
236        StorageDirectory sd = it.next();
237        File root = sd.getRoot();
238        LOG.info("currently disabled dir " + root.getAbsolutePath() +
239                 "; type="+sd.getStorageDirType() 
240                 + ";canwrite="+FileUtil.canWrite(root));
241        if(root.exists() && FileUtil.canWrite(root)) {
242          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
243          this.addStorageDir(sd); // restore
244          this.removedStorageDirs.remove(sd);
245        }
246      }
247    }
248  }
249
250  /**
251   * @return A list of storage directories which are in the errored state.
252   */
253  List<StorageDirectory> getRemovedStorageDirs() {
254    return this.removedStorageDirs;
255  }
256  
257  /**
258   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
259   */
260  @VisibleForTesting
261  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
262                                          Collection<URI> fsEditsDirs)
263      throws IOException {
264    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
265  }
266
267  /**
268   * Set the storage directories which will be used. This should only ever be
269   * called from inside NNStorage. However, it needs to remain package private
270   * for testing, as StorageDirectories need to be reinitialised after using
271   * Mockito.spy() on this class, as Mockito doesn't work well with inner
272   * classes, such as StorageDirectory in this case.
273   *
274   * Synchronized due to initialization of storageDirs and removedStorageDirs.
275   *
276   * @param fsNameDirs Locations to store images.
277   * @param fsEditsDirs Locations to store edit logs.
278   * @throws IOException
279   */
280  @VisibleForTesting
281  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
282                                          Collection<URI> fsEditsDirs,
283                                          Collection<URI> sharedEditsDirs)
284      throws IOException {
285    this.storageDirs.clear();
286    this.removedStorageDirs.clear();
287
288   // Add all name dirs with appropriate NameNodeDirType
289    for (URI dirName : fsNameDirs) {
290      checkSchemeConsistency(dirName);
291      boolean isAlsoEdits = false;
292      for (URI editsDirName : fsEditsDirs) {
293        if (editsDirName.compareTo(dirName) == 0) {
294          isAlsoEdits = true;
295          fsEditsDirs.remove(editsDirName);
296          break;
297        }
298      }
299      NameNodeDirType dirType = (isAlsoEdits) ?
300                          NameNodeDirType.IMAGE_AND_EDITS :
301                          NameNodeDirType.IMAGE;
302      // Add to the list of storage directories, only if the
303      // URI is of type file://
304      if(dirName.getScheme().compareTo("file") == 0) {
305        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
306            dirType,
307            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
308      }
309    }
310
311    // Add edits dirs if they are different from name dirs
312    for (URI dirName : fsEditsDirs) {
313      checkSchemeConsistency(dirName);
314      // Add to the list of storage directories, only if the
315      // URI is of type file://
316      if(dirName.getScheme().compareTo("file") == 0)
317        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
318                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
319    }
320  }
321
322  /**
323   * Return the storage directory corresponding to the passed URI
324   * @param uri URI of a storage directory
325   * @return The matching storage directory or null if none found
326   */
327  StorageDirectory getStorageDirectory(URI uri) {
328    try {
329      uri = Util.fileAsURI(new File(uri));
330      Iterator<StorageDirectory> it = dirIterator();
331      for (; it.hasNext(); ) {
332        StorageDirectory sd = it.next();
333        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
334          return sd;
335        }
336      }
337    } catch (IOException ioe) {
338      LOG.warn("Error converting file to URI", ioe);
339    }
340    return null;
341  }
342
343  /**
344   * Checks the consistency of a URI, in particular if the scheme
345   * is specified 
346   * @param u URI whose consistency is being checked.
347   */
348  private static void checkSchemeConsistency(URI u) throws IOException {
349    String scheme = u.getScheme();
350    // the URI should have a proper scheme
351    if(scheme == null) {
352      throw new IOException("Undefined scheme for " + u);
353    }
354  }
355
356  /**
357   * Retrieve current directories of type IMAGE
358   * @return Collection of URI representing image directories
359   * @throws IOException in case of URI processing error
360   */
361  Collection<URI> getImageDirectories() throws IOException {
362    return getDirectories(NameNodeDirType.IMAGE);
363  }
364
365  /**
366   * Retrieve current directories of type EDITS
367   * @return Collection of URI representing edits directories
368   * @throws IOException in case of URI processing error
369   */
370  Collection<URI> getEditsDirectories() throws IOException {
371    return getDirectories(NameNodeDirType.EDITS);
372  }
373
374  /**
375   * Return number of storage directories of the given type.
376   * @param dirType directory type
377   * @return number of storage directories of type dirType
378   */
379  int getNumStorageDirs(NameNodeDirType dirType) {
380    if(dirType == null)
381      return getNumStorageDirs();
382    Iterator<StorageDirectory> it = dirIterator(dirType);
383    int numDirs = 0;
384    for(; it.hasNext(); it.next())
385      numDirs++;
386    return numDirs;
387  }
388
389  /**
390   * Return the list of locations being used for a specific purpose.
391   * i.e. Image or edit log storage.
392   *
393   * @param dirType Purpose of locations requested.
394   * @throws IOException
395   */
396  Collection<URI> getDirectories(NameNodeDirType dirType)
397      throws IOException {
398    ArrayList<URI> list = new ArrayList<URI>();
399    Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
400                                    dirIterator(dirType);
401    for ( ;it.hasNext(); ) {
402      StorageDirectory sd = it.next();
403      try {
404        list.add(Util.fileAsURI(sd.getRoot()));
405      } catch (IOException e) {
406        throw new IOException("Exception while processing " +
407            "StorageDirectory " + sd.getRoot(), e);
408      }
409    }
410    return list;
411  }
412  
413  /**
414   * Determine the last transaction ID noted in this storage directory.
415   * This txid is stored in a special seen_txid file since it might not
416   * correspond to the latest image or edit log. For example, an image-only
417   * directory will have this txid incremented when edits logs roll, even
418   * though the edits logs are in a different directory.
419   *
420   * @param sd StorageDirectory to check
421   * @return If file exists and can be read, last recorded txid. If not, 0L.
422   * @throws IOException On errors processing file pointed to by sd
423   */
424  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
425    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
426    return PersistentLongFile.readFile(txidFile, 0);
427  }
428  
429  /**
430   * Write last checkpoint time into a separate file.
431   * @param sd storage directory
432   * @throws IOException
433   */
434  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
435    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
436    
437    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
438    PersistentLongFile.writeFile(txIdFile, txid);
439  }
440
441  /**
442   * Set the transaction ID and time of the last checkpoint
443   * 
444   * @param txid transaction id of the last checkpoint
445   * @param time time of the last checkpoint, in millis since the epoch
446   */
447  void setMostRecentCheckpointInfo(long txid, long time) {
448    this.mostRecentCheckpointTxId = txid;
449    this.mostRecentCheckpointTime = time;
450  }
451
452  /**
453   * @return the transaction ID of the last checkpoint.
454   */
455  public long getMostRecentCheckpointTxId() {
456    return mostRecentCheckpointTxId;
457  }
458  
459  /**
460   * @return the time of the most recent checkpoint in millis since the epoch.
461   */
462  long getMostRecentCheckpointTime() {
463    return mostRecentCheckpointTime;
464  }
465
466  /**
467   * Write a small file in all available storage directories that
468   * indicates that the namespace has reached some given transaction ID.
469   * 
470   * This is used when the image is loaded to avoid accidental rollbacks
471   * in the case where an edit log is fully deleted but there is no
472   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
473   * @param txid the txid that has been reached
474   */
475  public void writeTransactionIdFileToStorage(long txid) {
476    // Write txid marker in all storage directories
477    for (StorageDirectory sd : storageDirs) {
478      try {
479        writeTransactionIdFile(sd, txid);
480      } catch(IOException e) {
481        // Close any edits stream associated with this dir and remove directory
482        LOG.warn("writeTransactionIdToStorage failed on " + sd,
483            e);
484        reportErrorsOnDirectory(sd);
485      }
486    }
487  }
488
489  /**
490   * Return the name of the image file that is uploaded by periodic
491   * checkpointing
492   *
493   * @return List of filenames to save checkpoints to.
494   */
495  public File[] getFsImageNameCheckpoint(long txid) {
496    ArrayList<File> list = new ArrayList<File>();
497    for (Iterator<StorageDirectory> it =
498                 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
499      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
500    }
501    return list.toArray(new File[list.size()]);
502  }
503
504  /**
505   * @return The first image file with the given txid and image type.
506   */
507  public File getFsImageName(long txid, NameNodeFile nnf) {
508    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
509        it.hasNext();) {
510      StorageDirectory sd = it.next();
511      File fsImage = getStorageFile(sd, nnf, txid);
512      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
513        return fsImage;
514      }
515    }
516    return null;
517  }
518
519  /**
520   * @return The first image file whose txid is the same with the given txid and
521   * image type is one of the given types.
522   */
523  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
524    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
525        it.hasNext();) {
526      StorageDirectory sd = it.next();
527      for (NameNodeFile nnf : nnfs) {
528        File fsImage = getStorageFile(sd, nnf, txid);
529        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
530          return fsImage;
531        }
532      }
533    }
534    return null;
535  }
536
537  public File getFsImageName(long txid) {
538    return getFsImageName(txid, NameNodeFile.IMAGE);
539  }
540
541  public File getHighestFsImageName() {
542    return getFsImageName(getMostRecentCheckpointTxId());
543  }
544
545  /** Create new dfs name directory.  Caution: this destroys all files
546   * in this filesystem. */
547  private void format(StorageDirectory sd) throws IOException {
548    sd.clearDirectory(); // create currrent dir
549    writeProperties(sd);
550    writeTransactionIdFile(sd, 0);
551
552    LOG.info("Storage directory " + sd.getRoot()
553             + " has been successfully formatted.");
554  }
555
556  /**
557   * Format all available storage directories.
558   */
559  public void format(NamespaceInfo nsInfo) throws IOException {
560    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
561        nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
562        "Bad layout version: %s", nsInfo.getLayoutVersion());
563    
564    this.setStorageInfo(nsInfo);
565    this.blockpoolID = nsInfo.getBlockPoolID();
566    for (Iterator<StorageDirectory> it =
567                           dirIterator(); it.hasNext();) {
568      StorageDirectory sd = it.next();
569      format(sd);
570    }
571  }
572  
573  public static NamespaceInfo newNamespaceInfo()
574      throws UnknownHostException {
575    return new NamespaceInfo(newNamespaceID(), newClusterID(),
576        newBlockPoolID(), 0L);
577  }
578  
579  public void format() throws IOException {
580    this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
581    for (Iterator<StorageDirectory> it =
582                           dirIterator(); it.hasNext();) {
583      StorageDirectory sd = it.next();
584      format(sd);
585    }
586  }
587
588  /**
589   * Generate new namespaceID.
590   *
591   * namespaceID is a persistent attribute of the namespace.
592   * It is generated when the namenode is formatted and remains the same
593   * during the life cycle of the namenode.
594   * When a datanodes register they receive it as the registrationID,
595   * which is checked every time the datanode is communicating with the
596   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
597   *
598   * @return new namespaceID
599   */
600  private static int newNamespaceID() {
601    int newID = 0;
602    while(newID == 0)
603      newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
604    return newID;
605  }
606
607  @Override // Storage
608  protected void setFieldsFromProperties(
609      Properties props, StorageDirectory sd) throws IOException {
610    super.setFieldsFromProperties(props, sd);
611    if (layoutVersion == 0) {
612      throw new IOException("NameNode directory "
613                            + sd.getRoot() + " is not formatted.");
614    }
615
616    // Set Block pool ID in version with federation support
617    if (NameNodeLayoutVersion.supports(
618        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
619      String sbpid = props.getProperty("blockpoolID");
620      setBlockPoolID(sd.getRoot(), sbpid);
621    }
622    setDeprecatedPropertiesForUpgrade(props);
623  }
624
625  void readProperties(StorageDirectory sd, StartupOption startupOption)
626      throws IOException {
627    Properties props = readPropertiesFile(sd.getVersionFile());
628    if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
629        (startupOption)) {
630      int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
631      if (lv > getServiceLayoutVersion()) {
632        // we should not use a newer version for rollingUpgrade rollback
633        throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
634            "storage directory " + sd.getRoot().getAbsolutePath());
635      }
636      props.setProperty("layoutVersion",
637          Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
638    }
639    setFieldsFromProperties(props, sd);
640  }
641
642  /**
643   * Pull any properties out of the VERSION file that are from older
644   * versions of HDFS and only necessary during upgrade.
645   */
646  private void setDeprecatedPropertiesForUpgrade(Properties props) {
647    deprecatedProperties = new HashMap<String, String>();
648    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
649    if (md5 != null) {
650      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
651    }
652  }
653  
654  /**
655   * Return a property that was stored in an earlier version of HDFS.
656   * 
657   * This should only be used during upgrades.
658   */
659  String getDeprecatedProperty(String prop) {
660    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
661      "getDeprecatedProperty should only be done when loading " +
662      "storage from past versions during upgrade.";
663    return deprecatedProperties.get(prop);
664  }
665
666  /**
667   * Write version file into the storage directory.
668   *
669   * The version file should always be written last.
670   * Missing or corrupted version file indicates that
671   * the checkpoint is not valid.
672   *
673   * @param sd storage directory
674   * @throws IOException
675   */
676  @Override // Storage
677  protected void setPropertiesFromFields(Properties props,
678                           StorageDirectory sd
679                           ) throws IOException {
680    super.setPropertiesFromFields(props, sd);
681    // Set blockpoolID in version with federation support
682    if (NameNodeLayoutVersion.supports(
683        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
684      props.setProperty("blockpoolID", blockpoolID);
685    }
686  }
687  
688  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
689    return new File(sd.getCurrentDir(),
690                    String.format("%s_%019d", type.getName(), imageTxId));
691  }
692  
693  /**
694   * Get a storage file for one of the files that doesn't need a txid associated
695   * (e.g version, seen_txid)
696   */
697  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
698    return new File(sd.getCurrentDir(), type.getName());
699  }
700
701  @VisibleForTesting
702  public static String getCheckpointImageFileName(long txid) {
703    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
704  }
705
706  @VisibleForTesting
707  public static String getImageFileName(long txid) {
708    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
709  }
710
711  @VisibleForTesting
712  public static String getRollbackImageFileName(long txid) {
713    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
714  }
715
716  public static String getLegacyOIVImageFileName(long txid) {
717    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
718  }
719
720  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
721    return String.format("%s_%019d", nnf.getName(), txid);
722  }
723
724  @VisibleForTesting
725  public static String getInProgressEditsFileName(long startTxId) {
726    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
727  }
728  
729  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
730    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
731  }
732  
733  static File getFinalizedEditsFile(StorageDirectory sd,
734      long startTxId, long endTxId) {
735    return new File(sd.getCurrentDir(),
736        getFinalizedEditsFileName(startTxId, endTxId));
737  }
738
739  static File getTemporaryEditsFile(StorageDirectory sd,
740      long startTxId, long endTxId, long timestamp) {
741    return new File(sd.getCurrentDir(),
742        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
743  }
744
745  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
746    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
747  }
748
749  @VisibleForTesting
750  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
751    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
752                         startTxId, endTxId);
753  }
754
755  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
756      long timestamp) {
757    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
758                         startTxId, endTxId, timestamp);
759  }
760  
761  /**
762   * Return the first readable finalized edits file for the given txid.
763   */
764  File findFinalizedEditsFile(long startTxId, long endTxId)
765  throws IOException {
766    File ret = findFile(NameNodeDirType.EDITS,
767        getFinalizedEditsFileName(startTxId, endTxId));
768    if (ret == null) {
769      throw new IOException(
770          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
771    }
772    return ret;
773  }
774    
775  /**
776   * Return the first readable image file for the given txid and image type, or
777   * null if no such image can be found
778   */
779  File findImageFile(NameNodeFile nnf, long txid) {
780    return findFile(NameNodeDirType.IMAGE,
781        getNameNodeFileName(nnf, txid));
782  }
783
784  /**
785   * Return the first readable storage file of the given name
786   * across any of the 'current' directories in SDs of the
787   * given type, or null if no such file exists.
788   */
789  private File findFile(NameNodeDirType dirType, String name) {
790    for (StorageDirectory sd : dirIterable(dirType)) {
791      File candidate = new File(sd.getCurrentDir(), name);
792      if (FileUtil.canRead(sd.getCurrentDir()) &&
793          candidate.exists()) {
794        return candidate;
795      }
796    }
797    return null;
798  }
799
800  /**
801   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
802   * @param val Whether to disable the preupgradeable layout check.
803   */
804  void setDisablePreUpgradableLayoutCheck(boolean val) {
805    disablePreUpgradableLayoutCheck = val;
806  }
807
808  /**
809   * Marks a list of directories as having experienced an error.
810   *
811   * @param sds A list of storage directories to mark as errored.
812   */
813  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
814    for (StorageDirectory sd : sds) {
815      reportErrorsOnDirectory(sd);
816    }
817  }
818
819  /**
820   * Reports that a directory has experienced an error.
821   * Notifies listeners that the directory is no longer
822   * available.
823   *
824   * @param sd A storage directory to mark as errored.
825   */
826  private void reportErrorsOnDirectory(StorageDirectory sd) {
827    LOG.error("Error reported on storage directory " + sd);
828
829    String lsd = listStorageDirectories();
830    LOG.debug("current list of storage dirs:" + lsd);
831
832    LOG.warn("About to remove corresponding storage: "
833             + sd.getRoot().getAbsolutePath());
834    try {
835      sd.unlock();
836    } catch (Exception e) {
837      LOG.warn("Unable to unlock bad storage directory: "
838               +  sd.getRoot().getPath(), e);
839    }
840
841    if (this.storageDirs.remove(sd)) {
842      this.removedStorageDirs.add(sd);
843    }
844    
845    lsd = listStorageDirectories();
846    LOG.debug("at the end current list of storage dirs:" + lsd);
847  }
848  
849  /** 
850   * Processes the startup options for the clusterid and blockpoolid 
851   * for the upgrade. 
852   * @param startOpt Startup options 
853   * @param layoutVersion Layout version for the upgrade 
854   * @throws IOException
855   */
856  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
857      throws IOException {
858    if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) {
859      // If upgrade from a release that does not support federation,
860      // if clusterId is provided in the startupOptions use it.
861      // Else generate a new cluster ID      
862      if (!NameNodeLayoutVersion.supports(
863          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
864        if (startOpt.getClusterId() == null) {
865          startOpt.setClusterId(newClusterID());
866        }
867        setClusterID(startOpt.getClusterId());
868        setBlockPoolID(newBlockPoolID());
869      } else {
870        // Upgrade from one version of federation to another supported
871        // version of federation doesn't require clusterID.
872        // Warn the user if the current clusterid didn't match with the input
873        // clusterid.
874        if (startOpt.getClusterId() != null
875            && !startOpt.getClusterId().equals(getClusterID())) {
876          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
877              + ", Ignoring given clusterid: " + startOpt.getClusterId());
878        }
879      }
880      LOG.info("Using clusterid: " + getClusterID());
881    }
882  }
883  
884  /**
885   * Report that an IOE has occurred on some file which may
886   * or may not be within one of the NN image storage directories.
887   */
888  @Override
889  public void reportErrorOnFile(File f) {
890    // We use getAbsolutePath here instead of getCanonicalPath since we know
891    // that there is some IO problem on that drive.
892    // getCanonicalPath may need to call stat() or readlink() and it's likely
893    // those calls would fail due to the same underlying IO problem.
894    String absPath = f.getAbsolutePath();
895    for (StorageDirectory sd : storageDirs) {
896      String dirPath = sd.getRoot().getAbsolutePath();
897      if (!dirPath.endsWith(File.separator)) {
898        dirPath += File.separator;
899      }
900      if (absPath.startsWith(dirPath)) {
901        reportErrorsOnDirectory(sd);
902        return;
903      }
904    }
905    
906  }
907  
908  /**
909   * Generate new clusterID.
910   * 
911   * clusterID is a persistent attribute of the cluster.
912   * It is generated when the cluster is created and remains the same
913   * during the life cycle of the cluster.  When a new name node is formated, if 
914   * this is a new cluster, a new clusterID is geneated and stored.  Subsequent 
915   * name node must be given the same ClusterID during its format to be in the 
916   * same cluster.
917   * When a datanode register it receive the clusterID and stick with it.
918   * If at any point, name node or data node tries to join another cluster, it 
919   * will be rejected.
920   * 
921   * @return new clusterID
922   */ 
923  public static String newClusterID() {
924    return "CID-" + UUID.randomUUID().toString();
925  }
926
927  void setClusterID(String cid) {
928    clusterID = cid;
929  }
930
931  /**
932   * try to find current cluster id in the VERSION files
933   * returns first cluster id found in any VERSION file
934   * null in case none found
935   * @return clusterId or null in case no cluster id found
936   */
937  public String determineClusterId() {
938    String cid = null;
939    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
940    while(sdit.hasNext()) {
941      StorageDirectory sd = sdit.next();
942      try {
943        Properties props = readPropertiesFile(sd.getVersionFile());
944        cid = props.getProperty("clusterID");
945        LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
946            ";lv=" + layoutVersion + ";cid=" + cid);
947        
948        if(cid != null && !cid.equals(""))
949          return cid;
950      } catch (Exception e) {
951        LOG.warn("this sd not available: " + e.getLocalizedMessage());
952      } //ignore
953    }
954    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
955    return null;
956  }
957
958  /**
959   * Generate new blockpoolID.
960   * 
961   * @return new blockpoolID
962   */ 
963  static String newBlockPoolID() throws UnknownHostException{
964    String ip = "unknownIP";
965    try {
966      ip = DNS.getDefaultIP("default");
967    } catch (UnknownHostException e) {
968      LOG.warn("Could not find ip address of \"default\" inteface.");
969      throw e;
970    }
971    
972    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
973    String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
974    return bpid;
975  }
976
977  /** Validate and set block pool ID */
978  public void setBlockPoolID(String bpid) {
979    blockpoolID = bpid;
980  }
981
982  /** Validate and set block pool ID */
983  private void setBlockPoolID(File storage, String bpid)
984      throws InconsistentFSStateException {
985    if (bpid == null || bpid.equals("")) {
986      throw new InconsistentFSStateException(storage, "file "
987          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
988    }
989    
990    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
991      throw new InconsistentFSStateException(storage,
992          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
993    }
994    setBlockPoolID(bpid);
995  }
996  
997  public String getBlockPoolID() {
998    return blockpoolID;
999  }
1000
1001  /**
1002   * Iterate over all current storage directories, inspecting them
1003   * with the given inspector.
1004   */
1005  void inspectStorageDirs(FSImageStorageInspector inspector)
1006      throws IOException {
1007
1008    // Process each of the storage directories to find the pair of
1009    // newest image file and edit file
1010    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1011      StorageDirectory sd = it.next();
1012      inspector.inspectDirectory(sd);
1013    }
1014  }
1015
1016  /**
1017   * Iterate over all of the storage dirs, reading their contents to determine
1018   * their layout versions. Returns an FSImageStorageInspector which has
1019   * inspected each directory.
1020   * 
1021   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1022   * @throws IOException if no valid storage dirs are found or no valid layout version
1023   */
1024  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1025      StartupOption startupOption) throws IOException {
1026    Integer layoutVersion = null;
1027    boolean multipleLV = false;
1028    StringBuilder layoutVersions = new StringBuilder();
1029
1030    // First determine what range of layout versions we're going to inspect
1031    for (Iterator<StorageDirectory> it = dirIterator(false);
1032         it.hasNext();) {
1033      StorageDirectory sd = it.next();
1034      if (!sd.getVersionFile().exists()) {
1035        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1036        continue;
1037      }
1038      readProperties(sd, startupOption); // sets layoutVersion
1039      int lv = getLayoutVersion();
1040      if (layoutVersion == null) {
1041        layoutVersion = Integer.valueOf(lv);
1042      } else if (!layoutVersion.equals(lv)) {
1043        multipleLV = true;
1044      }
1045      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1046    }
1047    
1048    if (layoutVersion == null) {
1049      throw new IOException("No storage directories contained VERSION information");
1050    }
1051    if (multipleLV) {            
1052      throw new IOException(
1053          "Storage directories contain multiple layout versions: "
1054              + layoutVersions);
1055    }
1056    // If the storage directories are with the new layout version
1057    // (ie edits_<txnid>) then use the new inspector, which will ignore
1058    // the old format dirs.
1059    FSImageStorageInspector inspector;
1060    if (NameNodeLayoutVersion.supports(
1061        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1062      inspector = new FSImageTransactionalStorageInspector(fileTypes);
1063    } else {
1064      inspector = new FSImagePreTransactionalStorageInspector();
1065    }
1066    
1067    inspectStorageDirs(inspector);
1068    return inspector;
1069  }
1070
1071  public NamespaceInfo getNamespaceInfo() {
1072    return new NamespaceInfo(
1073        getNamespaceID(),
1074        getClusterID(),
1075        getBlockPoolID(),
1076        getCTime());
1077  }
1078}