/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode.fsdataset;


import java.io.EOFException;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataStorage;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.hdfs.server.datanode.Replica;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
import org.apache.hadoop.hdfs.server.datanode.ReplicaHandler;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
import org.apache.hadoop.hdfs.server.datanode.UnexpectedReplicaStateException;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This is a service provider interface for the underlying storage that
 * stores replicas for a data node.
 * The default implementation stores replicas on local drives.
 */
@InterfaceAudience.Private
public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
  /**
   * A factory for creating {@link FsDatasetSpi} objects.
   */
  public static abstract class Factory<D extends FsDatasetSpi<?>> {
    /**
     * @return the configured factory, instantiated reflectively from the
     *         class named by {@code dfs.datanode.fsdataset.factory}
     *         (default: {@link FsDatasetFactory}).
     */
    public static Factory<?> getFactory(Configuration conf) {
      @SuppressWarnings("rawtypes")
      final Class<? extends Factory> clazz = conf.getClass(
          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
          FsDatasetFactory.class,
          Factory.class);
      return ReflectionUtils.newInstance(clazz, conf);
    }

    /** Create a new object. */
    public abstract D newInstance(DataNode datanode, DataStorage storage,
        Configuration conf) throws IOException;

    /** Does the factory create simulated objects? */
    public boolean isSimulated() {
      return false;
    }
  }

  /** @return a list of volumes. */
  public List<V> getVolumes();

  /**
   * Add a new volume to the FsDataset.<p/>
   *
   * If the FSDataset supports block scanning, this function registers
   * the new volume with the block scanner.
   *
   * @param location The storage location for the new volume.
   * @param nsInfos Namespace information for the new volume.
   */
  public void addVolume(
      final StorageLocation location,
      final List<NamespaceInfo> nsInfos) throws IOException;

  /**
   * Removes a collection of volumes from FsDataset.
   *
   * If the FSDataset supports block scanning, this function removes
   * the volumes from the block scanner.
   *
   * @param volumes The paths of the volumes to be removed.
   * @param clearFailure set true to clear the failure information about the
   *                     volumes.
   */
  public void removeVolumes(Set<File> volumes, boolean clearFailure);

  /** @return a storage with the given storage ID */
  public DatanodeStorage getStorage(final String storageUuid);

  /** @return one or more storage reports for attached volumes. */
  public StorageReport[] getStorageReports(String bpid)
      throws IOException;

  /** @return the volume that contains a replica of the block. */
  public V getVolume(ExtendedBlock b);

  /** @return a volume information map (name =&gt; info). */
  public Map<String, Object> getVolumeInfoMap();

  /**
   * Returns info about volume failures.
   *
   * @return info about volume failures, possibly null
   */
  VolumeFailureSummary getVolumeFailureSummary();

  /** @return a list of finalized blocks for the given block pool. */
  public List<FinalizedReplica> getFinalizedBlocks(String bpid);

  /**
   * @return a list of finalized blocks for the given block pool that reside
   *         on persistent (non-transient) storage.
   */
  public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);

  /**
   * Check whether the in-memory block record matches the block on the disk,
   * and, in case that they are not matched, update the record or mark it
   * as corrupted.
   *
   * @param bpid block pool ID
   * @param blockId ID of the block to check
   * @param diskFile the block file found on disk
   * @param diskMetaFile the metadata file found on disk
   * @param vol the volume the files reside on
   */
  public void checkAndUpdate(String bpid, long blockId, File diskFile,
      File diskMetaFile, FsVolumeSpi vol) throws IOException;

  /**
   * @param b - the block
   * @return a stream if the meta-data of the block exists;
   *         otherwise, return null.
   * @throws IOException
   */
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
      ) throws IOException;

  /**
   * Returns the specified block's on-disk length (excluding metadata)
   * @return the specified block's on-disk length (excluding metadata)
   * @throws IOException on error
   */
  public long getLength(ExtendedBlock b) throws IOException;

  /**
   * Get reference to the replica meta info in the replicasMap.
   * To be called from methods that are synchronized on the dataset
   * implementation.
   * @return replica from the replicas map
   */
  @Deprecated
  public Replica getReplica(String bpid, long blockId);

  /**
   * @return replica meta information
   */
  public String getReplicaString(String bpid, long blockId);

  /**
   * @return the generation stamp stored with the block.
   */
  public Block getStoredBlock(String bpid, long blkid) throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * @param b block
   * @param seekOffset offset within the block to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
      throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * The block is still in the tmp directory and is not finalized
   * @param b block
   * @param blkoff offset within the block data to seek to
   * @param ckoff offset within the checksum data to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
      long ckoff) throws IOException;

  /**
   * Creates a temporary replica and returns the meta information of the replica
   *
   * @param storageType the storage type to place the replica on
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler createTemporary(StorageType storageType,
      ExtendedBlock b) throws IOException;

  /**
   * Creates a RBW replica and returns the meta info of the replica
   *
   * @param storageType the storage type to place the replica on
   * @param b block
   * @param allowLazyPersist whether the replica may be lazily persisted
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler createRbw(StorageType storageType,
      ExtendedBlock b, boolean allowLazyPersist) throws IOException;

  /**
   * Recovers a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param minBytesRcvd the minimum number of bytes that the replica could have
   * @param maxBytesRcvd the maximum number of bytes that the replica could have
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler recoverRbw(ExtendedBlock b,
      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;

  /**
   * Convert a temporary replica to a RBW.
   * @param temporary the temporary replica being converted
   * @return the result RBW
   */
  public ReplicaInPipelineInterface convertTemporaryToRbw(
      ExtendedBlock temporary) throws IOException;

  /**
   * Append to a finalized replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaHandler append(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed append to a finalized replica
   * and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaHandler recoverAppend(
      ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException;

  /**
   * Recover a failed pipeline close
   * It bumps the replica's generation stamp and finalize it if RBW replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the storage uuid of the replica.
   * @throws IOException
   */
  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
      ) throws IOException;

  /**
   * Finalizes the block previously opened for writing using writeToBlock.
   * The block size is what is in the parameter b and it must match the amount
   * of data written
   * @throws IOException
   * @throws ReplicaNotFoundException if the replica can not be found when the
   * block is being finalized. For instance, the block resides on an HDFS volume
   * that has been removed.
   */
  public void finalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Unfinalizes the block previously opened for writing using writeToBlock.
   * The temporary file associated with this block is deleted.
   * @throws IOException
   */
  public void unfinalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Returns one block report per volume.
   * @param bpid Block Pool Id
   * @return - a map of DatanodeStorage to block report for the volume.
   */
  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);

  /**
   * Returns the cache report - the full list of cached block IDs of a
   * block pool.
   * @param bpid Block Pool Id
   * @return the cache report - the full list of cached block IDs.
   */
  public List<Long> getCacheReport(String bpid);

  /** Does the dataset contain the block? */
  public boolean contains(ExtendedBlock block);

  /**
   * Check if a block is valid.
   *
   * @param b The block to check.
   * @param minLength The minimum length that the block must have. May be 0.
   * @param state If this is null, it is ignored. If it is non-null, we
   *            will check that the replica has this state.
   *
   * @throws ReplicaNotFoundException If the replica is not found
   *
   * @throws UnexpectedReplicaStateException If the replica is not in the
   *           expected state.
   * @throws FileNotFoundException If the block file is not found or there
   *           was an error locating it.
   * @throws EOFException If the replica length is too short.
   *
   * @throws IOException May be thrown from the methods called.
   */
  public void checkBlock(ExtendedBlock b, long minLength, ReplicaState state)
      throws ReplicaNotFoundException, UnexpectedReplicaStateException,
      FileNotFoundException, EOFException, IOException;


  /**
   * Is the block valid?
   * @return - true if the specified block is valid
   */
  public boolean isValidBlock(ExtendedBlock b);

  /**
   * Is the block a valid RBW?
   * @return - true if the specified block is a valid RBW
   */
  public boolean isValidRbw(ExtendedBlock b);

  /**
   * Invalidates the specified blocks
   * @param bpid Block pool Id
   * @param invalidBlks - the blocks to be invalidated
   * @throws IOException
   */
  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;

  /**
   * Caches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to cache
   */
  public void cache(String bpid, long[] blockIds);

  /**
   * Uncaches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - blocks ids to uncache
   */
  public void uncache(String bpid, long[] blockIds);

  /**
   * Determine if the specified block is cached.
   * @param bpid Block pool id
   * @param blockId - block id
   * @return true if the block is cached
   */
  public boolean isCached(String bpid, long blockId);

  /**
   * Check if all the data directories are healthy
   * @return A set of unhealthy data directories.
   */
  public Set<File> checkDataDir();

  /**
   * Shutdown the FSDataset
   */
  public void shutdown();

  /**
   * Sets the file pointer of the checksum stream so that the last checksum
   * will be overwritten
   * @param b block
   * @param outs The streams for the data file and checksum file
   * @param checksumSize number of bytes each checksum has
   * @throws IOException
   */
  public void adjustCrcChannelPosition(ExtendedBlock b,
      ReplicaOutputStreams outs, int checksumSize) throws IOException;

  /**
   * Checks how many valid storage volumes there are in the DataNode.
   * @return true if more than the minimum number of valid volumes are left
   *         in the FSDataSet.
   */
  public boolean hasEnoughResource();

  /**
   * Get visible length of the specified replica.
   */
  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;

  /**
   * Initialize a replica recovery.
   * @return actual state of the replica on this data-node or
   *         null if data-node does not have the replica.
   */
  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
      ) throws IOException;

  /**
   * Update replica's generation stamp and length and finalize it.
   * @return the ID of storage that stores the block
   */
  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
      long recoveryId, long newBlockId, long newLength) throws IOException;

  /**
   * add new block pool ID
   * @param bpid Block pool Id
   * @param conf Configuration
   */
  public void addBlockPool(String bpid, Configuration conf) throws IOException;

  /**
   * Shutdown and remove the block pool from underlying storage.
   * @param bpid Block pool Id to be removed
   */
  public void shutdownBlockPool(String bpid) ;

  /**
   * Deletes the block pool directories. If force is false, directories are
   * deleted only if no block files exist for the block pool. If force
   * is true entire directory for the blockpool is deleted along with its
   * contents.
   * @param bpid BlockPool Id to be deleted.
   * @param force If force is false, directories are deleted only if no
   *        block files exist for the block pool, otherwise entire
   *        directory for the blockpool is deleted along with its contents.
   * @throws IOException
   */
  public void deleteBlockPool(String bpid, boolean force) throws IOException;

  /**
   * Get {@link BlockLocalPathInfo} for the given block.
   */
  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
      ) throws IOException;

  /**
   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in
   * <code>blocks</code>.
   *
   * @param bpid pool to query
   * @param blockIds List of block ids for which to return metadata
   * @return metadata Metadata for the list of blocks
   * @throws IOException
   */
  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
      long[] blockIds) throws IOException;

  /**
   * Enable 'trash' for the given dataset. When trash is enabled, files are
   * moved to a separate trash directory instead of being deleted immediately.
   * This can be useful for example during rolling upgrades.
   */
  public void enableTrash(String bpid);

  /**
   * Restore trash
   */
  public void restoreTrash(String bpid);

  /**
   * @return true when trash is enabled
   */
  public boolean trashEnabled(String bpid);

  /**
   * Create a marker file indicating that a rolling upgrade is in progress.
   */
  public void setRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * Delete the rolling upgrade marker file if it exists.
   * @param bpid Block pool Id
   */
  public void clearRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * submit a sync_file_range request to AsyncDiskService
   */
  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
      final FileDescriptor fd, final long offset, final long nbytes,
      final int flags);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task end
   */
  public void onCompleteLazyPersist(String bpId, long blockId,
      long creationTime, File[] savedFiles, V targetVolume);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task fail
   */
  public void onFailLazyPersist(String bpId, long blockId);

  /**
   * Move block from one storage to another storage
   */
  public ReplicaInfo moveBlockAcrossStorage(final ExtendedBlock block,
      StorageType targetStorageType) throws IOException;

  /**
   * Set a block to be pinned on this datanode so that it cannot be moved
   * by Balancer/Mover.
   *
   * It is a no-op when dfs.datanode.block-pinning.enabled is set to false.
   */
  public void setPinning(ExtendedBlock block) throws IOException;

  /**
   * Check whether the block was pinned
   */
  public boolean getPinning(ExtendedBlock block) throws IOException;

  /**
   * Confirm whether the block is deleting
   */
  public boolean isDeletingBlock(String bpid, long blockId);
}