001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.blockmanagement; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Iterator; 023import java.util.List; 024 025import org.apache.hadoop.hdfs.protocol.Block; 026import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 027import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; 028import org.apache.hadoop.hdfs.server.namenode.NameNode; 029 030/** 031 * Represents a block that is currently being constructed.<br> 032 * This is usually the last block of a file opened for write or append. 033 */ 034public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous { 035 /** Block state. See {@link BlockUCState} */ 036 private BlockUCState blockUCState; 037 038 /** 039 * Block replicas as assigned when the block was allocated. 040 * This defines the pipeline order. 041 */ 042 private List<ReplicaUnderConstruction> replicas; 043 044 /** 045 * Index of the primary data node doing the recovery. Useful for log 046 * messages. 047 */ 048 private int primaryNodeIndex = -1; 049 050 /** 051 * The new generation stamp, which this block will have 052 * after the recovery succeeds. Also used as a recovery id to identify 053 * the right recovery if any of the abandoned recoveries re-appear. 054 */ 055 private long blockRecoveryId = 0; 056 057 /** 058 * The block source to use in the event of copy-on-write truncate. 059 */ 060 private Block truncateBlock; 061 062 /** 063 * ReplicaUnderConstruction contains information about replicas while 064 * they are under construction. 065 * The GS, the length and the state of the replica is as reported by 066 * the data-node. 067 * It is not guaranteed, but expected, that data-nodes actually have 068 * corresponding replicas. 069 */ 070 static class ReplicaUnderConstruction extends Block { 071 private final DatanodeStorageInfo expectedLocation; 072 private ReplicaState state; 073 private boolean chosenAsPrimary; 074 075 ReplicaUnderConstruction(Block block, 076 DatanodeStorageInfo target, 077 ReplicaState state) { 078 super(block); 079 this.expectedLocation = target; 080 this.state = state; 081 this.chosenAsPrimary = false; 082 } 083 084 /** 085 * Expected block replica location as assigned when the block was allocated. 086 * This defines the pipeline order. 087 * It is not guaranteed, but expected, that the data-node actually has 088 * the replica. 089 */ 090 private DatanodeStorageInfo getExpectedStorageLocation() { 091 return expectedLocation; 092 } 093 094 /** 095 * Get replica state as reported by the data-node. 096 */ 097 ReplicaState getState() { 098 return state; 099 } 100 101 /** 102 * Whether the replica was chosen for recovery. 103 */ 104 boolean getChosenAsPrimary() { 105 return chosenAsPrimary; 106 } 107 108 /** 109 * Set replica state. 110 */ 111 void setState(ReplicaState s) { 112 state = s; 113 } 114 115 /** 116 * Set whether this replica was chosen for recovery. 117 */ 118 void setChosenAsPrimary(boolean chosenAsPrimary) { 119 this.chosenAsPrimary = chosenAsPrimary; 120 } 121 122 /** 123 * Is data-node the replica belongs to alive. 124 */ 125 boolean isAlive() { 126 return expectedLocation.getDatanodeDescriptor().isAlive; 127 } 128 129 @Override // Block 130 public int hashCode() { 131 return super.hashCode(); 132 } 133 134 @Override // Block 135 public boolean equals(Object obj) { 136 // Sufficient to rely on super's implementation 137 return (this == obj) || super.equals(obj); 138 } 139 140 @Override 141 public String toString() { 142 final StringBuilder b = new StringBuilder(50); 143 appendStringTo(b); 144 return b.toString(); 145 } 146 147 @Override 148 public void appendStringTo(StringBuilder sb) { 149 sb.append("ReplicaUC[") 150 .append(expectedLocation) 151 .append("|") 152 .append(state) 153 .append("]"); 154 } 155 } 156 157 /** 158 * Create block and set its state to 159 * {@link BlockUCState#UNDER_CONSTRUCTION}. 160 */ 161 public BlockInfoContiguousUnderConstruction(Block blk, short replication) { 162 this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null); 163 } 164 165 /** 166 * Create a block that is currently being constructed. 167 */ 168 public BlockInfoContiguousUnderConstruction(Block blk, short replication, BlockUCState state, DatanodeStorageInfo[] targets) { 169 super(blk, replication); 170 assert getBlockUCState() != BlockUCState.COMPLETE : 171 "BlockInfoUnderConstruction cannot be in COMPLETE state"; 172 this.blockUCState = state; 173 setExpectedLocations(targets); 174 } 175 176 /** 177 * Convert an under construction block to a complete block. 178 * 179 * @return BlockInfo - a complete block. 180 * @throws IOException if the state of the block 181 * (the generation stamp and the length) has not been committed by 182 * the client or it does not have at least a minimal number of replicas 183 * reported from data-nodes. 184 */ 185 BlockInfoContiguous convertToCompleteBlock() throws IOException { 186 assert getBlockUCState() != BlockUCState.COMPLETE : 187 "Trying to convert a COMPLETE block"; 188 return new BlockInfoContiguous(this); 189 } 190 191 /** Set expected locations */ 192 public void setExpectedLocations(DatanodeStorageInfo[] targets) { 193 int numLocations = targets == null ? 0 : targets.length; 194 this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations); 195 for(int i = 0; i < numLocations; i++) 196 replicas.add( 197 new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW)); 198 } 199 200 /** 201 * Create array of expected replica locations 202 * (as has been assigned by chooseTargets()). 203 */ 204 public DatanodeStorageInfo[] getExpectedStorageLocations() { 205 int numLocations = replicas == null ? 0 : replicas.size(); 206 DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations]; 207 for(int i = 0; i < numLocations; i++) 208 storages[i] = replicas.get(i).getExpectedStorageLocation(); 209 return storages; 210 } 211 212 /** Get the number of expected locations */ 213 public int getNumExpectedLocations() { 214 return replicas == null ? 0 : replicas.size(); 215 } 216 217 /** 218 * Return the state of the block under construction. 219 * @see BlockUCState 220 */ 221 @Override // BlockInfo 222 public BlockUCState getBlockUCState() { 223 return blockUCState; 224 } 225 226 void setBlockUCState(BlockUCState s) { 227 blockUCState = s; 228 } 229 230 /** Get block recovery ID */ 231 public long getBlockRecoveryId() { 232 return blockRecoveryId; 233 } 234 235 /** Get recover block */ 236 public Block getTruncateBlock() { 237 return truncateBlock; 238 } 239 240 public void setTruncateBlock(Block recoveryBlock) { 241 this.truncateBlock = recoveryBlock; 242 } 243 244 /** 245 * Process the recorded replicas. When about to commit or finish the 246 * pipeline recovery sort out bad replicas. 247 * @param genStamp The final generation stamp for the block. 248 */ 249 public void setGenerationStampAndVerifyReplicas(long genStamp) { 250 // Set the generation stamp for the block. 251 setGenerationStamp(genStamp); 252 if (replicas == null) 253 return; 254 255 // Remove the replicas with wrong gen stamp. 256 // The replica list is unchanged. 257 for (ReplicaUnderConstruction r : replicas) { 258 if (genStamp != r.getGenerationStamp()) { 259 r.getExpectedStorageLocation().removeBlock(this); 260 NameNode.blockStateChangeLog.info("BLOCK* Removing stale replica " 261 + "from location: {}", r.getExpectedStorageLocation()); 262 } 263 } 264 } 265 266 /** 267 * Commit block's length and generation stamp as reported by the client. 268 * Set block state to {@link BlockUCState#COMMITTED}. 269 * @param block - contains client reported block length and generation 270 * @throws IOException if block ids are inconsistent. 271 */ 272 void commitBlock(Block block) throws IOException { 273 if(getBlockId() != block.getBlockId()) 274 throw new IOException("Trying to commit inconsistent block: id = " 275 + block.getBlockId() + ", expected id = " + getBlockId()); 276 blockUCState = BlockUCState.COMMITTED; 277 this.set(getBlockId(), block.getNumBytes(), block.getGenerationStamp()); 278 // Sort out invalid replicas. 279 setGenerationStampAndVerifyReplicas(block.getGenerationStamp()); 280 } 281 282 /** 283 * Initialize lease recovery for this block. 284 * Find the first alive data-node starting from the previous primary and 285 * make it primary. 286 */ 287 public void initializeBlockRecovery(long recoveryId) { 288 setBlockUCState(BlockUCState.UNDER_RECOVERY); 289 blockRecoveryId = recoveryId; 290 if (replicas.size() == 0) { 291 NameNode.blockStateChangeLog.warn("BLOCK*" 292 + " BlockInfoUnderConstruction.initLeaseRecovery:" 293 + " No blocks found, lease removed."); 294 } 295 boolean allLiveReplicasTriedAsPrimary = true; 296 for (int i = 0; i < replicas.size(); i++) { 297 // Check if all replicas have been tried or not. 298 if (replicas.get(i).isAlive()) { 299 allLiveReplicasTriedAsPrimary = 300 (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary()); 301 } 302 } 303 if (allLiveReplicasTriedAsPrimary) { 304 // Just set all the replicas to be chosen whether they are alive or not. 305 for (int i = 0; i < replicas.size(); i++) { 306 replicas.get(i).setChosenAsPrimary(false); 307 } 308 } 309 long mostRecentLastUpdate = 0; 310 ReplicaUnderConstruction primary = null; 311 primaryNodeIndex = -1; 312 for(int i = 0; i < replicas.size(); i++) { 313 // Skip alive replicas which have been chosen for recovery. 314 if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) { 315 continue; 316 } 317 final ReplicaUnderConstruction ruc = replicas.get(i); 318 final long lastUpdate = ruc.getExpectedStorageLocation() 319 .getDatanodeDescriptor().getLastUpdateMonotonic(); 320 if (lastUpdate > mostRecentLastUpdate) { 321 primaryNodeIndex = i; 322 primary = ruc; 323 mostRecentLastUpdate = lastUpdate; 324 } 325 } 326 if (primary != null) { 327 primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this); 328 primary.setChosenAsPrimary(true); 329 NameNode.blockStateChangeLog.info( 330 "BLOCK* {} recovery started, primary={}", this, primary); 331 } 332 } 333 334 void addReplicaIfNotPresent(DatanodeStorageInfo storage, 335 Block block, 336 ReplicaState rState) { 337 Iterator<ReplicaUnderConstruction> it = replicas.iterator(); 338 while (it.hasNext()) { 339 ReplicaUnderConstruction r = it.next(); 340 DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation(); 341 if(expectedLocation == storage) { 342 // Record the gen stamp from the report 343 r.setGenerationStamp(block.getGenerationStamp()); 344 return; 345 } else if (expectedLocation != null && 346 expectedLocation.getDatanodeDescriptor() == 347 storage.getDatanodeDescriptor()) { 348 349 // The Datanode reported that the block is on a different storage 350 // than the one chosen by BlockPlacementPolicy. This can occur as 351 // we allow Datanodes to choose the target storage. Update our 352 // state by removing the stale entry and adding a new one. 353 it.remove(); 354 break; 355 } 356 } 357 replicas.add(new ReplicaUnderConstruction(block, storage, rState)); 358 } 359 360 @Override // BlockInfo 361 // BlockInfoUnderConstruction participates in maps the same way as BlockInfo 362 public int hashCode() { 363 return super.hashCode(); 364 } 365 366 @Override // BlockInfo 367 public boolean equals(Object obj) { 368 // Sufficient to rely on super's implementation 369 return (this == obj) || super.equals(obj); 370 } 371 372 @Override 373 public String toString() { 374 final StringBuilder b = new StringBuilder(100); 375 appendStringTo(b); 376 return b.toString(); 377 } 378 379 @Override 380 public void appendStringTo(StringBuilder sb) { 381 super.appendStringTo(sb); 382 appendUCParts(sb); 383 } 384 385 private void appendUCParts(StringBuilder sb) { 386 sb.append("{UCState=").append(blockUCState) 387 .append(", truncateBlock=" + truncateBlock) 388 .append(", primaryNodeIndex=").append(primaryNodeIndex) 389 .append(", replicas=["); 390 if (replicas != null) { 391 Iterator<ReplicaUnderConstruction> iter = replicas.iterator(); 392 if (iter.hasNext()) { 393 iter.next().appendStringTo(sb); 394 while (iter.hasNext()) { 395 sb.append(", "); 396 iter.next().appendStringTo(sb); 397 } 398 } 399 } 400 sb.append("]}"); 401 } 402}