001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.blockmanagement;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.hadoop.hdfs.protocol.Block;
026import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
027import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
028import org.apache.hadoop.hdfs.server.namenode.NameNode;
029
030/**
031 * Represents a block that is currently being constructed.<br>
032 * This is usually the last block of a file opened for write or append.
033 */
034public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous {
035  /** Block state. See {@link BlockUCState} */
036  private BlockUCState blockUCState;
037
038  /**
039   * Block replicas as assigned when the block was allocated.
040   * This defines the pipeline order.
041   */
042  private List<ReplicaUnderConstruction> replicas;
043
044  /**
045   * Index of the primary data node doing the recovery. Useful for log
046   * messages.
047   */
048  private int primaryNodeIndex = -1;
049
050  /**
051   * The new generation stamp, which this block will have
052   * after the recovery succeeds. Also used as a recovery id to identify
053   * the right recovery if any of the abandoned recoveries re-appear.
054   */
055  private long blockRecoveryId = 0;
056
057  /**
058   * The block source to use in the event of copy-on-write truncate.
059   */
060  private Block truncateBlock;
061
062  /**
063   * ReplicaUnderConstruction contains information about replicas while
064   * they are under construction.
065   * The GS, the length and the state of the replica is as reported by 
066   * the data-node.
067   * It is not guaranteed, but expected, that data-nodes actually have
068   * corresponding replicas.
069   */
070  static class ReplicaUnderConstruction extends Block {
071    private final DatanodeStorageInfo expectedLocation;
072    private ReplicaState state;
073    private boolean chosenAsPrimary;
074
075    ReplicaUnderConstruction(Block block,
076                             DatanodeStorageInfo target,
077                             ReplicaState state) {
078      super(block);
079      this.expectedLocation = target;
080      this.state = state;
081      this.chosenAsPrimary = false;
082    }
083
084    /**
085     * Expected block replica location as assigned when the block was allocated.
086     * This defines the pipeline order.
087     * It is not guaranteed, but expected, that the data-node actually has
088     * the replica.
089     */
090    private DatanodeStorageInfo getExpectedStorageLocation() {
091      return expectedLocation;
092    }
093
094    /**
095     * Get replica state as reported by the data-node.
096     */
097    ReplicaState getState() {
098      return state;
099    }
100
101    /**
102     * Whether the replica was chosen for recovery.
103     */
104    boolean getChosenAsPrimary() {
105      return chosenAsPrimary;
106    }
107
108    /**
109     * Set replica state.
110     */
111    void setState(ReplicaState s) {
112      state = s;
113    }
114
115    /**
116     * Set whether this replica was chosen for recovery.
117     */
118    void setChosenAsPrimary(boolean chosenAsPrimary) {
119      this.chosenAsPrimary = chosenAsPrimary;
120    }
121
122    /**
123     * Is data-node the replica belongs to alive.
124     */
125    boolean isAlive() {
126      return expectedLocation.getDatanodeDescriptor().isAlive;
127    }
128
129    @Override // Block
130    public int hashCode() {
131      return super.hashCode();
132    }
133
134    @Override // Block
135    public boolean equals(Object obj) {
136      // Sufficient to rely on super's implementation
137      return (this == obj) || super.equals(obj);
138    }
139
140    @Override
141    public String toString() {
142      final StringBuilder b = new StringBuilder(50);
143      appendStringTo(b);
144      return b.toString();
145    }
146    
147    @Override
148    public void appendStringTo(StringBuilder sb) {
149      sb.append("ReplicaUC[")
150        .append(expectedLocation)
151        .append("|")
152        .append(state)
153        .append("]");
154    }
155  }
156
157  /**
158   * Create block and set its state to
159   * {@link BlockUCState#UNDER_CONSTRUCTION}.
160   */
161  public BlockInfoContiguousUnderConstruction(Block blk, short replication) {
162    this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
163  }
164
165  /**
166   * Create a block that is currently being constructed.
167   */
168  public BlockInfoContiguousUnderConstruction(Block blk, short replication, BlockUCState state, DatanodeStorageInfo[] targets) {
169    super(blk, replication);
170    assert getBlockUCState() != BlockUCState.COMPLETE :
171      "BlockInfoUnderConstruction cannot be in COMPLETE state";
172    this.blockUCState = state;
173    setExpectedLocations(targets);
174  }
175
176  /**
177   * Convert an under construction block to a complete block.
178   * 
179   * @return BlockInfo - a complete block.
180   * @throws IOException if the state of the block 
181   * (the generation stamp and the length) has not been committed by 
182   * the client or it does not have at least a minimal number of replicas 
183   * reported from data-nodes. 
184   */
185  BlockInfoContiguous convertToCompleteBlock() throws IOException {
186    assert getBlockUCState() != BlockUCState.COMPLETE :
187      "Trying to convert a COMPLETE block";
188    return new BlockInfoContiguous(this);
189  }
190
191  /** Set expected locations */
192  public void setExpectedLocations(DatanodeStorageInfo[] targets) {
193    int numLocations = targets == null ? 0 : targets.length;
194    this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
195    for(int i = 0; i < numLocations; i++)
196      replicas.add(
197        new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW));
198  }
199
200  /**
201   * Create array of expected replica locations
202   * (as has been assigned by chooseTargets()).
203   */
204  public DatanodeStorageInfo[] getExpectedStorageLocations() {
205    int numLocations = replicas == null ? 0 : replicas.size();
206    DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
207    for(int i = 0; i < numLocations; i++)
208      storages[i] = replicas.get(i).getExpectedStorageLocation();
209    return storages;
210  }
211
212  /** Get the number of expected locations */
213  public int getNumExpectedLocations() {
214    return replicas == null ? 0 : replicas.size();
215  }
216
217  /**
218   * Return the state of the block under construction.
219   * @see BlockUCState
220   */
221  @Override // BlockInfo
222  public BlockUCState getBlockUCState() {
223    return blockUCState;
224  }
225
226  void setBlockUCState(BlockUCState s) {
227    blockUCState = s;
228  }
229
230  /** Get block recovery ID */
231  public long getBlockRecoveryId() {
232    return blockRecoveryId;
233  }
234
235  /** Get recover block */
236  public Block getTruncateBlock() {
237    return truncateBlock;
238  }
239
240  public void setTruncateBlock(Block recoveryBlock) {
241    this.truncateBlock = recoveryBlock;
242  }
243
244  /**
245   * Process the recorded replicas. When about to commit or finish the
246   * pipeline recovery sort out bad replicas.
247   * @param genStamp  The final generation stamp for the block.
248   */
249  public void setGenerationStampAndVerifyReplicas(long genStamp) {
250    // Set the generation stamp for the block.
251    setGenerationStamp(genStamp);
252    if (replicas == null)
253      return;
254
255    // Remove the replicas with wrong gen stamp.
256    // The replica list is unchanged.
257    for (ReplicaUnderConstruction r : replicas) {
258      if (genStamp != r.getGenerationStamp()) {
259        r.getExpectedStorageLocation().removeBlock(this);
260        NameNode.blockStateChangeLog.info("BLOCK* Removing stale replica "
261            + "from location: {}", r.getExpectedStorageLocation());
262      }
263    }
264  }
265
266  /**
267   * Commit block's length and generation stamp as reported by the client.
268   * Set block state to {@link BlockUCState#COMMITTED}.
269   * @param block - contains client reported block length and generation 
270   * @throws IOException if block ids are inconsistent.
271   */
272  void commitBlock(Block block) throws IOException {
273    if(getBlockId() != block.getBlockId())
274      throw new IOException("Trying to commit inconsistent block: id = "
275          + block.getBlockId() + ", expected id = " + getBlockId());
276    blockUCState = BlockUCState.COMMITTED;
277    this.set(getBlockId(), block.getNumBytes(), block.getGenerationStamp());
278    // Sort out invalid replicas.
279    setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
280  }
281
282  /**
283   * Initialize lease recovery for this block.
284   * Find the first alive data-node starting from the previous primary and
285   * make it primary.
286   */
287  public void initializeBlockRecovery(long recoveryId) {
288    setBlockUCState(BlockUCState.UNDER_RECOVERY);
289    blockRecoveryId = recoveryId;
290    if (replicas.size() == 0) {
291      NameNode.blockStateChangeLog.warn("BLOCK*"
292        + " BlockInfoUnderConstruction.initLeaseRecovery:"
293        + " No blocks found, lease removed.");
294    }
295    boolean allLiveReplicasTriedAsPrimary = true;
296    for (int i = 0; i < replicas.size(); i++) {
297      // Check if all replicas have been tried or not.
298      if (replicas.get(i).isAlive()) {
299        allLiveReplicasTriedAsPrimary =
300            (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary());
301      }
302    }
303    if (allLiveReplicasTriedAsPrimary) {
304      // Just set all the replicas to be chosen whether they are alive or not.
305      for (int i = 0; i < replicas.size(); i++) {
306        replicas.get(i).setChosenAsPrimary(false);
307      }
308    }
309    long mostRecentLastUpdate = 0;
310    ReplicaUnderConstruction primary = null;
311    primaryNodeIndex = -1;
312    for(int i = 0; i < replicas.size(); i++) {
313      // Skip alive replicas which have been chosen for recovery.
314      if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) {
315        continue;
316      }
317      final ReplicaUnderConstruction ruc = replicas.get(i);
318      final long lastUpdate = ruc.getExpectedStorageLocation()
319          .getDatanodeDescriptor().getLastUpdateMonotonic();
320      if (lastUpdate > mostRecentLastUpdate) {
321        primaryNodeIndex = i;
322        primary = ruc;
323        mostRecentLastUpdate = lastUpdate;
324      }
325    }
326    if (primary != null) {
327      primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this);
328      primary.setChosenAsPrimary(true);
329      NameNode.blockStateChangeLog.info(
330          "BLOCK* {} recovery started, primary={}", this, primary);
331    }
332  }
333
334  void addReplicaIfNotPresent(DatanodeStorageInfo storage,
335                     Block block,
336                     ReplicaState rState) {
337    Iterator<ReplicaUnderConstruction> it = replicas.iterator();
338    while (it.hasNext()) {
339      ReplicaUnderConstruction r = it.next();
340      DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
341      if(expectedLocation == storage) {
342        // Record the gen stamp from the report
343        r.setGenerationStamp(block.getGenerationStamp());
344        return;
345      } else if (expectedLocation != null &&
346                 expectedLocation.getDatanodeDescriptor() ==
347                     storage.getDatanodeDescriptor()) {
348
349        // The Datanode reported that the block is on a different storage
350        // than the one chosen by BlockPlacementPolicy. This can occur as
351        // we allow Datanodes to choose the target storage. Update our
352        // state by removing the stale entry and adding a new one.
353        it.remove();
354        break;
355      }
356    }
357    replicas.add(new ReplicaUnderConstruction(block, storage, rState));
358  }
359
360  @Override // BlockInfo
361  // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
362  public int hashCode() {
363    return super.hashCode();
364  }
365
366  @Override // BlockInfo
367  public boolean equals(Object obj) {
368    // Sufficient to rely on super's implementation
369    return (this == obj) || super.equals(obj);
370  }
371
372  @Override
373  public String toString() {
374    final StringBuilder b = new StringBuilder(100);
375    appendStringTo(b);
376    return b.toString();
377  }
378
379  @Override
380  public void appendStringTo(StringBuilder sb) {
381    super.appendStringTo(sb);
382    appendUCParts(sb);
383  }
384
385  private void appendUCParts(StringBuilder sb) {
386    sb.append("{UCState=").append(blockUCState)
387      .append(", truncateBlock=" + truncateBlock)
388      .append(", primaryNodeIndex=").append(primaryNodeIndex)
389      .append(", replicas=[");
390    if (replicas != null) {
391      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
392      if (iter.hasNext()) {
393        iter.next().appendStringTo(sb);
394        while (iter.hasNext()) {
395          sb.append(", ");
396          iter.next().appendStringTo(sb);
397        }
398      }
399    }
400    sb.append("]}");
401  }
402}