package org.apache.iceberg.mr.mapreduce;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.data.GenericDeleteFilter;
import org.apache.iceberg.data.IdentityPartitionConverters;
import org.apache.iceberg.data.InternalRecordWrapper;
import org.apache.iceberg.data.avro.DataReader;
import org.apache.iceberg.data.orc.GenericOrcReader;
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.encryption.EncryptedFiles;
import org.apache.iceberg.encryption.EncryptionManager;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hive.MetastoreUtil;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandler;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.PartitionUtil;
import org.apache.iceberg.util.SerializationUtil;

/* loaded from: input_file:org/apache/iceberg/mr/mapreduce/IcebergInputFormat.class */
public class IcebergInputFormat<T> extends InputFormat<Void, T> {

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.iceberg.mr.mapreduce.IcebergInputFormat$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/iceberg/mr/mapreduce/IcebergInputFormat$1.class */
    /**
     * Decompiler rendering of the compiler-generated switch lookup tables.
     *
     * <p>Each table is indexed by an enum constant's {@code ordinal()} and holds the small integer
     * case label that the {@code switch} statements in {@code IcebergRecordReader} branch on:
     * PIG=1, HIVE=2, GENERIC=3 for the in-memory data model and AVRO=1, ORC=2, PARQUET=3 for the
     * file format. Entries that are never assigned keep the array default of 0 and therefore match
     * no case label.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // Lookup table for FileFormat; allocated inside the static initializer below.
        static final /* synthetic */ int[] $SwitchMap$org$apache$iceberg$FileFormat;
        // Lookup table for InMemoryDataModel; sized to the number of constants visible at class-init time.
        static final /* synthetic */ int[] $SwitchMap$org$apache$iceberg$mr$InputFormatConfig$InMemoryDataModel = new int[InputFormatConfig.InMemoryDataModel.values().length];

        static {
            // NOTE(review): the empty NoSuchFieldError handlers presumably tolerate an enum constant
            // that is absent from the enum class loaded at runtime; confirm before changing them.
            try {
                $SwitchMap$org$apache$iceberg$mr$InputFormatConfig$InMemoryDataModel[InputFormatConfig.InMemoryDataModel.PIG.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$iceberg$mr$InputFormatConfig$InMemoryDataModel[InputFormatConfig.InMemoryDataModel.HIVE.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$apache$iceberg$mr$InputFormatConfig$InMemoryDataModel[InputFormatConfig.InMemoryDataModel.GENERIC.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            // The FileFormat table is populated the same way as the data-model table above.
            $SwitchMap$org$apache$iceberg$FileFormat = new int[FileFormat.values().length];
            try {
                $SwitchMap$org$apache$iceberg$FileFormat[FileFormat.AVRO.ordinal()] = 1;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$apache$iceberg$FileFormat[FileFormat.ORC.ordinal()] = 2;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$apache$iceberg$FileFormat[FileFormat.PARQUET.ordinal()] = 3;
            } catch (NoSuchFieldError e6) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/mr/mapreduce/IcebergInputFormat$IcebergRecordReader.class */
    public static final class IcebergRecordReader<T> extends RecordReader<Void, T> {
        // Fully-qualified name of the class whose static "reader" method is looked up reflectively.
        private static final String HIVE_VECTORIZED_READER_CLASS = "org.apache.iceberg.mr.hive.vector.HiveVectorizedReader";
        // Reflective handle to that method; left null by the static initializer when Hive 3 is not on the classpath.
        private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER;
        // Task attempt context captured in initialize(); supplies the configuration and the progress value.
        private TaskAttemptContext context;
        // Table schema read from the job configuration in initialize().
        private Schema tableSchema;
        // Schema the records are projected to, resolved by readSchema() in initialize().
        private Schema expectedSchema;
        // Value of InputFormatConfig.REUSE_CONTAINERS (defaults to false).
        private boolean reuseContainers;
        // Value of InputFormatConfig.CASE_SENSITIVE (defaults to true).
        private boolean caseSensitive;
        // Value of InputFormatConfig.IN_MEMORY_DATA_MODEL (defaults to GENERIC).
        private InputFormatConfig.InMemoryDataModel inMemoryDataModel;
        // Iterator over the file scan tasks of the split's combined scan task.
        private Iterator<FileScanTask> tasks;
        // Record most recently produced by nextKeyValue().
        private T current;
        // Iterator over the records of the file scan task currently being read.
        private CloseableIterator<T> currentIterator;
        // File IO taken from the split; used to open data files.
        private FileIO io;
        // Encryption manager taken from the split; used to decrypt data files before reading.
        private EncryptionManager encryptionManager;

        // Instances are created without state; every field above is populated by initialize().
        private IcebergRecordReader() {
        }

        /**
         * Binds this reader to a split and opens its first file scan task.
         *
         * <p>The split must be an {@link IcebergSplit}; its combined scan task, file IO and
         * encryption manager are captured, the read options are taken from the task attempt's
         * configuration, and the record iterator for the first file scan task is opened.
         *
         * @param inputSplit the split to read, cast to {@link IcebergSplit}
         * @param taskAttemptContext context providing the configuration and progress reporting
         */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
            Configuration conf = taskAttemptContext.getConfiguration();
            IcebergSplit icebergSplit = (IcebergSplit) inputSplit;
            CombinedScanTask combinedTask = icebergSplit.task();

            this.context = taskAttemptContext;
            this.io = icebergSplit.io();
            this.encryptionManager = icebergSplit.encryptionManager();
            this.tasks = combinedTask.files().iterator();

            // Read options; the expected schema depends on the table schema and case sensitivity.
            this.tableSchema = InputFormatConfig.tableSchema(conf);
            this.caseSensitive = conf.getBoolean(InputFormatConfig.CASE_SENSITIVE, true);
            this.expectedSchema = readSchema(conf, this.tableSchema, this.caseSensitive);
            this.reuseContainers = conf.getBoolean(InputFormatConfig.REUSE_CONTAINERS, false);
            this.inMemoryDataModel =
                    conf.getEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL, InputFormatConfig.InMemoryDataModel.GENERIC);

            FileScanTask firstTask = this.tasks.next();
            this.currentIterator = open(firstTask, this.expectedSchema).iterator();
        }

        /**
         * Advances to the next record, moving on to the next file scan task whenever the current
         * one is exhausted.
         *
         * @return {@code true} if a record is now available through {@link #getCurrentValue()},
         *     {@code false} once every file scan task has been consumed
         * @throws IOException if closing an exhausted iterator fails
         */
        public boolean nextKeyValue() throws IOException {
            while (true) {
                if (currentIterator.hasNext()) {
                    current = currentIterator.next();
                    return true;
                }

                // The current file is exhausted: release it whether or not another task follows.
                boolean moreTasks = tasks.hasNext();
                currentIterator.close();
                if (!moreTasks) {
                    return false;
                }
                currentIterator = open(tasks.next(), expectedSchema).iterator();
            }
        }

        /**
         * Returns the key of the current record. This reader produces values only, so the key is
         * always {@code null}.
         *
         * <p>The decompiler had renamed this method to {@code m44getCurrentKey}, which left the
         * abstract {@code RecordReader.getCurrentKey()} unimplemented; the original name is restored.
         *
         * @return always {@code null}
         */
        @Override
        public Void getCurrentKey() {
            return null;
        }

        /**
         * Alias kept only so that code referring to the decompiler-generated name keeps compiling.
         *
         * @return always {@code null}
         * @deprecated use {@link #getCurrentKey()}
         */
        @Deprecated
        public Void m44getCurrentKey() {
            return getCurrentKey();
        }

        /**
         * Returns the record produced by the most recent successful {@link #nextKeyValue()} call.
         *
         * @return the current record, or {@code null} if no record has been read yet
         */
        public T getCurrentValue() {
            return current;
        }

        /**
         * Reports progress by delegating to the task attempt context captured in
         * {@code initialize}.
         *
         * @return the progress value reported by the task attempt context
         */
        public float getProgress() {
            TaskAttemptContext attempt = context;
            return attempt.getProgress();
        }

        /**
         * Closes the iterator over the file scan task currently being read.
         *
         * <p>Safe to call when {@code initialize} was never invoked or failed before the first
         * iterator was opened; in that case there is nothing to release.
         *
         * @throws IOException if closing the underlying iterator fails
         */
        @Override
        public void close() throws IOException {
            // currentIterator is only assigned at the end of initialize(), so it can still be null here.
            if (currentIterator != null) {
                currentIterator.close();
            }
        }

        /**
         * Opens the data file of a file scan task with the reader matching its file format.
         *
         * <p>The file is opened through the split's file IO and decrypted with the split's
         * encryption manager before being handed to the format-specific reader.
         *
         * @param fileScanTask the task describing the data file and the byte range to read
         * @param schema the schema to project the records to
         * @return an iterable over the records of the task
         * @throws UnsupportedOperationException if the file format is not Avro, ORC or Parquet
         */
        private CloseableIterable<T> openTask(FileScanTask fileScanTask, Schema schema) {
            DataFile file = fileScanTask.file();
            InputFile inputFile = this.encryptionManager.decrypt(
                    EncryptedFiles.encryptedInput(this.io.newInputFile(file.path().toString()), file.keyMetadata()));

            // Switch on the enum itself instead of the synthetic ordinal lookup table.
            switch (file.format()) {
                case AVRO:
                    return newAvroIterable(inputFile, fileScanTask, schema);
                case ORC:
                    return newOrcIterable(inputFile, fileScanTask, schema);
                case PARQUET:
                    return newParquetIterable(inputFile, fileScanTask, schema);
                default:
                    throw new UnsupportedOperationException(String.format("Cannot read %s file: %s", file.format().name(), file.path()));
            }
        }

        /**
         * Opens a file scan task according to the configured in-memory data model.
         *
         * <p>For the generic model the records are read with the schema required by a
         * {@link GenericDeleteFilter} built for the task and then passed through that filter. For
         * the Hive model the task is opened directly with the requested schema.
         *
         * @param fileScanTask the task to open
         * @param schema the schema requested by the caller
         * @return an iterable over the records of the task
         * @throws UnsupportedOperationException for the Pig model or an unknown model
         */
        private CloseableIterable<T> open(FileScanTask fileScanTask, Schema schema) {
            // Switch on the enum itself instead of the synthetic ordinal lookup table.
            switch (this.inMemoryDataModel) {
                case PIG:
                    throw new UnsupportedOperationException("Pig and Hive object models are not supported.");
                case HIVE:
                    return openTask(fileScanTask, schema);
                case GENERIC:
                    GenericDeleteFilter deleteFilter = new GenericDeleteFilter(this.io, fileScanTask, this.tableSchema, schema);
                    // The filter may need more columns than the caller asked for, so read with its schema.
                    return deleteFilter.filter(openTask(fileScanTask, deleteFilter.requiredSchema()));
                default:
                    throw new UnsupportedOperationException("Unsupported memory model");
            }
        }

        /**
         * Wraps an iterable so that only records matching the residual expression are returned.
         *
         * <p>The iterable is returned unchanged when residual filtering is switched off through
         * {@code InputFormatConfig.SKIP_RESIDUAL_FILTERING}, when there is no residual, or when the
         * residual is the always-true expression.
         *
         * @param closeableIterable the records to filter
         * @param expression the residual expression of the file scan task, may be {@code null}
         * @param schema the schema the records conform to
         * @return the filtered iterable, or the input itself when no filtering applies
         */
        private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> closeableIterable, Expression expression, Schema schema) {
            boolean skipResiduals =
                    this.context.getConfiguration().getBoolean(InputFormatConfig.SKIP_RESIDUAL_FILTERING, false);
            boolean nothingToFilter = expression == null || expression == Expressions.alwaysTrue();
            if (skipResiduals || nothingToFilter) {
                return closeableIterable;
            }

            // Records are wrapped before evaluation; the wrapper and evaluator are shared by all rows.
            InternalRecordWrapper wrapper = new InternalRecordWrapper(schema.asStruct());
            Evaluator residualEvaluator = new Evaluator(schema.asStruct(), expression, this.caseSensitive);
            return CloseableIterable.filter(closeableIterable, row -> residualEvaluator.eval(wrapper.wrap((StructLike) row)));
        }

        /**
         * Builds an Avro reader for the byte range of a file scan task.
         *
         * <p>Only the generic data model is supported; its reader is created with the constants
         * map of the task. Residual filtering is applied on top of the built reader.
         *
         * @param inputFile the (decrypted) data file
         * @param fileScanTask the task providing the split offsets and the residual
         * @param schema the schema to project the records to
         * @return an iterable over the matching records
         * @throws UnsupportedOperationException for the Pig and Hive data models
         */
        private CloseableIterable<T> newAvroIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
            Avro.ReadBuilder builder = Avro.read(inputFile).project(schema).split(fileScanTask.start(), fileScanTask.length());
            if (this.reuseContainers) {
                builder.reuseContainers();
            }

            // Switch on the enum itself instead of the synthetic ordinal lookup table.
            switch (this.inMemoryDataModel) {
                case PIG:
                case HIVE:
                    throw new UnsupportedOperationException("Avro support not yet supported for Pig and Hive");
                case GENERIC:
                    builder.createReaderFunc((expectedSchema, fileSchema) ->
                            DataReader.create(expectedSchema, fileSchema, constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant)));
                    break;
                default:
                    // Any other model keeps the builder's default reader, as before.
                    break;
            }
            return applyResidualFiltering(builder.build(), fileScanTask.residual(), schema);
        }

        /**
         * Builds a Parquet reader for the byte range of a file scan task.
         *
         * <p>The task's residual is pushed into the Parquet reader as a filter and additionally
         * applied on top of the built reader. Only the generic data model is supported; its reader
         * is created with the constants map of the task.
         *
         * @param inputFile the (decrypted) data file
         * @param fileScanTask the task providing the split offsets and the residual
         * @param schema the schema to project the records to
         * @return an iterable over the matching records
         * @throws UnsupportedOperationException for the Pig and Hive data models
         */
        private CloseableIterable<T> newParquetIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
            Parquet.ReadBuilder builder = Parquet.read(inputFile)
                    .project(schema)
                    .filter(fileScanTask.residual())
                    .caseSensitive(this.caseSensitive)
                    .split(fileScanTask.start(), fileScanTask.length());
            if (this.reuseContainers) {
                builder.reuseContainers();
            }

            // Switch on the enum itself instead of the synthetic ordinal lookup table.
            switch (this.inMemoryDataModel) {
                case PIG:
                case HIVE:
                    throw new UnsupportedOperationException("Parquet support not yet supported for Pig and Hive");
                case GENERIC:
                    builder.createReaderFunc(fileSchema ->
                            GenericParquetReaders.buildReader(schema, fileSchema, constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant)));
                    break;
                default:
                    // Any other model keeps the builder's default reader, as before.
                    break;
            }
            return applyResidualFiltering(builder.build(), fileScanTask.residual(), schema);
        }

        /**
         * Builds an ORC reader for the byte range of a file scan task.
         *
         * <p>Columns covered by the task's constants map and the metadata columns are removed from
         * the schema projected onto the file. The Hive model uses the reflectively-loaded
         * vectorized reader, which requires Hive 3 on the classpath; the generic model uses
         * {@link GenericOrcReader}. Residual filtering is applied on top of the built reader.
         *
         * @param inputFile the (decrypted) data file
         * @param fileScanTask the task providing the split offsets and the residual
         * @param schema the schema to project the records to
         * @return an iterable over the matching records
         * @throws UnsupportedOperationException for the Pig model, for the Hive model without
         *     Hive 3 on the classpath, or for an unknown model
         */
        private CloseableIterable<T> newOrcIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
            Map<Integer, ?> constants = constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant);
            Schema fileProjection = TypeUtil.selectNot(schema, Sets.union(constants.keySet(), MetadataColumns.metadataFieldIds()));

            CloseableIterable<T> records;
            // Switch on the enum itself instead of the synthetic ordinal lookup table.
            switch (this.inMemoryDataModel) {
                case PIG:
                    throw new UnsupportedOperationException("ORC support not yet supported for Pig and Hive");
                case HIVE:
                    if (!MetastoreUtil.hive3PresentOnClasspath()) {
                        throw new UnsupportedOperationException("Vectorized read is unsupported for Hive 2 integration.");
                    }
                    // The reflective call is untyped; the element type is whatever the vectorized reader yields.
                    @SuppressWarnings("unchecked")
                    CloseableIterable<T> vectorized =
                            (CloseableIterable<T>) HIVE_VECTORIZED_READER_BUILDER.invoke(inputFile, fileScanTask, constants, this.context);
                    records = vectorized;
                    break;
                case GENERIC:
                    ORC.ReadBuilder builder = ORC.read(inputFile)
                            .project(fileProjection)
                            .filter(fileScanTask.residual())
                            .caseSensitive(this.caseSensitive)
                            .split(fileScanTask.start(), fileScanTask.length());
                    builder.createReaderFunc(fileSchema -> GenericOrcReader.buildReader(schema, fileSchema, constants));
                    records = builder.build();
                    break;
                default:
                    // Previously a null iterable would have been passed on to residual filtering.
                    throw new UnsupportedOperationException("Unsupported memory model");
            }
            return applyResidualFiltering(records, fileScanTask.residual(), schema);
        }

        /**
         * Returns the constant values of a file scan task, keyed by field id.
         *
         * <p>The constants are only computed when the expected schema contains at least one of the
         * identity source columns of the task's partition spec; otherwise an empty map is returned.
         *
         * @param fileScanTask the task whose constants are wanted
         * @param biFunction converter applied to each constant for its type
         * @return the constants map, or an empty map when no identity source column is projected
         */
        private Map<Integer, ?> constantsMap(FileScanTask fileScanTask, BiFunction<Type, Object, Object> biFunction) {
            Schema projectedIdentityColumns =
                    TypeUtil.select(this.expectedSchema, fileScanTask.spec().identitySourceIds());
            if (projectedIdentityColumns.columns().isEmpty()) {
                return Collections.emptyMap();
            }
            return PartitionUtil.constantsMap(fileScanTask, biFunction);
        }

        /**
         * Resolves the schema records are read with.
         *
         * <p>An explicit read schema in the configuration wins. Otherwise the configured selected
         * columns are picked from the table schema, case-sensitively or not as requested. With
         * neither configured, the table schema itself is used.
         *
         * @param configuration the job configuration
         * @param schema the table schema
         * @param caseSensitive whether selected column names are matched case-sensitively
         * @return the schema to read with
         */
        private static Schema readSchema(Configuration configuration, Schema schema, boolean caseSensitive) {
            Schema explicitSchema = InputFormatConfig.readSchema(configuration);
            if (explicitSchema != null) {
                return explicitSchema;
            }

            String[] columns = InputFormatConfig.selectedColumns(configuration);
            if (columns == null) {
                return schema;
            }
            if (caseSensitive) {
                return schema.select(columns);
            }
            return schema.caseInsensitiveSelect(columns);
        }

        // Synthetic accessor constructor: it ignores its argument and only delegates to the private
        // no-arg constructor. createRecordReader invokes it with null.
        /* synthetic */ IcebergRecordReader(AnonymousClass1 anonymousClass1) {
            this();
        }

        // Resolves the vectorized reader's static "reader" method only when Hive 3 is on the
        // classpath; otherwise the handle stays null.
        static {
            DynMethods.StaticMethod vectorizedReader = null;
            if (MetastoreUtil.hive3PresentOnClasspath()) {
                Class<?>[] readerArgTypes = new Class<?>[] {InputFile.class, FileScanTask.class, Map.class, TaskAttemptContext.class};
                vectorizedReader = DynMethods.builder("reader")
                        .impl(HIVE_VECTORIZED_READER_CLASS, readerArgTypes)
                        .buildStatic();
            }
            HIVE_VECTORIZED_READER_BUILDER = vectorizedReader;
        }
    }

    /**
     * Registers this class as the input format of a job and returns a builder over the job's
     * configuration for setting the read options.
     *
     * @param job the job to configure
     * @return a config builder backed by the job's configuration
     */
    public static InputFormatConfig.ConfigBuilder configure(Job job) {
        job.setInputFormatClass(IcebergInputFormat.class);
        Configuration jobConf = job.getConfiguration();
        return new InputFormatConfig.ConfigBuilder(jobConf);
    }

    /**
     * Plans the table scan described by the job configuration and wraps every planned combined
     * scan task in an {@link IcebergSplit}.
     *
     * <p>The table is taken from the configuration when the storage handler can provide it and
     * loaded through {@link Catalogs} otherwise. Snapshot id, as-of timestamp, split size,
     * projection and filter expression are applied to the scan when configured. An explicit read
     * schema takes precedence over the selected columns, matching how the record reader resolves
     * its schema.
     *
     * <p>When residual filtering is enabled and the data model is Hive or Pig, planning fails if
     * any task carries a residual.
     *
     * @param jobContext the job whose configuration describes the scan
     * @return one split per planned combined scan task
     * @throws UnsupportedOperationException if a residual remains for the Hive or Pig model
     * @throws UncheckedIOException if closing the planned tasks fails
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) {
        Configuration configuration = jobContext.getConfiguration();
        Table table = Optional
                .ofNullable(HiveIcebergStorageHandler.table(configuration, configuration.get(InputFormatConfig.TABLE_IDENTIFIER)))
                .orElseGet(() -> Catalogs.loadTable(configuration));

        // Every refinement returns a new scan, so each result has to be kept.
        TableScan scan = table.newScan().caseSensitive(configuration.getBoolean(InputFormatConfig.CASE_SENSITIVE, true));
        long snapshotId = configuration.getLong(InputFormatConfig.SNAPSHOT_ID, -1L);
        if (snapshotId != -1) {
            scan = scan.useSnapshot(snapshotId);
        }
        long asOfTime = configuration.getLong(InputFormatConfig.AS_OF_TIMESTAMP, -1L);
        if (asOfTime != -1) {
            scan = scan.asOfTime(asOfTime);
        }
        long splitSize = configuration.getLong(InputFormatConfig.SPLIT_SIZE, 0L);
        if (splitSize > 0) {
            scan = scan.option("read.split.target-size", String.valueOf(splitSize));
        }

        // The projection results used to be discarded, so the scan never saw them.
        String schemaJson = configuration.get(InputFormatConfig.READ_SCHEMA);
        String[] selectedColumns = configuration.getStrings(InputFormatConfig.SELECTED_COLUMNS);
        if (schemaJson != null) {
            scan = scan.project(SchemaParser.fromJson(schemaJson));
        } else if (selectedColumns != null) {
            scan = scan.select(selectedColumns);
        }

        Expression filter = SerializationUtil.deserializeFromBase64(configuration.get(InputFormatConfig.FILTER_EXPRESSION));
        if (filter != null) {
            scan = scan.filter(filter);
        }

        boolean applyResidual = !configuration.getBoolean(InputFormatConfig.SKIP_RESIDUAL_FILTERING, false);
        InputFormatConfig.InMemoryDataModel model =
                configuration.getEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL, InputFormatConfig.InMemoryDataModel.GENERIC);
        boolean rejectResiduals = applyResidual
                && (model == InputFormatConfig.InMemoryDataModel.HIVE || model == InputFormatConfig.InMemoryDataModel.PIG);

        List<InputSplit> splits = Lists.newArrayList();
        // try-with-resources closes the planned tasks even when a residual check throws.
        try (CloseableIterable<CombinedScanTask> plannedTasks = scan.planTasks()) {
            for (CombinedScanTask combinedTask : plannedTasks) {
                if (rejectResiduals) {
                    checkResiduals(combinedTask);
                }
                splits.add(new IcebergSplit(configuration, combinedTask, table.io(), table.encryption()));
            }
        } catch (IOException e) {
            throw new UncheckedIOException(String.format("Failed to close table scan: %s", scan), e);
        }
        return splits;
    }

    /**
     * Verifies that no file scan task of a combined task carries a residual filter.
     *
     * @param combinedScanTask the combined task whose file scan tasks are checked
     * @throws UnsupportedOperationException if a file scan task has a residual other than the
     *     always-true expression
     */
    private static void checkResiduals(CombinedScanTask combinedScanTask) {
        for (FileScanTask fileTask : combinedScanTask.files()) {
            Expression residual = fileTask.residual();
            boolean fullySatisfied = residual == null || residual.equals(Expressions.alwaysTrue());
            if (!fullySatisfied) {
                throw new UnsupportedOperationException(String.format("Filter expression %s is not completely satisfied. Additional rows can be returned not satisfied by the filter expression", residual));
            }
        }
    }

    /**
     * Creates an uninitialized record reader; the split and context are bound to it later through
     * the reader's {@code initialize} method, so both arguments are unused here.
     *
     * @param inputSplit the split the reader will be initialized with (unused here)
     * @param taskAttemptContext the task attempt context (unused here)
     * @return a new, uninitialized reader
     */
    @Override
    public RecordReader<Void, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
        // The private no-arg constructor is accessible from the enclosing class; no raw type or
        // synthetic accessor argument is needed.
        return new IcebergRecordReader<>();
    }
}
