/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.CountVectorizerParams;
import org.apache.spark.ml.feature.CountVectorizerParams$class;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.shared.HasInputCol$class;
import org.apache.spark.ml.param.shared.HasOutputCol$class;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

@Experimental
@ScalaSignature(bytes="\u0006\u0001M4A!\u0001\u0002\u0001\u001b\ty1i\\;oiZ+7\r^8sSj,'O\u0003\u0002\u0004\t\u00059a-Z1ukJ,'BA\u0003\u0007\u0003\tiGN\u0003\u0002\b\u0011\u0005)1\u000f]1sW*\u0011\u0011BC\u0001\u0007CB\f7\r[3\u000b\u0003-\t1a\u001c:h\u0007\u0001\u00192\u0001\u0001\b\u0017!\ry\u0001CE\u0007\u0002\t%\u0011\u0011\u0003\u0002\u0002\n\u000bN$\u0018.\\1u_J\u0004\"a\u0005\u000b\u000e\u0003\tI!!\u0006\u0002\u0003)\r{WO\u001c;WK\u000e$xN]5{KJlu\u000eZ3m!\t\u0019r#\u0003\u0002\u0019\u0005\t)2i\\;oiZ+7\r^8sSj,'\u000fU1sC6\u001c\b\u0002\u0003\u000e\u0001\u0005\u000b\u0007I\u0011I\u000e\u0002\u0007ULG-F\u0001\u001d!\ti2E\u0004\u0002\u001fC5\tqDC\u0001!\u0003\u0015\u00198-\u00197b\u0013\t\u0011s$\u0001\u0004Qe\u0016$WMZ\u0005\u0003I\u0015\u0012aa\u0015;sS:<'B\u0001\u0012 \u0011!9\u0003A!A!\u0002\u0013a\u0012\u0001B;jI\u0002BQ!\u000b\u0001\u0005\u0002)\na\u0001P5oSRtDCA\u0016-!\t\u0019\u0002\u0001C\u0003\u001bQ\u0001\u0007A\u0004C\u0003*\u0001\u0011\u0005a\u0006F\u0001,\u0011\u0015\u0001\u0004\u0001\"\u00012\u0003-\u0019X\r^%oaV$8i\u001c7\u0015\u0005I\u001aT\"\u0001\u0001\t\u000bQz\u0003\u0019\u0001\u000f\u0002\u000bY\fG.^3\t\u000bY\u0002A\u0011A\u001c\u0002\u0019M,GoT;uaV$8i\u001c7\u0015\u0005IB\u0004\"\u0002\u001b6\u0001\u0004a\u0002\"\u0002\u001e\u0001\t\u0003Y\u0014\u0001D:fiZ{7-\u00192TSj,GC\u0001\u001a=\u0011\u0015!\u0014\b1\u0001>!\tqb(\u0003\u0002@?\t\u0019\u0011J\u001c;\t\u000b\u0005\u0003A\u0011\u0001\"\u0002\u0011M,G/T5o\t\u001a#\"AM\"\t\u000bQ\u0002\u0005\u0019\u0001#\u0011\u0005y)\u0015B\u0001$ 
\u0005\u0019!u.\u001e2mK\")\u0001\n\u0001C\u0001\u0013\u0006A1/\u001a;NS:$f\t\u0006\u00023\u0015\")Ag\u0012a\u0001\t\")A\n\u0001C!\u001b\u0006\u0019a-\u001b;\u0015\u0005Iq\u0005\"B(L\u0001\u0004\u0001\u0016a\u00023bi\u0006\u001cX\r\u001e\t\u0003#Rk\u0011A\u0015\u0006\u0003'\u001a\t1a]9m\u0013\t)&KA\u0005ECR\fgI]1nK\")q\u000b\u0001C!1\u0006yAO]1og\u001a|'/\\*dQ\u0016l\u0017\r\u0006\u0002Z?B\u0011!,X\u0007\u00027*\u0011ALU\u0001\u0006if\u0004Xm]\u0005\u0003=n\u0013!b\u0015;sk\u000e$H+\u001f9f\u0011\u0015\u0001g\u000b1\u0001Z\u0003\u0019\u00198\r[3nC\")!\r\u0001C!G\u0006!1m\u001c9z)\tYC\rC\u0003fC\u0002\u0007a-A\u0003fqR\u0014\u0018\r\u0005\u0002hU6\t\u0001N\u0003\u0002j\t\u0005)\u0001/\u0019:b[&\u00111\u000e\u001b\u0002\t!\u0006\u0014\u0018-\\'ba\"\u0012\u0001!\u001c\t\u0003]Fl\u0011a\u001c\u0006\u0003a\u001a\t!\"\u00198o_R\fG/[8o\u0013\t\u0011xN\u0001\u0007FqB,'/[7f]R\fG\u000e")
public class CountVectorizer
extends Estimator<CountVectorizerModel>
implements CountVectorizerParams {
    // Identifier of this estimator instance; assigned in the CountVectorizer(String) constructor.
    private final String uid;
    // The param fields below are written by the matching `..._setter_$<name>_$eq` methods of this class.
    // NOTE(review): they are declared final yet assigned outside a constructor, which plain Java
    // rejects — presumably a decompiler artifact of Scala trait fields; confirm before recompiling.
    private final IntParam vocabSize;
    private final DoubleParam minDF;
    private final DoubleParam minTF;
    private final Param<String> outputCol;
    private final Param<String> inputCol;

    /** Returns the {@code vocabSize} param object stored on this estimator. */
    @Override
    public IntParam vocabSize() {
        return vocabSize;
    }

    /** Returns the {@code minDF} param object stored on this estimator. */
    @Override
    public DoubleParam minDF() {
        return minDF;
    }

    /** Returns the {@code minTF} param object stored on this estimator. */
    @Override
    public DoubleParam minTF() {
        return minTF;
    }

    /**
     * Stores the {@code vocabSize} param object in this instance's field.
     *
     * @param x$1 the param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$vocabSize_$eq(IntParam x$1) {
        vocabSize = x$1;
    }

    /**
     * Stores the {@code minDF} param object in this instance's field.
     *
     * @param x$1 the param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minDF_$eq(DoubleParam x$1) {
        minDF = x$1;
    }

    /**
     * Stores the {@code minTF} param object in this instance's field.
     *
     * @param x$1 the param object to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minTF_$eq(DoubleParam x$1) {
        minTF = x$1;
    }

    /**
     * Forwards to the static {@code CountVectorizerParams$class.getVocabSize} implementation.
     *
     * @return the value produced by the trait implementation for this instance
     */
    @Override
    public int getVocabSize() {
        final int size = CountVectorizerParams$class.getVocabSize(this);
        return size;
    }

    /**
     * Forwards to the static {@code CountVectorizerParams$class.getMinDF} implementation.
     *
     * @return the value produced by the trait implementation for this instance
     */
    @Override
    public double getMinDF() {
        final double threshold = CountVectorizerParams$class.getMinDF(this);
        return threshold;
    }

    /**
     * Forwards to the static {@code CountVectorizerParams$class.validateAndTransformSchema}
     * implementation.
     *
     * @param schema the schema handed to the trait implementation
     * @return the schema returned by the trait implementation
     */
    @Override
    public StructType validateAndTransformSchema(StructType schema) {
        final StructType result = CountVectorizerParams$class.validateAndTransformSchema(this, schema);
        return result;
    }

    /**
     * Forwards to the static {@code CountVectorizerParams$class.getMinTF} implementation.
     *
     * @return the value produced by the trait implementation for this instance
     */
    @Override
    public double getMinTF() {
        final double threshold = CountVectorizerParams$class.getMinTF(this);
        return threshold;
    }

    /** Returns the {@code outputCol} param object stored on this estimator. */
    @Override
    public final Param<String> outputCol() {
        return outputCol;
    }

    /**
     * Stores the {@code outputCol} param object in this instance's field.
     *
     * @param x$1 the param object to store
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasOutputCol$_setter_$outputCol_$eq(Param x$1) {
        outputCol = x$1;
    }

    /**
     * Forwards to the static {@code HasOutputCol$class.getOutputCol} implementation.
     *
     * @return the string produced by the trait implementation for this instance
     */
    @Override
    public final String getOutputCol() {
        final String column = HasOutputCol$class.getOutputCol(this);
        return column;
    }

    /** Returns the {@code inputCol} param object stored on this estimator. */
    @Override
    public final Param<String> inputCol() {
        return inputCol;
    }

    /**
     * Stores the {@code inputCol} param object in this instance's field.
     *
     * @param x$1 the param object to store
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param x$1) {
        inputCol = x$1;
    }

    /**
     * Forwards to the static {@code HasInputCol$class.getInputCol} implementation.
     *
     * @return the string produced by the trait implementation for this instance
     */
    @Override
    public final String getInputCol() {
        final String column = HasInputCol$class.getInputCol(this);
        return column;
    }

    /** Returns the identifier string this instance was constructed with. */
    @Override
    public String uid() {
        return uid;
    }

    /**
     * Sets the {@code inputCol} param to the given value via {@code set}.
     *
     * @param value the value to assign to the {@code inputCol} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setInputCol(String value) {
        final Param<String> column = inputCol();
        final CountVectorizer updated = (CountVectorizer) set(column, value);
        return updated;
    }

    /**
     * Sets the {@code outputCol} param to the given value via {@code set}.
     *
     * @param value the value to assign to the {@code outputCol} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setOutputCol(String value) {
        final Param<String> column = outputCol();
        final CountVectorizer updated = (CountVectorizer) set(column, value);
        return updated;
    }

    /**
     * Sets the {@code vocabSize} param to the given value via {@code set}.
     *
     * @param value the value to assign; boxed before being handed to {@code set}
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setVocabSize(int value) {
        final IntParam param = vocabSize();
        final CountVectorizer updated = (CountVectorizer) set(param, BoxesRunTime.boxToInteger(value));
        return updated;
    }

    /**
     * Sets the {@code minDF} param to the given value via {@code set}.
     *
     * @param value the value to assign; boxed before being handed to {@code set}
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinDF(double value) {
        final DoubleParam param = minDF();
        final CountVectorizer updated = (CountVectorizer) set(param, BoxesRunTime.boxToDouble(value));
        return updated;
    }

    /**
     * Sets the {@code minTF} param to the given value via {@code set}.
     *
     * @param value the value to assign; boxed before being handed to {@code set}
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinTF(double value) {
        final DoubleParam param = minTF();
        final CountVectorizer updated = (CountVectorizer) set(param, BoxesRunTime.boxToDouble(value));
        return updated;
    }

    /**
     * Builds a vocabulary from the token sequences in the input column and returns a
     * {@link CountVectorizerModel} holding it.
     *
     * <p>Steps, as implemented below:
     * <ol>
     *   <li>Validate the dataset schema through the two-argument {@code transformSchema}.</li>
     *   <li>Read column 0 of the selected input column as a {@code Seq<String>} per row.</li>
     *   <li>Resolve the document-frequency threshold: a {@code minDF} value {@code >= 1.0} is used
     *       as is; otherwise it is multiplied by the number of input rows (the input RDD is cached
     *       for that count).</li>
     *   <li>Per row, count each token; across rows, sum the token counts and the number of rows
     *       containing the token; drop tokens whose row count is below the threshold.</li>
     *   <li>Keep at most {@code vocabSize} tokens, ordered by descending total count.</li>
     * </ol>
     *
     * <p>Both RDDs cached by this method are unpersisted once the vocabulary has been brought
     * to the driver, so repeated calls do not accumulate cached blocks.
     *
     * @param dataset the data to learn the vocabulary from
     * @return a model built from this estimator's uid and the selected vocabulary, with this
     *         estimator set as parent and param values copied via {@code copyValues}
     * @throws IllegalArgumentException presumably, via {@code Predef.require}, when no token
     *         survives the filter (message: "The vocabulary size should be > 0. ...")
     */
    @Override
    public CountVectorizerModel fit(DataFrame dataset) {
        this.transformSchema(dataset.schema(), true);
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$(this.vocabSize()));
        // One token sequence per row, taken from the first (only) selected column.
        RDD input = dataset.select(this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Seq<String> apply(Row x$1) {
                return (Seq)x$1.getAs(0);
            }
        }, ClassTag$.MODULE$.apply(Seq.class));
        // Read the param once; it is needed both for the branch and for the threshold itself.
        double minDfParam = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF()));
        double minDf;
        // Remembers whether this call cached `input`, so it can be released afterwards.
        boolean inputCached;
        if (minDfParam >= 1.0) {
            minDf = minDfParam;
            inputCached = false;
        } else {
            // Fractional threshold: scale by the row count. The input is cached because it is
            // traversed again by the word-count job below.
            minDf = minDfParam * (double)input.cache().count();
            inputCached = true;
        }
        RDD wordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            // Emits (word, (occurrences in this row, 1)) for every distinct word of one row.
            public final Iterable<Tuple2<String, Tuple2<Object, Object>>> apply(Seq<String> x0$1) {
                Seq<String> seq = x0$1;
                OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
                seq.foreach((Function1)new Serializable(this, (OpenHashMap)wc){
                    public static final long serialVersionUID = 0L;
                    private final OpenHashMap wc$1;

                    // Inserts 1 for a new word, otherwise increments the stored count.
                    public final long apply(String w2) {
                        return this.wc$1.changeValue$mcJ$sp((Object)w2, (Function0)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply() {
                                return this.apply$mcJ$sp();
                            }

                            public long apply$mcJ$sp() {
                                return 1L;
                            }
                        }, (Function1)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply(long x$2) {
                                return this.apply$mcJJ$sp(x$2);
                            }

                            public long apply$mcJJ$sp(long x$2) {
                                return x$2 + 1L;
                            }
                        });
                    }
                    {
                        this.wc$1 = wc$1;
                    }
                });
                Iterable iterable = (Iterable)wc.map((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final Tuple2<String, Tuple2<Object, Object>> apply(Tuple2<String, Object> x0$2) {
                        Tuple2<String, Object> tuple2 = x0$2;
                        if (tuple2 != null) {
                            String word = (String)tuple2._1();
                            long count = tuple2._2$mcJ$sp();
                            Tuple2 tuple22 = new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                            return tuple22;
                        }
                        throw new MatchError(tuple2);
                    }
                }, Iterable$.MODULE$.canBuildFrom());
                return iterable;
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2)new Serializable(this){
            public static final long serialVersionUID = 0L;

            // Adds up total occurrences and the number of rows containing the word.
            public final Tuple2<Object, Object> apply(Tuple2<Object, Object> x0$3, Tuple2<Object, Object> x1$1) {
                Tuple2 tuple2 = new Tuple2(x0$3, x1$1);
                if (tuple2 != null) {
                    Tuple2 tuple22 = (Tuple2)tuple2._1();
                    Tuple2 tuple23 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long wc1 = tuple22._1$mcJ$sp();
                        int df1 = tuple22._2$mcI$sp();
                        if (tuple23 != null) {
                            long wc2 = tuple23._1$mcJ$sp();
                            int df2 = tuple23._2$mcI$sp();
                            Tuple2.mcJI.sp sp2 = new Tuple2.mcJI.sp(wc1 + wc2, df1 + df2);
                            return sp2;
                        }
                    }
                }
                throw new MatchError((Object)tuple2);
            }
        }).filter((Function1)new Serializable(this, minDf){
            public static final long serialVersionUID = 0L;
            private final double minDf$1;

            // Keeps only words that appear in at least minDf rows.
            public final boolean apply(Tuple2<String, Tuple2<Object, Object>> x0$4) {
                Tuple2 tuple2;
                Tuple2<String, Tuple2<Object, Object>> tuple22 = x0$4;
                if (tuple22 != null && (tuple2 = (Tuple2)tuple22._2()) != null) {
                    int df = tuple2._2$mcI$sp();
                    boolean bl = (double)df >= this.minDf$1;
                    return bl;
                }
                throw new MatchError(tuple22);
            }
            {
                this.minDf$1 = minDf$1;
            }
        }).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            // Drops the row count, leaving (word, total occurrences).
            public final Tuple2<String, Object> apply(Tuple2<String, Tuple2<Object, Object>> x0$5) {
                Tuple2<String, Tuple2<Object, Object>> tuple2 = x0$5;
                if (tuple2 != null) {
                    String word = (String)tuple2._1();
                    Tuple2 tuple22 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long count = tuple22._1$mcJ$sp();
                        Tuple2 tuple23 = new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                        return tuple23;
                    }
                }
                throw new MatchError(tuple2);
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class)).cache();
        long fullVocabSize = wordCounts.count();
        // Everything fits: collect and sort on the driver (negated key gives descending order).
        // Otherwise: sort descending on the cluster and take only the first vocSize entries.
        Tuple2[] tmpSortedWC = fullVocabSize <= (long)vocSize ? (Tuple2[])Predef$.MODULE$.refArrayOps((Object[])wordCounts.collect()).sortBy((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final long apply(Tuple2<String, Object> x$3) {
                return -x$3._2$mcJ$sp();
            }
        }, (Ordering)Ordering.Long$.MODULE$) : (Tuple2[])wordCounts.sortBy((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final long apply(Tuple2<String, Object> x$4) {
                return x$4._2$mcJ$sp();
            }
        }, false, wordCounts.sortBy$default$3(), (Ordering)Ordering.Long$.MODULE$, ClassTag$.MODULE$.Long()).take(vocSize);
        // The selected entries now live on the driver, so the cached RDDs are no longer needed.
        // Release them before the require check so they are freed even when it fails.
        wordCounts.unpersist(true);
        if (inputCached) {
            input.unpersist(true);
        }
        String[] vocab = (String[])Predef$.MODULE$.refArrayOps((Object[])tmpSortedWC).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply(Tuple2<String, Object> x$5) {
                return (String)x$5._1();
            }
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        Predef$.MODULE$.require(vocab.length > 0, (Function0)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply() {
                return "The vocabulary size should be > 0. Lower minDF as necessary.";
            }
        });
        return this.copyValues(new CountVectorizerModel(this.uid(), vocab).setParent(this), this.copyValues$default$2());
    }

    /**
     * Derives the output schema by delegating to {@code validateAndTransformSchema}.
     *
     * @param schema the input schema
     * @return the schema returned by {@code validateAndTransformSchema}
     */
    @Override
    public StructType transformSchema(StructType schema) {
        final StructType transformed = validateAndTransformSchema(schema);
        return transformed;
    }

    /**
     * Copies this estimator by delegating to {@code defaultCopy}.
     *
     * @param extra the param map handed to {@code defaultCopy}
     * @return the result of {@code defaultCopy}, cast to {@code CountVectorizer}
     */
    @Override
    public CountVectorizer copy(ParamMap extra) {
        final CountVectorizer duplicate = (CountVectorizer) defaultCopy(extra);
        return duplicate;
    }

    public CountVectorizer(String uid) {
        this.uid = uid;
        HasInputCol$class.$init$(this);
        HasOutputCol$class.$init$(this);
        CountVectorizerParams$class.$init$(this);
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{this.vocabSize().$minus$greater(BoxesRunTime.boxToInteger((int)262144)), this.minDF().$minus$greater(BoxesRunTime.boxToDouble((double)1.0))}));
    }

    /**
     * Creates an estimator whose identifier is obtained from
     * {@code Identifiable$.MODULE$.randomUID("cntVec")}, then delegates to the
     * {@code CountVectorizer(String)} constructor.
     */
    public CountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("cntVec"));
    }
}

