/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import java.io.Serializable;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.feature.RegexTokenizer$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005%g\u0001\u0002\u0010 \u0001)B\u0001\"\u0014\u0001\u0003\u0006\u0004%\tE\u0014\u0005\t1\u0002\u0011\t\u0011)A\u0005_!)!\f\u0001C\u00017\")!\f\u0001C\u0001?\"9\u0011\r\u0001b\u0001\n\u0003\u0011\u0007B\u00026\u0001A\u0003%1\rC\u0003m\u0001\u0011\u0005Q\u000eC\u0003w\u0001\u0011\u0005q\u000fC\u0004z\u0001\t\u0007I\u0011\u0001>\t\r}\u0004\u0001\u0015!\u0003|\u0011\u001d\t\u0019\u0001\u0001C\u0001\u0003\u000bAq!!\u0005\u0001\t\u0003\t\u0019\u0002C\u0005\u0002\u0018\u0001\u0011\r\u0011\"\u0001\u0002\u001a!A\u00111\u0005\u0001!\u0002\u0013\tY\u0002C\u0004\u0002(\u0001!\t!!\u000b\t\r\u0005=\u0002\u0001\"\u0001O\u0011!\t\u0019\u0004\u0001b\u0001\n\u000bQ\bbBA\u001e\u0001\u0001\u0006ia\u001f\u0005\b\u0003\u007f\u0001A\u0011AA!\u0011\u001d\t9\u0005\u0001C\u0001\u0003'Aq!a\u0013\u0001\t#\ni\u0005C\u0004\u0002V\u0001!\t&a\u0016\t\u000f\u0005M\u0004\u0001\"\u0015\u0002v!9\u0011q\u000f\u0001\u0005B\u0005etaBAG?!\u0005\u0011q\u0012\u0004\u0007=}A\t!!%\t\riSB\u0011AAS\u0011\u001d\t9K\u0007C!\u0003SC\u0011\"!-\u001b\u0003\u0003%I!a-\u0003\u001dI+w-\u001a=U_.,g.\u001b>fe*\u0011\u0001%I\u0001\bM\u0016\fG/\u001e:f\u0015\t\u00113%\u0001\u0002nY*\u0011A%J\u0001\u0006gB\f'o\u001b\u0006\u0003M\u001d\na!\u00199bG\",'\"\u0001\u0015\u0002\u0007=\u0014xm\u0001\u0001\u0014\u0007\u0001Ys\tE\u0003-[=bT)D\u0001\"\u0013\tq\u0013E\u0001\tV]\u0006\u0014\u0018\u0010\u0016:b]N4wN]7feB\u0011\u0001'\u000f\b\u0003c]\u0002\"AM\u001b\u000e\u0003MR!\u0001N\u0015\u0002\rq\u0012xn\u001c;?\u0015\u00051\u0014!B:dC2\f\u0017B\u0001\u001d6\u0003\u0019\u0001&/\u001a3fM&\u0011!h\u000f\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005a*\u0004cA\u001fC_9\u0011a\b\u0011\b\u0003e}J\u0011AN\u0005\u0003\u0003V\nq\u0001]1dW\u0006<W-\u0003\u0002D\t\n\u00191+Z9\u000b\u0005\u0005+\u0004C\u0001$\u0001\u001b\u0005y\u0002C\u0001%L\u001b\u0005I%B\u0001&\"\u0003\u0011)H/\u001b7\n\u00051K%!\u0006#fM\u0006,H\u000e\u001e)be\u0006l7o\u0016:ji\u0006\u0014G.Z\u0001\u0004k&$W#A\u0018)\u0007\u0005\u0001f\u00
0b\u0005\u0002R)6\t!K\u0003\u0002TG\u0005Q\u0011M\u001c8pi\u0006$\u0018n\u001c8\n\u0005U\u0013&!B*j]\u000e,\u0017%A,\u0002\u000bErCG\f\u0019\u0002\tULG\r\t\u0015\u0004\u0005A3\u0016A\u0002\u001fj]&$h\b\u0006\u0002F9\")Qj\u0001a\u0001_!\u001aA\f\u0015,)\u0007\r\u0001f\u000bF\u0001FQ\r!\u0001KV\u0001\u000f[&tGk\\6f]2+gn\u001a;i+\u0005\u0019\u0007C\u00013h\u001b\u0005)'B\u00014\"\u0003\u0015\u0001\u0018M]1n\u0013\tAWM\u0001\u0005J]R\u0004\u0016M]1nQ\r)\u0001KV\u0001\u0010[&tGk\\6f]2+gn\u001a;iA!\u001aa\u0001\u0015,\u0002#M,G/T5o)>\\WM\u001c'f]\u001e$\b\u000e\u0006\u0002o_6\t\u0001\u0001C\u0003q\u000f\u0001\u0007\u0011/A\u0003wC2,X\r\u0005\u0002sg6\tQ'\u0003\u0002uk\t\u0019\u0011J\u001c;)\u0007\u001d\u0001f+A\thKRl\u0015N\u001c+pW\u0016tG*\u001a8hi\",\u0012!\u001d\u0015\u0004\u0011A3\u0016\u0001B4baN,\u0012a\u001f\t\u0003IrL!!`3\u0003\u0019\t{w\u000e\\3b]B\u000b'/Y7)\u0007%\u0001f+A\u0003hCB\u001c\b\u0005K\u0002\u000b!Z\u000bqa]3u\u000f\u0006\u00048\u000fF\u0002o\u0003\u000fAa\u0001]\u0006A\u0002\u0005%\u0001c\u0001:\u0002\f%\u0019\u0011QB\u001b\u0003\u000f\t{w\u000e\\3b]\"\u001a1\u0002\u0015,\u0002\u000f\u001d,GoR1qgV\u0011\u0011\u0011\u0002\u0015\u0004\u0019A3\u0016a\u00029biR,'O\\\u000b\u0003\u00037\u0001B\u0001ZA\u000f_%\u0019\u0011qD3\u0003\u000bA\u000b'/Y7)\u00075\u0001f+\u0001\u0005qCR$XM\u001d8!Q\rq\u0001KV\u0001\u000bg\u0016$\b+\u0019;uKJtGc\u00018\u0002,!)\u0001o\u0004a\u0001_!\u001aq\u0002\u0015,\u0002\u0015\u001d,G\u000fU1ui\u0016\u0014h\u000eK\u0002\u0011!Z\u000b1\u0002^8M_^,'oY1tK\"\"\u0011\u0003UA\u001cC\t\tI$A\u00032]Yr\u0003'\u0001\u0007u_2{w/\u001a:dCN,\u0007\u0005\u000b\u0003\u0013!\u0006]\u0012AD:fiR{Gj\\<fe\u000e\f7/\u001a\u000b\u0004]\u0006\r\u0003B\u00029\u0014\u0001\u0004\tI\u0001\u000b\u0003\u0014!\u0006]\u0012AD4fiR{Gj\\<fe\u000e\f7/\u001a\u0015\u0005)A\u000b9$A\nde\u0016\fG/\u001a+sC:\u001chm\u001c:n\rVt7-\u0006\u0002\u0002PA)!/!\u00150y%\u0019\u00111K\u001b\u0003\u0013\u0019+hn\u0019;j_:\f\u0014!\u0005<bY&$\u0017\r^3J]B,H\u000fV=qKR!\u0011\u0
011LA0!\r\u0011\u00181L\u0005\u0004\u0003;*$\u0001B+oSRDq!!\u0019\u0017\u0001\u0004\t\u0019'A\u0005j]B,H\u000fV=qKB!\u0011QMA8\u001b\t\t9G\u0003\u0003\u0002j\u0005-\u0014!\u0002;za\u0016\u001c(bAA7G\u0005\u00191/\u001d7\n\t\u0005E\u0014q\r\u0002\t\t\u0006$\u0018\rV=qK\u0006qq.\u001e;qkR$\u0015\r^1UsB,WCAA2\u0003\u0011\u0019w\u000e]=\u0015\u0007\u0015\u000bY\bC\u0004\u0002~a\u0001\r!a \u0002\u000b\u0015DHO]1\u0011\u0007\u0011\f\t)C\u0002\u0002\u0004\u0016\u0014\u0001\u0002U1sC6l\u0015\r\u001d\u0015\u00051A\u000b9)\t\u0002\u0002\n\u0006)\u0011G\f\u001b/c!\u001a\u0001\u0001\u0015,\u0002\u001dI+w-\u001a=U_.,g.\u001b>feB\u0011aIG\n\b5\u0005M\u0015\u0011TAP!\r\u0011\u0018QS\u0005\u0004\u0003/+$AB!osJ+g\r\u0005\u0003I\u00037+\u0015bAAO\u0013\n)B)\u001a4bk2$\b+\u0019:b[N\u0014V-\u00193bE2,\u0007c\u0001:\u0002\"&\u0019\u00111U\u001b\u0003\u0019M+'/[1mSj\f'\r\\3\u0015\u0005\u0005=\u0015\u0001\u00027pC\u0012$2!RAV\u0011\u0019\ti\u000b\ba\u0001_\u0005!\u0001/\u0019;iQ\u0011a\u0002+a\u000e\u0002\u0017I,\u0017\r\u001a*fg>dg/\u001a\u000b\u0003\u0003k\u0003B!a.\u0002B6\u0011\u0011\u0011\u0018\u0006\u0005\u0003w\u000bi,\u0001\u0003mC:<'BAA`\u0003\u0011Q\u0017M^1\n\t\u0005\r\u0017\u0011\u0018\u0002\u0007\u001f\nTWm\u0019;)\ti\u0001\u0016q\u0007\u0015\u00053A\u000b9\u0004")
/*
 * Regex-based tokenizer (decompiled from Scala bytecode).
 *
 * Turns an input string into a sequence of token strings. Depending on the
 * "gaps" param, the configured "pattern" is either used to split the input
 * (pattern matches the gaps between tokens) or to find the tokens themselves
 * (pattern matches the tokens). Input is optionally lowercased first, and
 * tokens shorter than "minTokenLength" are dropped.
 *
 * Defaults set by the constructor: minTokenLength = 1, gaps = true,
 * pattern = "\\s+", toLowercase = true.
 */
public class RegexTokenizer
extends UnaryTransformer<String, Seq<String>, RegexTokenizer>
implements DefaultParamsWritable {
    private final String uid;
    private final IntParam minTokenLength;
    private final BooleanParam gaps;
    private final Param<String> pattern;
    private final BooleanParam toLowercase;

    /**
     * Static forwarder to the companion object's {@code load}.
     *
     * @param string argument passed through unchanged (presumably the path to load from - confirm against the companion)
     * @return the instance produced by the companion object's {@code load}
     */
    public static RegexTokenizer load(String string) {
        return RegexTokenizer$.MODULE$.load(string);
    }

    /**
     * Static forwarder to the companion object's {@code read}.
     *
     * @return the reader produced by the companion object
     */
    public static MLReader<RegexTokenizer> read() {
        return RegexTokenizer$.MODULE$.read();
    }

    /** Delegates to the {@code DefaultParamsWritable} trait implementation. */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable.write$(this);
    }

    /**
     * Delegates to the {@code MLWritable} trait implementation.
     *
     * @param path passed through unchanged to {@code MLWritable.save$}
     * @throws IOException as declared by the trait method
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable.save$(this, path);
    }

    /** @return the uid this instance was constructed with */
    @Override
    public String uid() {
        return this.uid;
    }

    /** @return the param described as "minimum token length (>= 0)" */
    public IntParam minTokenLength() {
        return this.minTokenLength;
    }

    /**
     * Sets {@link #minTokenLength()}.
     *
     * @param value new minimum token length
     * @return the result of {@code set}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setMinTokenLength(int value) {
        return (RegexTokenizer)this.set(this.minTokenLength(), BoxesRunTime.boxToInteger((int)value));
    }

    /** @return the current value of {@link #minTokenLength()} */
    public int getMinTokenLength() {
        return BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
    }

    /** @return the param described as "Set regex to match gaps or tokens" */
    public BooleanParam gaps() {
        return this.gaps;
    }

    /**
     * Sets {@link #gaps()}.
     *
     * @param value {@code true} to split the input on the pattern, {@code false} to collect the pattern's matches
     * @return the result of {@code set}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setGaps(boolean value) {
        return (RegexTokenizer)this.set(this.gaps(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /** @return the current value of {@link #gaps()} */
    public boolean getGaps() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
    }

    /** @return the param described as "regex pattern used for tokenizing" */
    public Param<String> pattern() {
        return this.pattern;
    }

    /**
     * Sets {@link #pattern()}.
     *
     * @param value regex source text
     * @return the result of {@code set}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setPattern(String value) {
        return (RegexTokenizer)this.set(this.pattern(), value);
    }

    /** @return the current value of {@link #pattern()} */
    public String getPattern() {
        return this.$(this.pattern());
    }

    /** @return the param described as "whether to convert all characters to lowercase before tokenizing." */
    public final BooleanParam toLowercase() {
        return this.toLowercase;
    }

    /**
     * Sets {@link #toLowercase()}.
     *
     * @param value whether input is lowercased before tokenizing
     * @return the result of {@code set}, cast to {@code RegexTokenizer}
     */
    public RegexTokenizer setToLowercase(boolean value) {
        return (RegexTokenizer)this.set(this.toLowercase(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /** @return the current value of {@link #toLowercase()} */
    public boolean getToLowercase() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase()));
    }

    /**
     * Builds the per-string tokenizing function.
     *
     * <p>The regex is compiled and all params are read once here, not inside the
     * returned function, so the pattern is not recompiled for every input string.
     * As a consequence the returned function reflects the param values at the time
     * this method is called, and it captures only the compiled regex and three
     * primitive settings instead of this transformer.
     *
     * @return a serializable function mapping an input string to its tokens of
     *         length {@code >= minTokenLength}
     */
    @Override
    public Function1<String, Seq<String>> createTransformFunc() {
        // Hoisted configuration: evaluated once per call instead of once per input string.
        Regex re = new StringOps(Predef$.MODULE$.augmentString(this.$(this.pattern()))).r();
        boolean lowercaseFirst = BoxesRunTime.unboxToBoolean((Object)this.$(this.toLowercase()));
        boolean patternMatchesGaps = BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
        int minLength = BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
        return (Function1 & Serializable & scala.Serializable)originStr -> {
            // No-arg toLowerCase() is kept on purpose: it uses the JVM default locale, as the original did.
            String str = lowercaseFirst ? originStr.toLowerCase() : originStr;
            // gaps == true: pattern separates tokens (split); otherwise pattern matches the tokens themselves.
            Seq tokens = patternMatchesGaps ? new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])re.split((CharSequence)str))).toSeq() : re.findAllIn((CharSequence)str).toSeq();
            return (Seq)tokens.filter((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToBoolean((boolean)RegexTokenizer.$anonfun$createTransformFunc$3(minLength, x$2)));
        };
    }

    /**
     * Requires the input column type to equal the {@code StringType} singleton.
     *
     * @param inputType type to check; the requirement fails with
     *                  "Input type must be string type but got &lt;inputType&gt;." otherwise
     */
    @Override
    public void validateInputType(DataType inputType) {
        StringType$ expected = StringType$.MODULE$;
        // Null-safe equality, same truth table as the decompiled double negation.
        boolean isStringType = inputType != null ? inputType.equals(expected) : expected == null;
        Predef$.MODULE$.require(isStringType, (Function0 & Serializable & scala.Serializable)() -> new StringBuilder(40).append("Input type must be string type but got ").append(inputType).append(".").toString());
    }

    /**
     * @return an {@code ArrayType} of {@code StringType}; the second constructor
     *         argument is {@code true} (presumably "contains null" - confirm against ArrayType)
     */
    @Override
    public DataType outputDataType() {
        return new ArrayType((DataType)StringType$.MODULE$, true);
    }

    /**
     * Copies this transformer via {@code defaultCopy}.
     *
     * @param extra param map forwarded to {@code defaultCopy}
     * @return the copy, cast to {@code RegexTokenizer}
     */
    @Override
    public RegexTokenizer copy(ParamMap extra) {
        return (RegexTokenizer)this.defaultCopy(extra);
    }

    /**
     * Compiler-generated predicate used by {@link #createTransformFunc()} to filter tokens.
     *
     * @return {@code true} when the token's length is at least {@code minLength$1}
     */
    public static final /* synthetic */ boolean $anonfun$createTransformFunc$3(int minLength$1, String x$2) {
        return x$2.length() >= minLength$1;
    }

    /**
     * Creates a tokenizer with the given uid, declares its four params and
     * registers their defaults.
     *
     * @param uid identifier returned by {@link #uid()}
     */
    public RegexTokenizer(String uid) {
        this.uid = uid;
        // Trait initializers run before the params are created, as in the compiled Scala constructor.
        MLWritable.$init$(this);
        DefaultParamsWritable.$init$(this);
        this.minTokenLength = new IntParam((Identifiable)this, "minTokenLength", "minimum token length (>= 0)", (Function1<Object, Object>)ParamValidators$.MODULE$.gtEq(0.0));
        this.gaps = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens");
        this.pattern = new Param(this, "pattern", "regex pattern used for tokenizing");
        this.toLowercase = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.");
        // Defaults: minTokenLength = 1, gaps = true, pattern = "\\s+", toLowercase = true.
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{
            this.minTokenLength().$minus$greater(BoxesRunTime.boxToInteger((int)1)),
            this.gaps().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true)),
            this.pattern().$minus$greater("\\s+"),
            this.toLowercase().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true))
        }));
    }

    /** Creates a tokenizer with a random uid generated from the prefix "regexTok". */
    public RegexTokenizer() {
        this(Identifiable$.MODULE$.randomUID("regexTok"));
    }
}

