package com.mayabot.nlp.segment.pipeline;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.mayabot.nlp.segment.CharNormalize;
import com.mayabot.nlp.segment.Lexer;
import com.mayabot.nlp.segment.Nature;
import com.mayabot.nlp.segment.WordSplitAlgorithm;
import com.mayabot.nlp.segment.WordTerm;
import com.mayabot.nlp.segment.WordpathProcessor;
import com.mayabot.nlp.segment.plugins.collector.WordTermCollector;
import com.mayabot.nlp.segment.wordnet.BestPathAlgorithm;
import com.mayabot.nlp.segment.wordnet.Wordnet;
import com.mayabot.nlp.segment.wordnet.Wordpath;
import com.mayabot.nlp.utils.Characters;
import com.mayabot.nlp.utils.StringUtils;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/* loaded from: input_file:com/mayabot/nlp/segment/pipeline/PipelineLexer.class */
/**
 * A {@link Lexer} assembled from pluggable stages.
 *
 * <p>A call to {@link #scan(char[], Consumer)} runs the stages in this order:
 * <ol>
 *   <li>every {@link CharNormalize} is applied to the input characters;</li>
 *   <li>a {@link Wordnet} is created over the characters and filled by every
 *       {@link WordSplitAlgorithm};</li>
 *   <li>the {@link BestPathAlgorithm} selects a {@link Wordpath} from the wordnet;</li>
 *   <li>every enabled {@link WordpathProcessor} rewrites that path in turn;</li>
 *   <li>the {@link WordTermCollector} emits the resulting terms to the consumer.</li>
 * </ol>
 *
 * <p>Instances are normally obtained through {@link #builder()}.
 */
public class PipelineLexer implements Lexer {
    private final BestPathAlgorithm bestPathAlgorithm;
    private final WordTermCollector collector;
    private final CharNormalize[] charNormalizes;
    private final WordSplitAlgorithm[] initer;
    private final WordpathProcessor[] pipeline;

    /**
     * Creates a builder for configuring and constructing a lexer.
     *
     * @return a new {@link PipelineLexerBuilder}
     */
    public static PipelineLexerBuilder builder() {
        return new PipelineLexerBuilder();
    }

    /**
     * Creates a lexer from its individual stages. All arguments are validated
     * before any of them is used, so a missing stage is reported at construction
     * time rather than in the middle of a later {@code scan} call.
     *
     * @param list                 word split algorithms that fill the wordnet, in execution order
     * @param wordpathProcessorArr processors applied to the selected path, in execution order;
     *                             the array is stored as given, not copied
     * @param bestPathAlgorithm    algorithm that selects a path from the filled wordnet
     * @param wordTermCollector    collector that emits the final terms to the consumer
     * @param list2                character normalizers applied to the input before segmentation
     * @throws NullPointerException if any argument is {@code null}
     */
    public PipelineLexer(List<WordSplitAlgorithm> list, WordpathProcessor[] wordpathProcessorArr, BestPathAlgorithm bestPathAlgorithm, WordTermCollector wordTermCollector, List<CharNormalize> list2) {
        Preconditions.checkNotNull(list, "word split algorithms must not be null");
        Preconditions.checkNotNull(wordpathProcessorArr, "wordpath processors must not be null");
        Preconditions.checkNotNull(bestPathAlgorithm, "bestPathAlgorithm must not be null");
        Preconditions.checkNotNull(wordTermCollector, "wordTermCollector must not be null");
        Preconditions.checkNotNull(list2, "char normalizers must not be null");

        this.initer = list.toArray(new WordSplitAlgorithm[0]);
        this.pipeline = wordpathProcessorArr;
        this.bestPathAlgorithm = bestPathAlgorithm;
        this.collector = wordTermCollector;
        this.charNormalizes = list2.toArray(new CharNormalize[0]);
    }

    /**
     * Segments the given characters and passes each resulting term to {@code consumer}.
     *
     * <p>The array is handed to every configured {@link CharNormalize} first, so the
     * caller's array may be modified (presumably in place; see the normalizer
     * implementations). Empty input produces no terms. Input that consists of exactly
     * one whitespace character is emitted directly as a single term tagged
     * {@link Nature#w} without building a wordnet.
     *
     * @param cArr     the characters to segment
     * @param consumer receives the produced terms
     */
    @Override // com.mayabot.nlp.segment.Lexer
    public void scan(char[] cArr, Consumer<WordTerm> consumer) {
        for (CharNormalize charNormalize : this.charNormalizes) {
            charNormalize.normal(cArr);
        }

        if (cArr.length == 0) {
            return;
        }

        // Shortcut for a lone whitespace character. The original code also carried a
        // Nature.x fallback inside this branch, but it could never run because the
        // guard already requires the character to be whitespace.
        if (cArr.length == 1 && StringUtils.isWhiteSpace(cArr[0])) {
            consumer.accept(new WordTerm(new String(cArr), Nature.w));
            return;
        }

        Wordnet wordnet = new Wordnet(cArr);
        for (WordSplitAlgorithm wordSplitAlgorithm : this.initer) {
            wordSplitAlgorithm.fill(wordnet);
        }
        wordnet.fillNill();

        Wordpath path = this.bestPathAlgorithm.select(wordnet);
        for (WordpathProcessor wordpathProcessor : this.pipeline) {
            if (wordpathProcessor.isEnabled()) {
                path = wordpathProcessor.process(path);
            }
        }

        this.collector.collect(wordnet, path, consumer);
    }

    /**
     * Returns the configured wordpath processors.
     *
     * @return an immutable copy of the processor list, in execution order
     */
    public List<WordpathProcessor> getPipeline() {
        return ImmutableList.copyOf(this.pipeline);
    }

    /**
     * Returns the collector that emits the final terms.
     *
     * @return the configured {@link WordTermCollector}
     */
    public WordTermCollector getCollector() {
        return this.collector;
    }

    /**
     * Renders a multi-line summary listing the simple class name of every configured stage.
     *
     * @return the human-readable description of this lexer
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("PipelineTokenizer\n\n");
        sb.append("BestPathAlgorithm = ").append(this.bestPathAlgorithm.getClass().getSimpleName()).append("\n");
        sb.append("CharNormalize = ").append(joinSimpleNames(this.charNormalizes)).append("\n");
        sb.append("WordTermCollector = ").append(this.collector.getClass().getSimpleName()).append("\n");
        sb.append("WordSplitAlgorithm = ").append(joinSimpleNames(this.initer)).append("\n");
        sb.append("WordpathProcessor = \n");
        for (WordpathProcessor wordpathProcessor : this.pipeline) {
            sb.append("\t").append(wordpathProcessor.getClass().getSimpleName()).append("\n");
        }
        return sb.toString();
    }

    /** Joins the simple class names of the given components with commas. */
    private static String joinSimpleNames(Object[] components) {
        return Lists.newArrayList(components).stream()
                .map(component -> component.getClass().getSimpleName())
                .collect(Collectors.joining(","));
    }
}
