/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.model.crf;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.model.crf.CRFTagger;
import com.hankcs.hanlp.model.crf.FeatureTemplate;
import com.hankcs.hanlp.model.crf.crfpp.FeatureIndex;
import com.hankcs.hanlp.model.perceptron.PerceptronSegmenter;
import com.hankcs.hanlp.model.perceptron.feature.FeatureMap;
import com.hankcs.hanlp.model.perceptron.instance.CWSInstance;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

public class CRFSegmenter
extends CRFTagger
implements Segmenter {
    private PerceptronSegmenter perceptronSegmenter;

    public CRFSegmenter() throws IOException {
        this(HanLP.Config.CRFCWSModelPath);
    }

    public CRFSegmenter(String modelPath) throws IOException {
        super(modelPath);
        if (modelPath != null) {
            this.perceptronSegmenter = new PerceptronSegmenter(this.model);
        }
    }

    @Override
    protected void convertCorpus(Sentence sentence, BufferedWriter bw) throws IOException {
        for (Word w : sentence.toSimpleWordList()) {
            String word = CharTable.convert(w.value);
            if (word.length() == 1) {
                bw.write(word);
                bw.write(9);
                bw.write(83);
                bw.write(10);
                continue;
            }
            bw.write(word.charAt(0));
            bw.write(9);
            bw.write(66);
            bw.write(10);
            for (int i = 1; i < word.length() - 1; ++i) {
                bw.write(word.charAt(i));
                bw.write(9);
                bw.write(77);
                bw.write(10);
            }
            bw.write(word.charAt(word.length() - 1));
            bw.write(9);
            bw.write(69);
            bw.write(10);
        }
    }

    @Override
    public List<String> segment(String text) {
        LinkedList<String> wordList = new LinkedList<String>();
        this.segment(text, CharTable.convert(text), wordList);
        return wordList;
    }

    @Override
    public void segment(String text, String normalized, List<String> wordList) {
        this.perceptronSegmenter.segment(text, this.createInstance(normalized), wordList);
    }

    private CWSInstance createInstance(String text) {
        final FeatureTemplate[] featureTemplateArray = this.model.getFeatureTemplateArray();
        return new CWSInstance(text, this.model.featureMap){

            @Override
            protected int[] extractFeature(String sentence, FeatureMap featureMap, int position) {
                StringBuilder sbFeature = new StringBuilder();
                LinkedList<Integer> featureVec = new LinkedList<Integer>();
                for (int i = 0; i < featureTemplateArray.length; ++i) {
                    Iterator<int[]> offsetIterator = featureTemplateArray[i].offsetList.iterator();
                    Iterator<String> delimiterIterator = featureTemplateArray[i].delimiterList.iterator();
                    delimiterIterator.next();
                    while (offsetIterator.hasNext()) {
                        int offset = offsetIterator.next()[0] + position;
                        if (offset < 0) {
                            sbFeature.append(FeatureIndex.BOS[-(offset + 1)]);
                        } else if (offset >= sentence.length()) {
                            sbFeature.append(FeatureIndex.EOS[offset - sentence.length()]);
                        } else {
                            sbFeature.append(sentence.charAt(offset));
                        }
                        if (delimiterIterator.hasNext()) {
                            sbFeature.append(delimiterIterator.next());
                            continue;
                        }
                        sbFeature.append(i);
                    }
                    1.addFeatureThenClear(sbFeature, featureVec, featureMap);
                }
                return 1.toFeatureArray(featureVec);
            }
        };
    }

    @Override
    protected String getDefaultFeatureTemplate() {
        return "# Unigram\nU0:%x[-1,0]\nU1:%x[0,0]\nU2:%x[1,0]\nU3:%x[-2,0]%x[-1,0]\nU4:%x[-1,0]%x[0,0]\nU5:%x[0,0]%x[1,0]\nU6:%x[1,0]%x[2,0]\n\n# Bigram\nB";
    }
}

