package com.hankcs.hanlp.classification.classifiers;

import com.hankcs.hanlp.classification.models.NaiveBayesModel;
import com.hankcs.hanlp.classification.utilities.TextProcessUtility;
import com.hankcs.hanlp.corpus.io.IOUtil;
import java.util.Iterator;
import java.util.Map;
import junit.framework.TestCase;

/* loaded from: input_file:com/hankcs/hanlp/classification/classifiers/NaiveBayesClassifierTest.class */
/**
 * Exercises {@link NaiveBayesClassifier}: training on a corpus loaded from disk, persisting the
 * resulting model, classifying sample texts and reporting accuracy and throughput on the
 * training corpus itself.
 *
 * <p>All results are printed to standard output; the only hard assertion is that a model can be
 * obtained (loaded from {@link #MODEL_PATH} or produced by {@link #testTrain()}).
 */
public class NaiveBayesClassifierTest extends TestCase {
    /** Location the trained model is serialized to and read back from. */
    private static final String MODEL_PATH = "data/test/classification.ser";
    /** Root folder of the classification corpus handed to {@code TextProcessUtility.loadCorpus}. */
    private static final String CORPUS_FOLDER = "data/test/搜狗文本分类语料库迷你版";

    /** Corpus as returned by {@code loadCorpus}; loaded lazily and reused within this instance. */
    private Map<String, String[]> trainingDataSet;

    /**
     * Loads the corpus into {@link #trainingDataSet} on first use and prints the number of
     * documents per key. Subsequent calls on the same instance are no-ops.
     */
    private void loadDataSet() {
        if (this.trainingDataSet != null) {
            return;
        }
        System.out.printf("正在从 %s 中加载分类语料...\n", CORPUS_FOLDER);
        this.trainingDataSet = TextProcessUtility.loadCorpus(CORPUS_FOLDER);
        for (Map.Entry<String, String[]> entry : this.trainingDataSet.entrySet()) {
            System.out.printf("%s : %d 个文档\n", entry.getKey(), entry.getValue().length);
        }
    }

    /**
     * Returns the model stored at {@link #MODEL_PATH}, running {@link #testTrain()} first when
     * the initial read yields {@code null}.
     *
     * @return the model read from disk, never {@code null}
     * @throws Exception if training fails
     */
    private NaiveBayesModel loadOrTrainModel() throws Exception {
        NaiveBayesModel model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH);
        if (model == null) {
            testTrain();
            model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH);
        }
        // Fail with a clear message instead of handing a null model to the classifier.
        assertNotNull("could not load or train a model at " + MODEL_PATH, model);
        return model;
    }

    /**
     * Trains a fresh classifier on the corpus, prints the elapsed time and saves the resulting
     * model to {@link #MODEL_PATH}.
     *
     * @throws Exception propagated from training or saving
     */
    public void testTrain() throws Exception {
        loadDataSet();
        NaiveBayesClassifier classifier = new NaiveBayesClassifier();
        long start = System.currentTimeMillis();
        System.out.println("开始训练...");
        classifier.train(this.trainingDataSet);
        System.out.printf("训练耗时：%d ms\n", System.currentTimeMillis() - start);
        IOUtil.saveObjectTo(classifier.getNaiveBayesModel(), MODEL_PATH);
    }

    /**
     * Classifies a few sample texts, then classifies every document of the corpus and prints the
     * fraction whose predicted label equals its corpus key, together with the throughput.
     *
     * @throws Exception propagated from model loading or training
     */
    public void testPredictAndAccuracy() throws Exception {
        NaiveBayesClassifier classifier = new NaiveBayesClassifier(loadOrTrainModel());

        String document = IOUtil.readTxt(CORPUS_FOLDER + "/财经/12.txt");
        // The first line of the file, with whitespace stripped, is printed as the title.
        String title = document.split("\\n")[0].replaceAll("\\s", "");
        System.out.printf("《%s》 属于分类 【%s】\n", title, classifier.classify(document));

        String railwayHeadline = "2016年中国铁路完成固定资产投资将达8000亿元";
        System.out.printf("《%s》 属于分类 【%s】\n", railwayHeadline, classifier.classify(railwayHeadline));
        String footballHeadline = "国安2016赛季年票开售比赛场次减少套票却上涨";
        System.out.printf("《%s》 属于分类 【%s】\n", footballHeadline, classifier.classify(footballHeadline));

        int totalDocuments = 0;
        int correctCount = 0;
        loadDataSet();
        long start = System.currentTimeMillis();
        System.out.println("开始评测...");
        for (Map.Entry<String, String[]> entry : this.trainingDataSet.entrySet()) {
            String category = entry.getKey();
            String[] documents = entry.getValue();
            totalDocuments += documents.length;
            for (String text : documents) {
                if (category.equals(classifier.classify(text))) {
                    correctCount++;
                }
            }
        }

        // Clamp to 1 ms so a very fast run cannot divide by zero.
        long elapsedMillis = Math.max(System.currentTimeMillis() - start, 1L);
        // Divide in floating point: int/int truncates the ratio to 0 or 1 and long/long
        // truncates the throughput.
        double accuracyPercent = (double) correctCount / totalDocuments * 100.0d;
        double documentsPerSecond = (double) totalDocuments / elapsedMillis * 1000.0d;
        System.out.printf("准确率 %d / %d = %.2f%%\n速度 %.2f 文档/秒",
                correctCount, totalDocuments, accuracyPercent, documentsPerSecond);
    }

    /**
     * Prints every entry of the map returned by {@code predict} for one sample headline.
     *
     * @throws Exception propagated from model loading or training
     */
    public void testPredict() throws Exception {
        NaiveBayesClassifier classifier = new NaiveBayesClassifier(loadOrTrainModel());
        Map<String, Double> prediction = classifier.predict("国安2016赛季年票开售比赛场次减少套票却上涨");
        for (Map.Entry<String, Double> entry : prediction.entrySet()) {
            System.out.println(entry);
        }
    }
}
