package edu.umass.cs.mallet.base.classify.examples;

import edu.umass.cs.mallet.base.classify.Classifier;
import edu.umass.cs.mallet.base.classify.NaiveBayesTrainer;
import edu.umass.cs.mallet.base.pipe.CharSequence2TokenSequence;
import edu.umass.cs.mallet.base.pipe.CharSubsequence;
import edu.umass.cs.mallet.base.pipe.FeatureSequence2FeatureVector;
import edu.umass.cs.mallet.base.pipe.Input2CharSequence;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.PrintInputAndTarget;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.Target2Label;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureSequence;
import edu.umass.cs.mallet.base.pipe.TokenSequenceLowercase;
import edu.umass.cs.mallet.base.pipe.TokenSequenceRemoveStopwords;
import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;
import edu.umass.cs.mallet.base.types.InstanceList;
import java.io.File;

/* loaded from: input_file:WEB-INF/lib/mallet-0.4-jaeschke.jar:edu/umass/cs/mallet/base/classify/examples/DocumentClassifier.class */
/**
 * Example command-line program that trains a naive Bayes classifier on documents
 * and reports its accuracy.
 *
 * <p>Every command-line argument is wrapped in a {@link File} and handed to a
 * {@link FileIterator} in {@code STARTING_DIRECTORIES} mode. The resulting instances
 * are pushed through a serial pipe, split in two with proportions 0.5/0.5, and the
 * classifier trained on the first part is evaluated on both parts.
 */
public class DocumentClassifier {
    /**
     * Builds the instance list from the given paths, trains the classifier on the
     * first half of the split and prints the accuracy on each half to standard output.
     *
     * @param strArr paths passed to {@link FileIterator} as its starting directories
     */
    public static void main(String[] strArr) {
        // One File per command-line argument, in argument order.
        File[] directories = new File[strArr.length];
        int slot = 0;
        for (String path : strArr) {
            directories[slot] = new File(path);
            slot++;
        }

        // Processing stages, applied in this order by SerialPipes.
        // NOTE(review): the final stage presumably echoes each processed instance — confirm.
        Pipe[] stages = {
            new Target2Label(),
            new Input2CharSequence(),
            new CharSubsequence(CharSubsequence.SKIP_HEADER),
            new CharSequence2TokenSequence(),
            new TokenSequenceLowercase(),
            new TokenSequenceRemoveStopwords(),
            new TokenSequence2FeatureSequence(),
            new FeatureSequence2FeatureVector(),
            new PrintInputAndTarget()
        };
        SerialPipes pipeline = new SerialPipes(stages);

        InstanceList documents = new InstanceList(pipeline);
        FileIterator source = new FileIterator(directories, FileIterator.STARTING_DIRECTORIES);
        documents.add(source);

        // Two parts with proportions 0.5 and 0.5: index 0 is used for training, index 1 for testing.
        double[] proportions = {0.5d, 0.5d};
        InstanceList[] halves = documents.split(proportions);
        InstanceList trainingSet = halves[0];
        InstanceList testingSet = halves[1];

        NaiveBayesTrainer trainer = new NaiveBayesTrainer();
        Classifier model = trainer.train(trainingSet);

        System.out.println("The training accuracy is " + model.getAccuracy(trainingSet));
        System.out.println("The testing accuracy is " + model.getAccuracy(testingSet));
    }
}
