package edu.umass.cs.mallet.util.bibsonomy;

import bsh.EvalError;
import edu.umass.cs.mallet.base.fst.CRF4;
import edu.umass.cs.mallet.base.fst.InstanceAccuracyEvaluator;
import edu.umass.cs.mallet.base.fst.PerClassAccuracyEvaluator;
import edu.umass.cs.mallet.base.fst.TokenAccuracyEvaluator;
import edu.umass.cs.mallet.base.fst.Transducer;
import edu.umass.cs.mallet.base.pipe.Input2CharSequence;
import edu.umass.cs.mallet.base.pipe.Noop;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SGML2TokenSequence;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.Target2LabelSequence;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureVectorSequence;
import edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator;
import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;
import edu.umass.cs.mallet.base.pipe.tsf.LexiconMembership;
import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions;
import edu.umass.cs.mallet.base.pipe.tsf.RegexMatches;
import edu.umass.cs.mallet.base.pipe.tsf.TokenText;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Sequence;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.base.types.TokenSequence;
import edu.umass.cs.mallet.base.util.CharSequenceLexer;
import edu.umass.cs.mallet.base.util.CommandOption;
import edu.umass.cs.mallet.base.util.MalletLogger;
import edu.umass.cs.mallet.util.bibsonomy.clustering.LineGroupIterator2;
import edu.umass.cs.mallet.util.bibsonomy.clustering.SGML2FieldsPipe;
import gnu.trove.TIntArrayList;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.lucene.index.IndexWriter;
import org.bibsonomy.model.util.BibTexUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF.class */
public class TUI_IE_CRF extends BaseTUICRF {
    /** Record separator for the current run; {@code main} picks it from {@link #SEPARATORS}. */
    private static String separator;

    // NOTE(review): the nested cluster pipes compare cluster sizes against a literal 3;
    // presumably that literal is this constant inlined by the compiler - confirm.
    private static final int MIN_CLUSTER_SIZE = 3;

    /** Candidate record separators, indexed by the {@code head-or-ref} option value. */
    private static String[] SEPARATORS = {"<NEW_HEADER>", "<NEWREFERENCE>"};
    private static final Logger logger = MalletLogger.getLogger(TUI_IE_CRF.class.getName());

    // ---- Command-line options (registered in main via CommandOption.List.add(TUI_IE_CRF.class)) ----
    static CommandOption.File crfInputFileOption = new CommandOption.File(TUI_IE_CRF.class, "crf-input-file", "FILENAME", true, null, "The name of the file to read the trained CRF for testing.", null);
    static CommandOption.File inputFileOption = new CommandOption.File(TUI_IE_CRF.class, "input-file", "FILENAME", true, null, "The name of the file containing the testing data.", null);
    static CommandOption.Integer headOrRefOption = new CommandOption.Integer(TUI_IE_CRF.class, "head-or-ref", "INTEGER", true, 0, "0 for header, 1 for reference", null);
    static CommandOption.Integer nBestChoice = new CommandOption.Integer(TUI_IE_CRF.class, "nbestchoice", "INTEGER", true, 1, "N for N-best", null);
    // Fixed: this is a boolean option, so its argument placeholder is "boolean" like its siblings (was "INTEGER").
    static CommandOption.Boolean includeBibtexLexicons = new CommandOption.Boolean(TUI_IE_CRF.class, "include-bibtex-lexicons", "boolean", true, false, "Whether to use BibTeX lexicons from Fuchun.", null);
    static CommandOption.Boolean excludingSingletons = new CommandOption.Boolean(TUI_IE_CRF.class, "exclude-singletons", "boolean", true, true, "excluding singletons.", null);
    // Fixed: the help text was a copy of the exclude-singletons description. This flag is what
    // enables WordAppearsInAnyClusterPipe in makeSegmentationsPipe.
    static CommandOption.Boolean useClusterFeatures = new CommandOption.Boolean(TUI_IE_CRF.class, "use-cluster-features", "boolean", true, true, "Whether to add TAGGED_AS_ features from the other citations in the same cluster.", null);
    static CommandOption.Boolean useNegativeClusterFeatures = new CommandOption.Boolean(TUI_IE_CRF.class, "use-negative-cluster-features", "boolean", true, false, "Whether to use features that say words AREN'T tagged in cluster.", null);
    static CommandOption.Boolean useNumClusterOccurences = new CommandOption.Boolean(TUI_IE_CRF.class, "use-cluster-occurrences", "boolean", true, false, "Whether to use number of tagged word occurrences in cluster as features.", null);
    static CommandOption.Boolean useBogusClusterFeatures = new CommandOption.Boolean(TUI_IE_CRF.class, "use-bogus-cluster-features", "boolean", true, false, "If true, use features from the instance's true segmentation.", null);
    static CommandOption.Boolean useSparseWeights = new CommandOption.Boolean(TUI_IE_CRF.class, "use-sparse-weights", "boolean", true, false, "If true, use only input features that appear in training set.", null);
    static CommandOption.Integer clusterFeatureMinimum = new CommandOption.Integer(TUI_IE_CRF.class, "cluster-feature-minimum", "INTEGER", true, 2, "Minimum number of coreferent citations that need to agree to create a cluster feature.", null);
    // NOTE(review): the Lucene constant used as the default here looks like a decompiler
    // substitution for a plain integer literal - confirm the intended default.
    static CommandOption.Integer clusterSizeLimit = new CommandOption.Integer(TUI_IE_CRF.class, "cluster-size-limit", "INTEGER", true, IndexWriter.DEFAULT_MAX_FIELD_LENGTH, "cluster Size Limit", null);
    static CommandOption.Integer methodChoice = new CommandOption.Integer(TUI_IE_CRF.class, "methodchoice", "INTEGER", true, 1, "method for canonical citation creation", null);
    static CommandOption.Integer markovOrder = new CommandOption.Integer(TUI_IE_CRF.class, "markov-order", "INTEGER", true, 0, "0 = states for all transitions, 1 = half labels, 2 = three-quarter labels", null);
    static CommandOption.Integer numRepsOption = new CommandOption.Integer(TUI_IE_CRF.class, "num-reps", "INTEGER", true, 5, "Number of random test-training splits to try.", null);

    // ---- Metadata keys handed to SGML2FieldsPipe; clusterNoMeta is also the property key used to group instances ----
    static String refNoMeta = "reference_no=";
    static String clusterNoMeta = "cluster_no=";

    // ---- Field vocabulary. FIELD_NAMES, startTags, endTags and tagWeight are parallel arrays (13 entries each). ----
    // NOTE(review): the third FIELD_NAMES entry is presumably "date" (it lines up with "<date>" below) - confirm.
    static String[] FIELD_NAMES = {"author", "title", BibTexUtils.ADDITIONAL_MISC_FIELD_DATE, "publisher", "location", "pages", "institution", "editor", "volume", "note", "booktitle", "tech", "journal"};
    static String[] startTags = {"<author>", "<title>", "<date>", "<publisher>", "<location>", "<pages>", "<institution>", "<editor>", "<volume>", "<note>", "<booktitle>", "<tech>", "<journal>"};
    static String[] endTags = {"</author>", "</title>", "</date>", "</publisher>", "</location>", "</pages>", "</institution>", "</editor>", "</volume>", "</note>", "</booktitle>", "</tech>", "</journal>"};
    static double[] tagWeight = {1.0d, 1.0d, 0.5d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d};
    // NOTE(review): 10 does not match the 13 entries of the arrays above - confirm which is intended.
    static int NumFields = 10;

    // ---- Character classes used to build the regex features in makeBasePipe ----
    // NOTE(review): the accented-letter ranges below were damaged by a charset conversion
    // (they are replacement characters now). They are left untouched because the original
    // characters cannot be recovered from this file; restore them from the upstream source.
    private static String CAPS = "[A-Z������]";
    private static String ALPHA = "[A-Z������a-z�������]";
    private static String ALPHANUM = "[A-Z������a-z�������0-9]";
    private static String PUNT = "[,\\.;:?!()]";

    /** Directory holding the lexicon files loaded when {@code include-bibtex-lexicons} is set. */
    private static String bibtexLexDir = "/usr/col/tmp1/casutton/resources/fuchun/";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$AddClusterPropertyPipe.class */
    /**
     * Pipe that copies an instance's source object into its {@code "CLUSTER"} property.
     */
    public static class AddClusterPropertyPipe extends Pipe {
        private static final long serialVersionUID = -6552875329899979457L;

        /** Private: instances are only created from within the enclosing class. */
        private AddClusterPropertyPipe() {
            super();
        }

        /**
         * Stores {@code instance.getSource()} under the {@code "CLUSTER"} property key.
         *
         * @param instance the instance to annotate (modified in place)
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final Object cluster = instance.getSource();
            instance.setProperty("CLUSTER", cluster);
            return instance;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$AllClusterSegmentation.class */
    /**
     * Lookup tables, keyed by instance name, giving each instance's {@link Segmentation}
     * and the cluster ({@link InstanceList}) it belongs to.
     */
    public static class AllClusterSegmentation {
        /** Instance name -> segmentation built from that instance's data and target. */
        Map<Object, Segmentation> inst2segmentation = new HashMap<Object, Segmentation>();
        /** Instance name -> value of the instance's "CLUSTER" property. */
        Map<Object, InstanceList> inst2cluster = new HashMap<Object, InstanceList>();

        /**
         * Runs every instance of every cluster through {@code pipe} and records the result.
         *
         * @param instanceList list of cluster instances, as consumed by {@link ClusterListIterator}
         * @param pipe         pipe applied to each member; after it runs, the instance data must be
         *                     a {@code TokenSequence}, the target a {@code Sequence}, and the
         *                     "CLUSTER" property an {@code InstanceList}
         */
        public AllClusterSegmentation(InstanceList instanceList, Pipe pipe) {
            InstanceList piped = new InstanceList(pipe);
            piped.add(new ClusterListIterator(instanceList));
            InstanceList.Iterator it = piped.iterator();
            while (it.hasNext()) {
                Instance instance = (Instance) it.next();
                Object name = instance.getName();
                TokenSequence tokens = (TokenSequence) instance.getData();
                Sequence labels = (Sequence) instance.getTarget();
                this.inst2segmentation.put(name, new Segmentation(tokens, labels));
                this.inst2cluster.put(name, (InstanceList) instance.getProperty("CLUSTER"));
            }
        }

        /** @return the cluster recorded for the instance's name, or {@code null} if unknown */
        public InstanceList getCluster(Instance instance) {
            return this.inst2cluster.get(instance.getName());
        }

        /** @return the segmentation recorded for the instance's name, or {@code null} if unknown */
        public Segmentation getSegmentation(Instance instance) {
            return this.inst2segmentation.get(instance.getName());
        }

        /** Dumps every recorded instance with its segmentation and cluster to standard output. */
        public void print() {
            for (Map.Entry<Object, InstanceList> entry : this.inst2cluster.entrySet()) {
                Object name = entry.getKey();
                System.out.println("Instance " + name + "\n  " + this.inst2segmentation.get(name) + "\n  " + entry.getValue());
            }
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$BogusClusterPipe.class */
    /**
     * Adds {@code TAGGED_AS_<label>} features to each token from the recorded segmentations
     * of all members of the instance's cluster, including the instance itself (for which the
     * prefix is {@code I_AM_TAGGED_AS_}).
     */
    public static class BogusClusterPipe extends Pipe {
        /** Tokens that consist of a single punctuation character get no features. */
        private static final Pattern EXCLUDE = Pattern.compile("\\p{Punct}");
        private AllClusterSegmentation segmentation;

        BogusClusterPipe(AllClusterSegmentation allClusterSegmentation) {
            this.segmentation = allClusterSegmentation;
        }

        /**
         * @param instance instance whose data is a {@code TokenSequence}; its tokens are modified in place
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final TokenSequence tokens = (TokenSequence) instance.getData();
            final InstanceList cluster = this.segmentation.getCluster(instance);
            for (InstanceList.Iterator members = cluster.iterator(); members.hasNext();) {
                final Instance member = (Instance) members.next();
                final String prefix;
                if (member.getName().equals(instance.getName())) {
                    prefix = "I_AM_TAGGED_AS_";
                } else {
                    prefix = "TAGGED_AS_";
                }
                final Segmentation memberSegmentation = this.segmentation.getSegmentation(member);
                for (int pos = 0; pos < tokens.size(); pos++) {
                    final Token token = tokens.getToken(pos);
                    final String word = token.getText();
                    if (EXCLUDE.matcher(word).matches()) {
                        continue;
                    }
                    final String[] labels = memberSegmentation.fieldNamesForWord(word);
                    for (int k = 0; k < labels.length; k++) {
                        markIfUnset(token, (prefix + labels[k]).intern());
                    }
                }
            }
            return instance;
        }

        /**
         * Sets the feature to 1.0 unless the token already reports a non-default value for it.
         * NOTE(review): Transducer.ZERO_COST is presumably just 0.0 (feature absent) - confirm.
         */
        private static void markIfUnset(Token token, String featureName) {
            if (token.getFeatureValue(featureName) == Transducer.ZERO_COST) {
                token.setFeatureValue(featureName, 1.0d);
            }
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$ClusterListIterator.class */
    /**
     * Flattens a list of clusters into a stream of member instances. Each input instance's data
     * must be an {@link InstanceList} (one cluster); every member returned has its source set to
     * the cluster it came from.
     *
     * <p>Empty clusters and an empty cluster list are handled: {@link #hasNext()} is true only
     * when a member instance is actually available.
     */
    public static class ClusterListIterator extends AbstractPipeInputIterator {
        private Iterator perClusterIterator;
        /** Iterator over the current cluster's members; {@code null} until a cluster has been opened. */
        private Iterator withinClusterIterator;
        private InstanceList currentCluster;

        /**
         * @param instanceList list whose instances each carry one cluster as their data
         */
        public ClusterListIterator(InstanceList instanceList) {
            this.perClusterIterator = instanceList.iterator();
            advanceToNonEmptyCluster();
        }

        /**
         * Returns the next member instance, with its source set to the containing cluster.
         *
         * @throws java.util.NoSuchElementException if no member instances remain
         */
        @Override // edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator, edu.umass.cs.mallet.base.pipe.iterator.PipeInputIterator
        public Instance nextInstance() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("No more instances in cluster list");
            }
            Instance instance = (Instance) this.withinClusterIterator.next();
            instance.setSource(this.currentCluster);
            return instance;
        }

        /**
         * Moves on to the next cluster that has at least one member, if the current one is
         * exhausted. Leaves the state untouched when the current cluster still has members.
         */
        private void advanceToNonEmptyCluster() {
            while ((this.withinClusterIterator == null || !this.withinClusterIterator.hasNext())
                    && this.perClusterIterator.hasNext()) {
                this.currentCluster = (InstanceList) ((Instance) this.perClusterIterator.next()).getData();
                this.withinClusterIterator = this.currentCluster.iterator();
            }
        }

        /** @return true only if a following {@link #nextInstance()} call will succeed */
        @Override // edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator, java.util.Iterator
        public boolean hasNext() {
            // Skip trailing empty clusters first; otherwise "more clusters exist" would be
            // reported as "more instances exist".
            advanceToNonEmptyCluster();
            return this.withinClusterIterator != null && this.withinClusterIterator.hasNext();
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$NegativeClusterFeaturePipe.class */
    /**
     * Adds a {@code NEVER_TAGGED_AS_<field>} feature to a token for every field in
     * {@code FIELD_NAMES} that no other member of the instance's cluster assigns to that word.
     * Clusters with fewer than three members are left untouched.
     */
    public static class NegativeClusterFeaturePipe extends Pipe {
        /** Tokens that consist of a single punctuation character get no features. */
        private static final Pattern EXCLUDE = Pattern.compile("\\p{Punct}");
        private AllClusterSegmentation segmentation;

        NegativeClusterFeaturePipe(AllClusterSegmentation allClusterSegmentation) {
            this.segmentation = allClusterSegmentation;
        }

        /**
         * @param instance instance whose data is a {@code TokenSequence}; its tokens are modified in place
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final InstanceList cluster = this.segmentation.getCluster(instance);
            final TokenSequence tokens = (TokenSequence) instance.getData();
            if (cluster.size() >= 3) {
                for (int pos = 0; pos < tokens.size(); pos++) {
                    final Token token = tokens.getToken(pos);
                    final String word = token.getText();
                    if (EXCLUDE.matcher(word).matches()) {
                        continue;
                    }
                    final boolean[] taggedElsewhere = fieldsTaggedElsewhere(cluster, instance, word);
                    for (int field = 0; field < taggedElsewhere.length; field++) {
                        if (taggedElsewhere[field]) {
                            continue;
                        }
                        final String featureName = ("NEVER_TAGGED_AS_" + TUI_IE_CRF.FIELD_NAMES[field]).intern();
                        // Only set the feature if the token does not already carry a value for it.
                        if (token.getFeatureValue(featureName) == Transducer.ZERO_COST) {
                            token.setFeatureValue(featureName, 1.0d);
                        }
                    }
                }
            }
            return instance;
        }

        /**
         * Flags, per index into {@code FIELD_NAMES}, whether any cluster member other than
         * {@code self} (compared by name) tags {@code word} with that field.
         */
        private boolean[] fieldsTaggedElsewhere(InstanceList cluster, Instance self, String word) {
            final boolean[] flags = new boolean[TUI_IE_CRF.FIELD_NAMES.length];
            for (InstanceList.Iterator members = cluster.iterator(); members.hasNext();) {
                final Instance member = (Instance) members.next();
                if (member.getName().equals(self.getName())) {
                    continue;
                }
                final int[] fieldIds = this.segmentation.getSegmentation(member).fieldIdsForWord(word);
                for (int k = 0; k < fieldIds.length; k++) {
                    flags[fieldIds[k]] = true;
                }
            }
            return flags;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$NumAppearancesInClusterPipe.class */
    /**
     * Sets a {@code TAGGED_AS_<label>} feature to 1.0 on each token for every label that another
     * member of the instance's cluster assigns to the same word.
     *
     * <p>NOTE(review): despite the class name, the value written is always 1.0 rather than a
     * running count - confirm whether accumulation was intended.
     */
    public static class NumAppearancesInClusterPipe extends Pipe {
        /** Tokens that consist of a single punctuation character get no features. */
        private static final Pattern EXCLUDE = Pattern.compile("\\p{Punct}");
        private AllClusterSegmentation segmentation;

        NumAppearancesInClusterPipe(AllClusterSegmentation allClusterSegmentation) {
            this.segmentation = allClusterSegmentation;
        }

        /**
         * @param instance instance whose data is a {@code TokenSequence}; its tokens are modified in place
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final TokenSequence tokens = (TokenSequence) instance.getData();
            final InstanceList cluster = this.segmentation.getCluster(instance);
            for (InstanceList.Iterator members = cluster.iterator(); members.hasNext();) {
                final Instance member = (Instance) members.next();
                if (member.getName().equals(instance.getName())) {
                    // The instance's own segmentation is not consulted.
                    continue;
                }
                final Segmentation memberSegmentation = this.segmentation.getSegmentation(member);
                for (int pos = 0; pos < tokens.size(); pos++) {
                    final Token token = tokens.getToken(pos);
                    final String word = token.getText();
                    if (EXCLUDE.matcher(word).matches()) {
                        continue;
                    }
                    final String[] labels = memberSegmentation.fieldNamesForWord(word);
                    for (int k = 0; k < labels.length; k++) {
                        final String featureName = ("TAGGED_AS_" + labels[k]).intern();
                        token.setFeatureValue(featureName, 1.0d);
                    }
                }
            }
            return instance;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$Segmentation.class */
    /**
     * Pairs a token sequence with its per-token label sequence and answers
     * "which labels does this word carry?" queries.
     */
    public static class Segmentation {
        private Sequence output;
        private TokenSequence input;
        /** Label that marks a token as belonging to no field; skipped by {@link #fieldIdsForWord}. */
        static String background = "O";

        /**
         * @param tokenSequence the tokens
         * @param sequence      the labels, expected to be position-aligned with {@code tokenSequence}
         */
        public Segmentation(TokenSequence tokenSequence, Sequence sequence) {
            this.input = tokenSequence;
            this.output = sequence;
        }

        /** @return the enclosing class's shared {@code FIELD_NAMES} array (not a copy) */
        public String[] getFieldNames() {
            return TUI_IE_CRF.FIELD_NAMES;
        }

        /**
         * Collects the distinct labels attached to every token whose text equals {@code str}.
         *
         * @param str the word to look up (exact, case-sensitive match)
         * @return the distinct label strings, in no particular order; empty if the word does not occur
         */
        public String[] fieldNamesForWord(String str) {
            assert this.input.size() == this.output.size();
            HashSet<String> labels = new HashSet<String>();
            for (int i = 0; i < this.input.size(); i++) {
                if (this.input.getToken(i).getText().equals(str)) {
                    labels.add(this.output.get(i).toString());
                }
            }
            return labels.toArray(new String[labels.size()]);
        }

        @Override
        public String toString() {
            return "SEGMENTATION\n  input:\n" + this.input + "\n  output:\n" + this.output;
        }

        /**
         * Like {@link #fieldNamesForWord}, but returns indices into {@code FIELD_NAMES} and drops
         * the background label.
         *
         * <p>A label that is not in {@code FIELD_NAMES} is reported on standard error and
         * skipped. It is never returned as -1, because callers use the result to index arrays
         * sized by {@code FIELD_NAMES}.
         *
         * @param str the word to look up
         * @return valid indices into {@code FIELD_NAMES}; possibly empty
         */
        public int[] fieldIdsForWord(String str) {
            TIntArrayList ids = new TIntArrayList();
            List<String> knownFields = Arrays.asList(TUI_IE_CRF.FIELD_NAMES);
            for (String label : fieldNamesForWord(str)) {
                if (label.equals(background)) {
                    continue;
                }
                int index = knownFields.indexOf(label);
                if (index == -1) {
                    System.err.println("ERROR: Couldn't find " + label + "\n");
                    continue;
                }
                ids.add(index);
            }
            return ids.toNativeArray();
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$WordAppearsInAnyClusterPipe.class */
    /**
     * Adds a {@code TAGGED_AS_<label>} feature to each token for every label that another member
     * of the instance's cluster assigns to the same word. Values the token already carries are
     * not overwritten.
     */
    public static class WordAppearsInAnyClusterPipe extends Pipe {
        /** Tokens that consist of a single punctuation character get no features. */
        private static final Pattern EXCLUDE = Pattern.compile("\\p{Punct}");
        private AllClusterSegmentation segmentation;

        WordAppearsInAnyClusterPipe(AllClusterSegmentation allClusterSegmentation) {
            this.segmentation = allClusterSegmentation;
        }

        /**
         * @param instance instance whose data is a {@code TokenSequence}; its tokens are modified in place
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final TokenSequence tokens = (TokenSequence) instance.getData();
            final InstanceList cluster = this.segmentation.getCluster(instance);
            for (InstanceList.Iterator members = cluster.iterator(); members.hasNext();) {
                final Instance member = (Instance) members.next();
                if (member.getName().equals(instance.getName())) {
                    // The instance's own segmentation is not consulted.
                    continue;
                }
                final Segmentation memberSegmentation = this.segmentation.getSegmentation(member);
                for (int pos = 0; pos < tokens.size(); pos++) {
                    final Token token = tokens.getToken(pos);
                    final String word = token.getText();
                    if (EXCLUDE.matcher(word).matches()) {
                        continue;
                    }
                    final String[] labels = memberSegmentation.fieldNamesForWord(word);
                    for (int k = 0; k < labels.length; k++) {
                        final String featureName = ("TAGGED_AS_" + labels[k]).intern();
                        final boolean unset = token.getFeatureValue(featureName) == Transducer.ZERO_COST;
                        if (unset) {
                            token.setFeatureValue(featureName, 1.0d);
                        }
                    }
                }
            }
            return instance;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:edu/umass/cs/mallet/util/bibsonomy/TUI_IE_CRF$WordOftenAppearsAsPipe.class */
    /**
     * Adds an {@code OFTEN_TAGGED_AS_<field>} feature to a token when at least
     * {@code clusterFeatureMinimum} other members of the instance's cluster tag the same word
     * with that field. Clusters with fewer than three members are left untouched.
     */
    public static class WordOftenAppearsAsPipe extends Pipe {
        /** Tokens that consist of a single punctuation character get no features. */
        private static final Pattern EXCLUDE = Pattern.compile("\\p{Punct}");
        private AllClusterSegmentation segmentation;

        WordOftenAppearsAsPipe(AllClusterSegmentation allClusterSegmentation) {
            this.segmentation = allClusterSegmentation;
        }

        /**
         * @param instance instance whose data is a {@code TokenSequence}; its tokens are modified in place
         * @return the same instance
         */
        @Override // edu.umass.cs.mallet.base.pipe.Pipe
        public Instance pipe(Instance instance) {
            final InstanceList cluster = this.segmentation.getCluster(instance);
            final TokenSequence tokens = (TokenSequence) instance.getData();
            if (cluster.size() >= 3) {
                for (int pos = 0; pos < tokens.size(); pos++) {
                    final Token token = tokens.getToken(pos);
                    final String word = token.getText();
                    if (EXCLUDE.matcher(word).matches()) {
                        continue;
                    }
                    final int[] votes = countFieldVotes(cluster, instance, word);
                    for (int field = 0; field < votes.length; field++) {
                        if (votes[field] < TUI_IE_CRF.clusterFeatureMinimum.value) {
                            continue;
                        }
                        final String featureName = ("OFTEN_TAGGED_AS_" + TUI_IE_CRF.FIELD_NAMES[field]).intern();
                        // Only set the feature if the token does not already carry a value for it.
                        if (token.getFeatureValue(featureName) == Transducer.ZERO_COST) {
                            token.setFeatureValue(featureName, 1.0d);
                        }
                    }
                }
            }
            return instance;
        }

        /**
         * Counts, per index into {@code FIELD_NAMES}, how many cluster members other than
         * {@code self} (compared by name) tag {@code word} with that field.
         */
        private int[] countFieldVotes(InstanceList cluster, Instance self, String word) {
            final int[] votes = new int[TUI_IE_CRF.FIELD_NAMES.length];
            for (InstanceList.Iterator members = cluster.iterator(); members.hasNext();) {
                final Instance member = (Instance) members.next();
                if (member.getName().equals(self.getName())) {
                    continue;
                }
                final int[] fieldIds = this.segmentation.getSegmentation(member).fieldIdsForWord(word);
                for (int k = 0; k < fieldIds.length; k++) {
                    votes[fieldIds[k]]++;
                }
            }
            return votes;
        }
    }

    /**
     * Command-line entry point. For each of {@code num-reps} repetitions it reads the input
     * file, groups the citations into clusters, splits the clusters at random into training and
     * testing sets, trains a CRF on the training instances, evaluates it, and writes the
     * outputs and the trained model with a per-repetition suffix.
     *
     * @param strArr command-line arguments, parsed by the {@code CommandOption} fields of this
     *               class and of {@code BaseTUICRF}
     * @throws Exception on any I/O or pipeline failure
     */
    public static void main(String[] strArr) throws Exception {
        CommandOption.List list = new CommandOption.List("Segmenting references based on coreference information.", new CommandOption[0]);
        list.add(TUI_IE_CRF.class);
        list.add(BaseTUICRF.class);
        list.process(strArr);
        initOutputDirectory();
        list.logOptions(logger);
        long startMillis = System.currentTimeMillis();

        // Reject an out-of-range selector explicitly instead of failing with a bare
        // ArrayIndexOutOfBoundsException (same reporting style as the markov-order check below).
        int separatorIndex = headOrRefOption.value();
        if (separatorIndex < 0 || separatorIndex >= SEPARATORS.length) {
            System.err.println("Unknown head-or-ref " + separatorIndex);
            System.exit(1);
        }
        separator = SEPARATORS[separatorIndex];

        Random random = new Random(randomSeedOption.value);
        for (int rep = 0; rep < numRepsOption.value; rep++) {
            logger.info("REPETITION " + rep);

            // Read the raw citations; the reader is fully consumed by add(), so close it right away.
            InstanceList rawInstances = new InstanceList(new Alphabet(), new Alphabet());
            FileReader reader = new FileReader(inputFileOption.value);
            try {
                rawInstances.add(new LineGroupIterator(reader, Pattern.compile(separator), true));
            } finally {
                reader.close();
            }

            InstanceList allClusters = getInstanceListClusters(rawInstances, inputFileOption.value);
            InstanceList[] split = allClusters.split(random, new double[]{trainingPct.value, 1.0d - trainingPct.value});
            InstanceList trainClusters = split[0];
            InstanceList testClusters = split[1];
            logger.info("Num train clusters = " + trainClusters.size());
            logger.info("Num test clusters = " + testClusters.size());

            System.out.println("Creating allclustersegmentation");
            // The base pipe is built twice on purpose: one copy is the first stage of the
            // training pipe, the other is used only to pre-compute the cluster segmentations.
            SerialPipes featurePipe = new SerialPipes(new Pipe[]{makeBasePipe(), makeSegmentationsPipe(new AllClusterSegmentation(allClusters, makeBasePipe()), useClusterFeatures.value, useNumClusterOccurences.value, useBogusClusterFeatures.value), new TokenSequence2FeatureVectorSequence()});
            InstanceList training = new InstanceList(featurePipe);
            training.add(new ClusterListIterator(trainClusters));
            InstanceList testing = new InstanceList(featurePipe);
            testing.add(new ClusterListIterator(testClusters));
            logger.info("Number of training instances = " + training.size());
            logger.info("Number of testing instances = " + testing.size());

            CRF4 crf4 = new CRF4(featurePipe, (Pipe) null);
            crf4.setUseSparseWeights(useSparseWeights.value);
            switch (markovOrder.value) {
                case 0:
                    crf4.addStatesForLabelsConnectedAsIn(training);
                    break;
                case 1:
                    crf4.addStatesForHalfLabelsConnectedAsIn(training);
                    break;
                case 2:
                    crf4.addStatesForThreeQuarterLabelsConnectedAsIn(training);
                    break;
                default:
                    System.err.println("Unknown markov-order " + markovOrder.value);
                    System.exit(1);
                    break;
            }

            TokenAccuracyEvaluator tokenAccuracyEvaluator = new TokenAccuracyEvaluator();
            tokenAccuracyEvaluator.setNumIterationsToWait(10);
            tokenAccuracyEvaluator.setNumIterationsToSkip(5);
            crf4.train(training, null, testing, tokenAccuracyEvaluator);

            FieldF1Evaluator fieldF1Evaluator = new FieldF1Evaluator(FIELD_NAMES);
            fieldF1Evaluator.test(crf4, training, "Training", null);
            fieldF1Evaluator.test(crf4, testing, "Testing", null);
            PerClassAccuracyEvaluator perClassAccuracyEvaluator = new PerClassAccuracyEvaluator();
            perClassAccuracyEvaluator.test(crf4, training, "Training", null);
            perClassAccuracyEvaluator.test(crf4, testing, "Testing", null);
            InstanceAccuracyEvaluator instanceAccuracyEvaluator = new InstanceAccuracyEvaluator();
            instanceAccuracyEvaluator.test(crf4, training, "Training", null);
            instanceAccuracyEvaluator.test(crf4, testing, "Testing", null);

            writeOutput(crf4, training, "-train-" + rep);
            writeOutput(crf4, testing, "-test-" + rep);
            saveCrf(crf4, "-" + rep);
        }
        System.out.println("Time elapses " + ((System.currentTimeMillis() - startMillis) / 1000.0d) + " seconds for testing.");
    }

    /**
     * Builds an instance list, piped through {@code pipe}, from the members of every cluster
     * in {@code instanceList} that has more than two members.
     *
     * @param pipe         pipe for the returned list
     * @param instanceList list whose instances each carry one cluster ({@code InstanceList}) as data
     * @return the piped member instances of the retained clusters
     */
    private static InstanceList getNonTrivialTesting(Pipe pipe, InstanceList instanceList) {
        final InstanceList bigClusters = new InstanceList(new Alphabet(), new Alphabet());
        for (InstanceList.Iterator clusters = instanceList.iterator(); clusters.hasNext();) {
            final Instance clusterInstance = (Instance) clusters.next();
            final InstanceList members = (InstanceList) clusterInstance.getData();
            if (members.size() <= 2) {
                continue;
            }
            bigClusters.add(clusterInstance);
        }
        final InstanceList flattened = new InstanceList(pipe);
        flattened.add(new ClusterListIterator(bigClusters));
        return flattened;
    }

    /**
     * Builds the per-citation feature pipeline: cluster tagging, SGML tokenisation, the token
     * text feature, a fixed set of regex features, offset conjunctions, optional lexicon
     * membership features, and finally conversion of the target to a label sequence.
     *
     * <p>The stages are constructed in exactly the order listed.
     */
    private static Pipe makeBasePipe() throws EvalError, FileNotFoundException {
        return new SerialPipes(new Pipe[]{
            new AddClusterPropertyPipe(),
            new Input2CharSequence(),
            new SGML2TokenSequence(new CharSequenceLexer(CharSequenceLexer.LEX_NONWHITESPACE_CLASSES), "O"),
            new TokenText("W="),
            new RegexMatches("INITCAP", Pattern.compile(CAPS + ".*")),
            new RegexMatches("ALLDIGITS", Pattern.compile("[0-9]*")),
            new RegexMatches("ALLCAPS", Pattern.compile(CAPS + "+")),
            new RegexMatches("CONTAINSDIGITS", Pattern.compile(".*[0-9].*")),
            // NOTE(review): second pipe registered under the name ALLDIGITS (see above) - confirm intended.
            new RegexMatches("ALLDIGITS", Pattern.compile("[0-9]+")),
            new RegexMatches("PHONEORZIP", Pattern.compile("[0-9]+-[0-9]+")),
            new RegexMatches("CONTAINSDOTS", Pattern.compile("[^\\.]*\\..*")),
            new RegexMatches("CONTAINSDASH", Pattern.compile(ALPHANUM + "+-" + ALPHANUM + "*")),
            new RegexMatches("ACRO", Pattern.compile("[A-Z][A-Z\\.]*\\.[A-Z\\.]*")),
            new RegexMatches("LONELYINITIAL", Pattern.compile(CAPS + "\\.")),
            new RegexMatches("SINGLECHAR", Pattern.compile(ALPHA)),
            new RegexMatches("CAPLETTER", Pattern.compile(CAPS)),
            new RegexMatches("PUNC", Pattern.compile(PUNT)),
            new RegexMatches("URL", Pattern.compile("www\\..*|http://.*|ftp\\..*")),
            new RegexMatches("EMAIL", Pattern.compile("\\S+@\\S+|e-mail.*|email.*|Email.*")),
            new OffsetConjunctions(true, getOffsets()),
            makeBibtexLexiconStage(),
            new Target2LabelSequence()
        });
    }

    /**
     * Returns the lexicon-membership stage of the base pipe: the five lexicon pipes read from
     * {@code bibtexLexDir} when {@code include-bibtex-lexicons} is set, otherwise a no-op pipe.
     */
    private static Pipe makeBibtexLexiconStage() throws EvalError, FileNotFoundException {
        if (!includeBibtexLexicons.value) {
            return new Noop();
        }
        return new SerialPipes(new Pipe[]{
            new LexiconMembership("BIBTEX_AUTHOR", new File(bibtexLexDir, "lexicon_author"), true),
            new LexiconMembership("BIBTEX_DATE", new File(bibtexLexDir, "lexicon_date"), true),
            new LexiconMembership("NOTES", new File(bibtexLexDir, "lexicon_note"), true),
            new LexiconMembership("DEGREE", new File(bibtexLexDir, "lexicon_degree"), true),
            new LexiconMembership("AFFILIATION", new File(bibtexLexDir, "lexicon_affiliation"), true)
        });
    }

    /**
     * Assembles the cluster-feature stages. Each of the five slots holds either the named pipe
     * or a no-op, so the stage order is fixed regardless of which features are enabled.
     *
     * @param allClusterSegmentation segmentations and cluster membership shared by all stages
     * @param z  enable {@link WordAppearsInAnyClusterPipe}
     * @param z2 enable {@link NumAppearancesInClusterPipe}
     * @param z3 enable {@link BogusClusterPipe}
     * @return a serial pipe of the five slots
     */
    private static Pipe makeSegmentationsPipe(AllClusterSegmentation allClusterSegmentation, boolean z, boolean z2, boolean z3) {
        final Pipe anyCluster;
        if (z) {
            anyCluster = new WordAppearsInAnyClusterPipe(allClusterSegmentation);
        } else {
            anyCluster = new Noop();
        }

        final Pipe numAppearances;
        if (z2) {
            numAppearances = new NumAppearancesInClusterPipe(allClusterSegmentation);
        } else {
            numAppearances = new Noop();
        }

        final Pipe bogus;
        if (z3) {
            bogus = new BogusClusterPipe(allClusterSegmentation);
        } else {
            bogus = new Noop();
        }

        // Enabled only when cluster-feature-minimum was given explicitly on the command line.
        final Pipe oftenAppears;
        if (clusterFeatureMinimum.wasInvoked()) {
            oftenAppears = new WordOftenAppearsAsPipe(allClusterSegmentation);
        } else {
            oftenAppears = new Noop();
        }

        final Pipe negative;
        if (useNegativeClusterFeatures.value) {
            negative = new NegativeClusterFeaturePipe(allClusterSegmentation);
        } else {
            negative = new Noop();
        }

        return new SerialPipes(new Pipe[]{anyCluster, numAppearances, bogus, oftenAppears, negative});
    }

    /**
     * Debug helper: prints every cluster in {@code instanceList} and, for each member, its
     * name, data and target ({@code <null>} when the target is absent) to standard output.
     *
     * @param instanceList list whose instances each carry one cluster ({@code InstanceList}) as data
     */
    private static void printClusterList(InstanceList instanceList) {
        for (InstanceList.Iterator clusters = instanceList.iterator(); clusters.hasNext();) {
            final Instance clusterInstance = (Instance) clusters.next();
            final InstanceList members = (InstanceList) clusterInstance.getData();
            System.out.println("\n\nCLUSTER *** " + clusterInstance.getName());
            for (InstanceList.Iterator memberIt = members.iterator(); memberIt.hasNext();) {
                final Instance member = (Instance) memberIt.next();
                final String targetText = member.getTarget() != null ? member.getTarget().toString() : "<null>";
                System.out.println("name: " + member.getName() + "\ninput: " + member.getData().toString() + "\ntarget: " + targetText);
            }
        }
    }

    /**
     * Groups the citations of {@code instanceList} into clusters.
     *
     * <p>The file is re-read through an {@code SGML2FieldsPipe} to obtain each citation's name
     * and property list, which are copied onto the instance at the same position in
     * {@code instanceList}. Instances are then grouped by their {@code clusterNoMeta} property,
     * keeping first-seen order.
     *
     * @param instanceList citations read from {@code file}, in file order; modified in place
     * @param file         the input file the citations were read from
     * @return one instance per cluster, named {@code "Cluster <key>"}, whose data is the
     *         {@code InstanceList} of its members
     * @throws IllegalArgumentException if the file cannot be processed or yields a different
     *         number of records than {@code instanceList}; the underlying failure is the cause
     */
    private static InstanceList getInstanceListClusters(InstanceList instanceList, File file) {
        InstanceList fieldInstances = new InstanceList(new SerialPipes(new Pipe[]{new Input2CharSequence(), new SGML2FieldsPipe(refNoMeta, clusterNoMeta, startTags, endTags, tagWeight)}));
        FileReader reader = null;
        try {
            reader = new FileReader(file);
            fieldInstances.add(new LineGroupIterator2(reader, Pattern.compile(separator), true));
            if (instanceList.size() != fieldInstances.size()) {
                throw new UnsupportedOperationException("size not equal");
            }

            // Copy name and properties from the field-parsed pass onto the raw instances.
            int size = instanceList.size();
            for (int i = 0; i < size; i++) {
                Instance raw = instanceList.getInstance(i);
                Instance parsed = fieldInstances.getInstance(i);
                raw.setPropertyList(parsed.getPropertyList());
                raw.setName(parsed.getName());
                instanceList.setInstance(i, raw);
            }

            // Group by cluster key; LinkedHashMap keeps clusters in first-seen order.
            LinkedHashMap<Object, InstanceList> clustersByKey = new LinkedHashMap<Object, InstanceList>();
            for (int i = 0; i < size; i++) {
                Instance member = instanceList.getInstance(i);
                Object clusterKey = member.getProperty(clusterNoMeta);
                InstanceList cluster = clustersByKey.get(clusterKey);
                if (cluster == null) {
                    cluster = new InstanceList();
                    clustersByKey.put(clusterKey, cluster);
                }
                cluster.add(member);
            }

            InstanceList clusters = new InstanceList(null);
            for (Map.Entry<Object, InstanceList> entry : clustersByKey.entrySet()) {
                clusters.add(new Instance(entry.getValue(), null, "Cluster " + ((String) entry.getKey()), null));
            }
            return clusters;
        } catch (Exception e) {
            // Keep the original exception type and message, but attach the cause so that e.g.
            // the "size not equal" failure is no longer hidden behind a read error.
            throw new IllegalArgumentException("Can't read file " + file, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException ignored) {
                    // Best effort: a failure to close must not mask the real outcome.
                }
            }
        }
    }
}
