package edu.umass.cs.mallet.base.extract;

import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.Sequence;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Pattern;

/* loaded from: input_file:WEB-INF/lib/mallet-0.4-jaeschke.jar:edu/umass/cs/mallet/base/extract/HierarchicalTokenizationFilter.class */
/**
 * Builds {@link LabeledSpans} from a sequence of hierarchical tags.
 *
 * <p>Each element of the tag sequence is turned into a string and split on {@code '|'} into
 * component tags, ordered from the outermost level (index 0) inwards. A component may carry a
 * {@code "B-"} or {@code "I-"} prefix. A component at a given level continues the span opened at
 * that level by the previous position only when its name (prefix stripped) equals the previous
 * name and it is not a {@code "B-"} component; otherwise the span at that level, and every span
 * nested deeper, is closed and a new one is opened.
 *
 * <p>Components that fully match the optional ignore pattern are dropped before any comparison.
 */
public class HierarchicalTokenizationFilter implements TokenizationFilter {
    /** Component tags fully matching this pattern are discarded; {@code null} disables filtering. */
    Pattern ignorePattern;

    /**
     * Records where a still-open span began and the label it will carry once closed.
     *
     * <p>The decompiler noted that this class's access modifier was changed from private; the
     * public visibility is kept as found.
     */
    public static class TagStart {
        /** Index of the first position covered by the span. */
        int start;
        /** Label attached to the span when it is emitted. */
        Label label;

        /**
         * @param start index of the first position covered by the span
         * @param label label to attach to the span
         */
        public TagStart(int start, Label label) {
            this.start = start;
            this.label = label;
        }
    }

    /** Creates a filter that keeps every component tag. */
    public HierarchicalTokenizationFilter() {
        this.ignorePattern = null;
    }

    /**
     * Creates a filter that discards component tags fully matching {@code pattern}.
     *
     * @param pattern components to drop; {@code null} keeps every component
     */
    public HierarchicalTokenizationFilter(Pattern pattern) {
        this.ignorePattern = pattern;
    }

    /**
     * Creates a new {@link LabeledSpans} for {@code obj} and fills it with one span per tag run
     * found in {@code sequence}.
     *
     * @param labelAlphabet used to look up a {@link Label} for every tag string
     * @param obj passed to the {@link LabeledSpans} constructor
     * @param label spans whose label is this very instance are created with the boolean flag set
     *     (NOTE(review): presumably the background label — confirm against LabeledSpan)
     * @param tokenization source of the sub-spans
     * @param sequence tag sequence; each element's {@code toString()} is the hierarchical tag
     * @return the populated span collection
     * @throws IllegalArgumentException if adjacent tags disagree at a level above the deepest
     *     continued level
     */
    @Override // edu.umass.cs.mallet.base.extract.TokenizationFilter
    public LabeledSpans constructLabeledSpans(LabelAlphabet labelAlphabet, Object obj, Label label, Tokenization tokenization, Sequence sequence) {
        LabeledSpans labeledSpans = new LabeledSpans(obj);
        addSpansFromTags(labeledSpans, tokenization, sequence, labelAlphabet, label);
        return labeledSpans;
    }

    /**
     * Walks the tag sequence while maintaining a stack of open spans, one per hierarchy level.
     * At every position the spans that do not continue are closed (innermost first) and spans
     * are opened for any levels the current tag has beyond the remaining stack depth. Whatever
     * is still open after the last position is closed at the sequence end.
     */
    private void addSpansFromTags(LabeledSpans spans, Tokenization tokenization, Sequence tags, LabelAlphabet labelAlphabet, Label backgroundLabel) {
        // Stack of open spans: the outermost level is first, the innermost is last.
        LinkedList<TagStart> open = new LinkedList<TagStart>();
        String[] previous = new String[0];
        int position = 0;
        for (; position < tags.size(); position++) {
            String[] current = splitTag(labelAlphabet.lookupLabel(tags.get(position).toString()));
            for (int toClose = compareSplitTags(current, previous); toClose > 0; toClose--) {
                addLabeledSpan(spans, tokenization, open.removeLast(), position, backgroundLabel);
            }
            // After closing, the stack depth never exceeds current.length; open the missing levels.
            for (int level = open.size(); level < current.length; level++) {
                open.add(new TagStart(position, labelAlphabet.lookupLabel(current[level])));
            }
            previous = current;
        }
        // position now equals the sequence size, so the remaining spans end at the sequence end.
        while (!open.isEmpty()) {
            addLabeledSpan(spans, tokenization, open.removeLast(), position, backgroundLabel);
        }
    }

    /**
     * Emits the span running from {@code opened.start} to {@code end} (both handed to
     * {@code Tokenization.subspan}) with the label recorded when it was opened.
     */
    private void addLabeledSpan(LabeledSpans spans, Tokenization tokenization, TagStart opened, int end, Label backgroundLabel) {
        Span span = tokenization.subspan(opened.start, end);
        Label spanLabel = opened.label;
        // Identity comparison is kept deliberately: the flag is set only for the same Label instance.
        spans.add(new LabeledSpan(span, spanLabel, spanLabel == backgroundLabel));
    }

    /**
     * Determines how many of the previous position's open levels end before the current one.
     *
     * @param current component tags of the current position
     * @param previous component tags of the previous position
     * @return number of innermost levels of {@code previous} to close
     * @throws IllegalArgumentException if a level above the deepest continued level carries
     *     different names in the two tags
     */
    private int compareSplitTags(String[] current, String[] previous) {
        // Search from the innermost level outwards for the deepest level that simply continues:
        // it must exist in the current tag, must not be a "B-" component, and the names must agree.
        int level = previous.length - 1;
        while (level >= 0) {
            if (level < current.length) {
                String tag = current[level];
                if (!isBeginName(tag) && matches(previous[level], tag)) {
                    break;
                }
            }
            level--;
        }
        // Everything below the continued level is closed; level == -1 closes all of them.
        int closed = (previous.length - level) - 1;
        // Sanity check: the continued level and every level above it must agree by name.
        for (; level >= 0; level--) {
            if (!matches(current[level], previous[level])) {
                throw new IllegalArgumentException("Tags don't match.");
            }
        }
        return closed;
    }

    /** Returns whether two component tags are equal once any "B-"/"I-" prefix is stripped. */
    private boolean matches(String first, String second) {
        return trim(first).equals(trim(second));
    }

    /** Strips a leading "B-" or "I-" from {@code tag}; other tags are returned unchanged. */
    private String trim(String tag) {
        if (isBeginName(tag) || isInsideName(tag)) {
            return tag.substring(2);
        }
        return tag;
    }

    /**
     * Splits the label's string form on {@code '|'} and removes the components that fully match
     * {@link #ignorePattern}, if one is set.
     */
    private String[] splitTag(Label label) {
        // Copy into an ArrayList because Arrays.asList returns a fixed-size view that cannot remove.
        ArrayList<String> components = new ArrayList<String>(Arrays.asList(label.toString().split("\\|")));
        if (this.ignorePattern != null) {
            for (Iterator<String> it = components.iterator(); it.hasNext(); ) {
                if (this.ignorePattern.matcher(it.next()).matches()) {
                    it.remove();
                }
            }
        }
        return components.toArray(new String[0]);
    }

    /** Returns whether {@code tag} starts with the "B-" prefix. */
    private boolean isBeginName(String tag) {
        return tag.startsWith("B-");
    }

    /** Returns whether {@code tag} starts with the "I-" prefix. */
    private boolean isInsideName(String tag) {
        return tag.startsWith("I-");
    }
}
