package edu.umass.cs.mallet.base.extract;

import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.LabelSequence;
import edu.umass.cs.mallet.base.types.Sequence;
import gnu.trove.THashMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.output.XMLOutputter;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/bibsonomy-scraper-2.0.1.jar:org/bibsonomy/scraper/ie/training/mallet.jar:edu/umass/cs/mallet/base/extract/DocumentExtraction.class
 */
/* loaded from: input_file:WEB-INF/lib/mallet-0.4-steuber.jar:edu/umass/cs/mallet/base/extract/DocumentExtraction.class */
/**
 * Holds the result of running an extractor over a single document: the
 * tokenized input, the predicted (and optionally the target) labels, and the
 * labeled spans derived from them. The extracted spans can be rendered as a
 * JDOM XML document in which nested spans become nested elements.
 */
public class DocumentExtraction {
    /** Tokenization of the document the extraction was run on. */
    private final Tokenization input;
    /** Predicted label sequence; {@code null} when built directly from spans. */
    private Sequence predictedLabels;
    /** Target labels; only set when the supplied target is a {@link LabelSequence}. */
    private LabelSequence target;
    /** Spans obtained from the predicted labels (or supplied directly). */
    private final LabeledSpans extractedSpans;
    /** Spans obtained from the target labels; may be {@code null}. */
    private LabeledSpans targetSpans;
    /** The underlying document, as returned by {@code Tokenization.getDocument()}. */
    private final Object document;
    /** Label whose spans are written as plain text rather than as XML elements. */
    private final Label backgroundTag;
    /** Name given to this extraction by the caller. */
    private final String name;

    /**
     * Creates an extraction without target labels, using a
     * {@link BIOTokenizationFilter} to turn labels into spans.
     *
     * @param str           name of this extraction
     * @param labelAlphabet alphabet used to look up the background label
     * @param tokenization  tokenized input document
     * @param sequence      predicted labels, one per token
     * @param str2          entry of the background label in {@code labelAlphabet}
     */
    public DocumentExtraction(String str, LabelAlphabet labelAlphabet, Tokenization tokenization, Sequence sequence, String str2) {
        this(str, labelAlphabet, tokenization, sequence, null, str2, new BIOTokenizationFilter());
    }

    /**
     * Creates an extraction with optional target labels, using a
     * {@link BIOTokenizationFilter} to turn labels into spans.
     *
     * @param str           name of this extraction
     * @param labelAlphabet alphabet used to look up the background label
     * @param tokenization  tokenized input document
     * @param sequence      predicted labels, one per token
     * @param sequence2     target labels, or {@code null} if unknown
     * @param str2          entry of the background label in {@code labelAlphabet}
     */
    public DocumentExtraction(String str, LabelAlphabet labelAlphabet, Tokenization tokenization, Sequence sequence, Sequence sequence2, String str2) {
        this(str, labelAlphabet, tokenization, sequence, sequence2, str2, new BIOTokenizationFilter());
    }

    /**
     * Creates an extraction, deriving the extracted (and, if available, target)
     * spans from label sequences with the given filter.
     *
     * @param str                name of this extraction
     * @param labelAlphabet      alphabet used to look up the background label
     * @param tokenization       tokenized input document
     * @param sequence           predicted labels; asserted to have one entry per token
     * @param sequence2          target labels, or {@code null} if unknown
     * @param str2               entry of the background label in {@code labelAlphabet}
     * @param tokenizationFilter strategy that converts a label sequence into labeled spans
     */
    public DocumentExtraction(String str, LabelAlphabet labelAlphabet, Tokenization tokenization, Sequence sequence, Sequence sequence2, String str2, TokenizationFilter tokenizationFilter) {
        this.document = tokenization.getDocument();
        this.name = str;
        assert tokenization.size() == sequence.size()
                : "tokenization has " + tokenization.size() + " tokens but " + sequence.size() + " labels were predicted";
        this.backgroundTag = labelAlphabet.lookupLabel(str2);
        this.input = tokenization;
        this.predictedLabels = sequence;
        this.extractedSpans = tokenizationFilter.constructLabeledSpans(labelAlphabet, this.document, this.backgroundTag, tokenization, sequence);
        if (sequence2 != null) {
            // The target is only retained as a label sequence when it actually is one;
            // the target spans are built from it either way.
            if (sequence2 instanceof LabelSequence) {
                this.target = (LabelSequence) sequence2;
            }
            this.targetSpans = tokenizationFilter.constructLabeledSpans(labelAlphabet, this.document, this.backgroundTag, tokenization, sequence2);
        }
    }

    /**
     * Creates an extraction from spans that have already been computed. The
     * predicted and target label sequences are left {@code null}.
     *
     * @param str           name of this extraction
     * @param labelAlphabet alphabet used to look up the background label
     * @param tokenization  tokenized input document
     * @param labeledSpans  extracted spans
     * @param labeledSpans2 target spans; stored as given
     * @param str2          entry of the background label in {@code labelAlphabet}
     */
    public DocumentExtraction(String str, LabelAlphabet labelAlphabet, Tokenization tokenization, LabeledSpans labeledSpans, LabeledSpans labeledSpans2, String str2) {
        this.document = tokenization.getDocument();
        this.name = str;
        this.backgroundTag = labelAlphabet.lookupLabel(str2);
        this.input = tokenization;
        this.extractedSpans = labeledSpans;
        this.targetSpans = labeledSpans2;
    }

    /** @return the document object obtained from the input tokenization */
    public Object getDocument() {
        return this.document;
    }

    /** @return the tokenization this extraction was built from */
    public Tokenization getInput() {
        return this.input;
    }

    /** @return the predicted labels, or {@code null} if this extraction was built from spans */
    public Sequence getPredictedLabels() {
        return this.predictedLabels;
    }

    /** @return the extracted spans */
    public LabeledSpans getExtractedSpans() {
        return this.extractedSpans;
    }

    /** @return the target spans, or {@code null} if no target was supplied */
    public LabeledSpans getTargetSpans() {
        return this.targetSpans;
    }

    /** @return the target labels, or {@code null} if none were supplied as a {@link LabelSequence} */
    public LabelSequence getTarget() {
        return this.target;
    }

    /** @return the name given to this extraction */
    public String getName() {
        return this.name;
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    public Span subspan(int i, int i2) {
        throw new UnsupportedOperationException("not yet implemented.");
    }

    /**
     * Renders the extracted spans as XML under a root element named
     * {@code "doc"}.
     *
     * @return the XML document
     * @see #toXmlDocument(String, Namespace)
     */
    public Document toXmlDocument() {
        return toXmlDocument("doc", Namespace.NO_NAMESPACE);
    }

    /**
     * Renders the extracted spans as an XML document. Spans are ordered by start
     * index; a span that is a subspan of an earlier one is nested inside the
     * nearest such earlier span, and all remaining spans become children of the
     * root element.
     *
     * @param str       name of the root element
     * @param namespace currently ignored; elements are created without a namespace
     * @return the XML document
     * @throws ClassCastException if the underlying document is not a {@link CharSequence}
     */
    public Document toXmlDocument(String str, Namespace namespace) {
        ArrayList byStart = new ArrayList(this.extractedSpans);
        Collections.sort(byStart, new Comparator() {
            @Override // java.util.Comparator
            public int compare(Object obj, Object obj2) {
                return Double.compare(((Span) obj).getStartIdx(), ((Span) obj2).getStartIdx());
            }
        });

        // roots: spans not contained in any earlier span.
        // childrenOf: maps a parent span to the list of spans nested directly inside it.
        ArrayList roots = new ArrayList(byStart);
        THashMap childrenOf = new THashMap();
        for (int i = 0; i < byStart.size(); i++) {
            LabeledSpan child = (LabeledSpan) byStart.get(i);
            // Walk backwards so the closest preceding enclosing span wins.
            for (int j = i - 1; j >= 0; j--) {
                LabeledSpan candidate = (LabeledSpan) byStart.get(j);
                if (candidate.isSubspan(child)) {
                    List siblings = (List) childrenOf.get(candidate);
                    if (siblings == null) {
                        siblings = new ArrayList();
                        childrenOf.put(candidate, siblings);
                    }
                    roots.remove(child);
                    siblings.add(child);
                    // A span has exactly one parent; without this the scan never advances.
                    break;
                }
            }
        }

        CharSequence text = (CharSequence) this.document;
        Span wholeDocument = new StringSpan(text, 0, text.length());
        return new Document(generateElement(str, wholeDocument, roots, childrenOf));
    }

    /**
     * Builds the element for one span, recursing into its nested spans. Text of
     * the span that is not covered by any child is emitted as plain text between
     * the children; children carrying the background label are emitted as plain
     * text as well.
     *
     * @param str      name of the element to create
     * @param span     span whose text the element represents
     * @param list     direct children of {@code span} in document order, or {@code null}
     * @param tHashMap maps each span to the list of its direct children
     * @return the populated element
     */
    private Element generateElement(String str, Span span, List list, THashMap tHashMap) {
        Element element = new Element(str);
        if (list == null || list.isEmpty()) {
            element.setContent(new Text(span.getText()));
            return element;
        }

        String spanText = span.getText();
        int spanStart = span.getStartIdx();
        List content = new ArrayList(list.size());
        // Offset into spanText up to which content has already been emitted.
        int consumed = 0;
        for (int k = 0; k < list.size(); k++) {
            LabeledSpan child = (LabeledSpan) list.get(k);
            Label childLabel = child.getLabel();

            // Child indices are document-absolute; convert to an offset within this span.
            int childStart = child.getStartIdx() - spanStart;
            if (childStart > consumed) {
                content.add(new Text(spanText.substring(consumed, childStart)));
            }

            if (childLabel == this.backgroundTag) {
                content.add(new Text(child.getText()));
            } else {
                content.add(generateElement(childLabel.getEntry().toString(), child, (List) tHashMap.get(child), tHashMap));
            }
            consumed = child.getEndIdx() - spanStart;
        }

        // 'consumed' is relative to the span, so compare it with the text length
        // rather than the absolute end index to avoid emitting an empty text node.
        if (consumed < spanText.length()) {
            content.add(new Text(spanText.substring(consumed)));
        }
        element.addContent(content);
        return element;
    }

    /** @return the result of {@link #toXmlDocument()} serialized with a default {@link XMLOutputter} */
    public String toXmlString() {
        return new XMLOutputter().outputString(toXmlDocument());
    }

    /** @return the number of extracted spans */
    public int size() {
        return this.extractedSpans.size();
    }
}
