package org.bibsonomy.util;

import it.unimi.dsi.fastutil.chars.CharOpenHashSet;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-web-common-2.0.22.jar:org/bibsonomy/util/XmlUtils.class */
/**
 * Static helpers for cleaning text of characters that are not allowed in
 * XML 1.0 documents, for parsing markup into a DOM via JTidy, and for
 * extracting the text content of a DOM node.
 */
public class XmlUtils {
    /** Character (U+FFFD, the Unicode replacement character) used in place of illegal characters. */
    private static final char ILLEGAL_CHAR_SUBSTITUTE = '\uFFFD';

    /** Encoding assumed for in-memory strings and for streams without further information. */
    private static final String DEFAULT_ENCODING = "UTF-8";
    private static final Charset DEFAULT_CHARSET = Charset.forName(DEFAULT_ENCODING);

    /** All BMP code units (other than surrogates) that the XML 1.0 {@code Char} production excludes. */
    private static final CharOpenHashSet illegalChars = new CharOpenHashSet();

    static {
        // Built from explicit ranges instead of a string literal so that no
        // character (in particular U+0000) can be lost or altered by source
        // encoding. Tab, line feed and carriage return are the only legal
        // characters below U+0020.
        for (char c = 0; c < 0x20; c++) {
            if (c != '\t' && c != '\n' && c != '\r') {
                illegalChars.add(c);
            }
        }
        illegalChars.add((char) 0xFFFE);
        illegalChars.add((char) 0xFFFF);
    }

    /**
     * Copies {@code chars} into a new builder, dropping or substituting every
     * illegal character.
     *
     * @param chars      the characters to filter; must not be {@code null}
     * @param substitute if {@code true}, illegal characters are replaced by
     *                   {@link #ILLEGAL_CHAR_SUBSTITUTE}; otherwise they are omitted
     * @return a builder holding the filtered characters
     */
    private static StringBuilder filter(char[] chars, boolean substitute) {
        StringBuilder sb = new StringBuilder(chars.length);
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (!illegalChars.contains(c)) {
                sb.append(c);
            } else if (substitute) {
                sb.append(ILLEGAL_CHAR_SUBSTITUTE);
            }
        }
        return sb;
    }

    /**
     * Removes or substitutes all characters that are illegal in XML.
     *
     * @param str the text to clean; may be {@code null}
     * @param z   if {@code true}, illegal characters are replaced by U+FFFD;
     *            if {@code false}, they are removed
     * @return the cleaned text, or {@code null} if {@code str} was {@code null}
     */
    public static String removeXmlControlCharacters(String str, boolean z) {
        if (str == null) {
            return null;
        }
        return filter(str.toCharArray(), z).toString();
    }

    /**
     * Removes all characters that are illegal in XML.
     *
     * @param str the text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} if {@code str} was {@code null}
     */
    public static String removeXmlControlCharacters(String str) {
        return removeXmlControlCharacters(str, false);
    }

    /**
     * Removes or substitutes all characters that are illegal in XML.
     *
     * @param cArr the characters to clean; may be {@code null}
     * @param z    if {@code true}, illegal characters are replaced by U+FFFD;
     *             if {@code false}, they are removed
     * @return a new array with the cleaned characters, or {@code null} if
     *         {@code cArr} was {@code null}
     */
    public static char[] removeXmlControlCharacters(char[] cArr, boolean z) {
        if (cArr == null) {
            // Mirrors the null handling of the String overload.
            return null;
        }
        return filter(cArr, z).toString().toCharArray();
    }

    /**
     * Removes all characters that are illegal in XML.
     *
     * @param cArr the characters to clean; may be {@code null}
     * @return a new array with the cleaned characters, or {@code null} if
     *         {@code cArr} was {@code null}
     */
    public static char[] removeXmlControlCharacters(char[] cArr) {
        return removeXmlControlCharacters(cArr, false);
    }

    /**
     * Replaces a single character if it is illegal in XML. Since a lone
     * {@code char} cannot be removed, an illegal character becomes a space
     * when no substitution is requested.
     *
     * @param c the character to check
     * @param z if {@code true}, an illegal character is replaced by U+FFFD;
     *          if {@code false}, it is replaced by a space
     * @return {@code c} itself if it is legal, otherwise its replacement
     */
    public static char removeXmlControlCharacter(char c, boolean z) {
        if (!illegalChars.contains(c)) {
            return c;
        }
        return z ? ILLEGAL_CHAR_SUBSTITUTE : ' ';
    }

    /**
     * Replaces a single character by a space if it is illegal in XML.
     *
     * @param c the character to check
     * @return {@code c} itself if it is legal, otherwise a space
     */
    public static char removeXmlControlCharacters(char c) {
        return removeXmlControlCharacter(c, false);
    }

    /**
     * Parses the given markup into a DOM, with Tidy's XML-tags mode off.
     *
     * @param str the markup to parse; must not be {@code null}
     * @return the parsed document
     */
    public static Document getDOM(String str) {
        return getDOM(str, false);
    }

    /**
     * Parses the given markup into a DOM.
     *
     * @param str the markup to parse; must not be {@code null}
     * @param z   value passed to {@code Tidy.setXmlTags}
     * @return the parsed document
     */
    public static Document getDOM(String str, boolean z) {
        // The stream overload parses as UTF-8, so the bytes must be UTF-8 too
        // rather than whatever the platform default charset happens to be.
        return getDOM(new ByteArrayInputStream(str.getBytes(DEFAULT_CHARSET)), z);
    }

    /**
     * Fetches the resource at {@code url} and parses it into a DOM, with
     * Tidy's XML-tags mode off.
     *
     * @param url the location of the markup
     * @return the parsed document
     * @throws IOException if the connection cannot be opened or read
     */
    public static Document getDOM(URL url) throws IOException {
        return getDOM(url, false);
    }

    /**
     * Fetches the resource at {@code url} and parses it into a DOM. The input
     * encoding is whatever {@code WebUtils.extractCharset} yields for the
     * connection's content type.
     *
     * @param url the location of the markup
     * @param z   value passed to {@code Tidy.setXmlTags}
     * @return the parsed document
     * @throws IOException if the connection cannot be opened or read
     */
    public static Document getDOM(URL url, boolean z) throws IOException {
        Tidy tidy = getTidy(z);
        // A single connection serves both the content type and the body, so
        // the resource is requested only once and the header is guaranteed to
        // describe the body being parsed.
        URLConnection connection = url.openConnection();
        tidy.setInputEncoding(WebUtils.extractCharset(connection.getContentType()));
        InputStream in = connection.getInputStream();
        try {
            return tidy.parseDOM(in, null);
        } finally {
            in.close();
        }
    }

    /**
     * Parses the UTF-8 encoded markup from the stream into a DOM, with Tidy's
     * XML-tags mode off. The stream is not closed by this method.
     *
     * @param inputStream the stream to read the markup from
     * @return the parsed document
     */
    public static Document getDOM(InputStream inputStream) {
        return getDOM(inputStream, false);
    }

    /**
     * Parses the UTF-8 encoded markup from the stream into a DOM. The stream
     * is not closed by this method.
     *
     * @param inputStream the stream to read the markup from
     * @param z           value passed to {@code Tidy.setXmlTags}
     * @return the parsed document
     */
    public static Document getDOM(InputStream inputStream, boolean z) {
        Tidy tidy = getTidy(z);
        tidy.setInputEncoding(DEFAULT_ENCODING);
        return tidy.parseDOM(inputStream, null);
    }

    /**
     * Creates a Tidy instance configured to be quiet and to report neither
     * warnings nor errors.
     *
     * @param z value passed to {@code Tidy.setXmlTags}
     * @return the configured instance
     */
    private static Tidy getTidy(boolean z) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        tidy.setShowErrors(0);
        tidy.setXmlTags(z);
        return tidy;
    }

    /**
     * Collects the text of a node: its own node value (if any) followed by
     * the text of all its children, recursively and in child order.
     *
     * @param node the node to read; must not be {@code null}
     * @return the concatenated text, possibly empty
     */
    public static String getText(Node node) {
        StringBuilder sb = new StringBuilder();
        String nodeValue = node.getNodeValue();
        if (nodeValue != null) {
            sb.append(nodeValue);
        }
        if (node.hasChildNodes()) {
            NodeList childNodes = node.getChildNodes();
            for (int i = 0; i < childNodes.getLength(); i++) {
                sb.append(getText(childNodes.item(i)));
            }
        }
        return sb.toString();
    }
}
