package org.bibsonomy.scraper.url.kde.digitalhumanities;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import com.hp.hpl.jena.sparql.sse.Tags;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xml.serializer.SerializerConstants;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.Scraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.UrlUtils;
import org.bibsonomy.util.ValidationUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.springframework.web.servlet.tags.form.AbstractHtmlElementTag;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.9.2.jar:org/bibsonomy/scraper/url/kde/digitalhumanities/ZfdGScraper.class */
/**
 * URL scraper for pages served from a host ending in {@code zfdg.de}.
 *
 * <p>Two sources of metadata are supported, tried in this order:
 * <ol>
 *   <li>a COinS span ({@code <span class="Z3988" title="...">}) embedded in the page, whose
 *       {@code title} attribute holds {@code key=value} pairs;</li>
 *   <li>a link to an XML file (the {@code xml_but} button), which is downloaded and queried
 *       with XPath.</li>
 * </ol>
 * The extracted fields are rendered as a BibTeX entry and stored in the {@link ScrapingContext}.
 */
public class ZfdGScraper extends AbstractUrlScraper {
    // Keys looked up in the key/value pairs of the COinS span's title attribute.
    private static final String AUTHOR_LAST_KEY = "rft.aulast";
    private static final String AUTHOR_FIRST_KEY = "rft.aufirst";
    private static final String A_TITLE = "rft.atitle";
    private static final String TITLE = "rft.title";
    private static final String DATE_KEY = "rft.date";
    private static final String END_PAGE_KEY = "rft.epage";
    private static final String START_PAGE_KEY = "rft.spage";
    private static final String PAGES_KEY = "rft.pages";
    private static final String ENTRY_TYPE_KEY = "rft_val_fmt";
    private static final String GENRE = "rft.genre";
    private static final String RFT_AU = "rft.au";
    private static final String DOI = "rft_id";

    private static final String SITE_NAME = "ZfdG";
    private static final String SITE_URL = "http://zfdg.de/";
    private static final String ZFDG_HOST = "zfdg.de";
    private static final String INFO = "<a href=\"http://zfdg.de/\">ZfdG</a> Scraper: Scraper for ZfdG journal.";
    private static final String PATTERN_HTML_TAG = "</?\\s*+\\w++.*?>";

    // NOTE(review): the three aliases below point at constants of unrelated libraries (Spring,
    // Jackson). This looks like a decompiler artifact that replaced plain string literals by
    // arbitrary constants with the same value - presumably "title", "abstract" and " ".
    // Confirm the values and replace them with local literals.
    private static final String TITLE_NAME = AbstractHtmlElementTag.TITLE_ATTRIBUTE;
    private static final String ABSTRACT_NAME = BeanDefinitionParserDelegate.ABSTRACT_ATTRIBUTE;
    private static final String WHITESPACE_REPLACEMENT = MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR;

    private static final Log log = LogFactory.getLog(ZfdGScraper.class);

    /** Captures the content of the COinS span's title attribute. */
    private static final Pattern PATTERN_COINS = Pattern.compile("<span class=\"Z3988\" title=\"([^\"]*)\"");
    /** Captures the href of the XML download button. */
    private static final Pattern PATTERN_TEI_XML = Pattern.compile("<div id=\"xml_but\" class=\"but\"><a href=\"([^\"]*)\"");
    /** Captures one key (group 1) and its value (group 2) from the COinS title attribute. */
    private static final Pattern PATTERN_KEY_VALUE = Pattern.compile("([^=]*)=(([^&]|&(?!amp;))*)(&amp;|&)?");
    /** Captures the first run of four digits, used as the year. */
    private static final Pattern PATTERN_DATE = Pattern.compile("(\\d{4})");
    private static final Pattern PATTERN_DOI = Pattern.compile("http://dx.doi.org/(.*)");
    /** Captures everything between the "Abstract" heading and the next closing paragraph tag. */
    private static final Pattern PATTERN_ABSTRACT = Pattern.compile("<h1>Abstract</h1>(([^<]|<(?!/p>))*)</p>", Pattern.DOTALL);
    private static final List<Pair<Pattern, Pattern>> URL_PATTERNS = Collections.singletonList(new Pair<Pattern, Pattern>(Pattern.compile(".*zfdg.de"), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Scrapes the page held by the given context.
     *
     * @param scrapingContext context providing the URL and page content and receiving the result
     * @return {@code true} if a BibTeX entry was stored in the context; {@code false} if the page
     *         has no content, is not on a {@code zfdg.de} host, or contains neither a COinS span
     *         nor an XML link
     * @throws ScrapingException if the XML file cannot be fetched or parsed, or if the COinS span
     *         yields no BibTeX entry
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    public boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        String pageContent = scrapingContext.getPageContent();
        URL url = scrapingContext.getUrl();
        // Without content or URL there is nothing to match against; bail out instead of failing with an NPE.
        if (pageContent == null || url == null || !url.getHost().endsWith(ZFDG_HOST)) {
            return false;
        }

        // COinS metadata takes precedence over the XML file.
        Matcher coinsMatcher = PATTERN_COINS.matcher(pageContent);
        if (coinsMatcher.find()) {
            scrapingContext.setScraper(this);
            return scrapeCoins(scrapingContext, pageContent, coinsMatcher.group(1));
        }

        Matcher xmlLinkMatcher = PATTERN_TEI_XML.matcher(pageContent);
        if (!xmlLinkMatcher.find()) {
            return false;
        }
        scrapingContext.setScraper(this);
        String href = xmlLinkMatcher.group(1);
        // The captured href may be empty; the site URL alone is not a publication file.
        if (!ValidationUtils.present(href)) {
            log.error("can't parse publication");
            return false;
        }
        return scrapeXml(scrapingContext, SITE_URL + href);
    }

    /**
     * Downloads the XML file at {@code xmlUrl}, extracts the bibliographic fields via XPath and
     * stores the result as an {@code @article} entry in the context.
     *
     * @param scrapingContext context receiving the BibTeX result
     * @param xmlUrl absolute URL of the XML file
     * @return {@code true} once the entry has been stored
     * @throws ScrapingException wrapping any I/O, parser or XPath failure
     */
    private static boolean scrapeXml(ScrapingContext scrapingContext, String xmlUrl) throws ScrapingException {
        StringBuffer bibtex = new StringBuffer();
        try {
            Document document = loadXml(xmlUrl);
            XPath xPath = XPathFactory.newInstance().newXPath();

            NodeList nameNodes = (NodeList) xPath.compile("//biblStruct/analytic/respStmt/resp/persName/name").evaluate(document, XPathConstants.NODESET);
            StringBuilder authorList = new StringBuilder();
            for (int i = 0; i < nameNodes.getLength(); i++) {
                String name = nameNodes.item(i).getTextContent();
                if (authorList.length() == 0) {
                    authorList.append(name);
                } else {
                    // NOTE(review): every further name is put in FRONT of the list, so the authors end up in
                    // reverse document order. Kept as is; confirm whether this is intended.
                    authorList.insert(0, PersonNameUtils.PERSON_NAME_DELIMITER).insert(0, name);
                }
            }
            String authors = cleanText(authorList.toString());

            // The second <title> element of the document is used as the article title.
            String title = cleanText(textAt(document.getElementsByTagName(TITLE_NAME), 1));
            String year = extractYear(textAt(document.getElementsByTagName("date"), 0));
            String abstractText = getXMLElement(document, xPath, "//argument", 0);
            String journal = getXMLElement(document, xPath, "//monogr/title", 0);
            String volume = getXMLElement(document, xPath, "//monogr/title", 1);
            String doi = getXMLElement(document, xPath, "//analytic/idno", 0);

            bibtex.append("@article{").append(BibTexUtils.generateBibtexKey(authors, (String) null, year, title)).append(",\n");
            append(TITLE_NAME, title, bibtex);
            append("author", authors, bibtex);
            append("year", year, bibtex);
            append("volume", volume, bibtex);
            append(ABSTRACT_NAME, abstractText, bibtex);
            append("journal", journal, bibtex);
            append("doi", doi, bibtex);
            bibtex.append("\n}\n");
        } catch (IOException | ParserConfigurationException | XPathExpressionException | DOMException | SAXException e) {
            throw new InternalFailureException(e);
        }
        // The buffer always holds at least the entry header here, so this stores the result.
        return returnBibTeX(scrapingContext, bibtex);
    }

    /**
     * Fetches and parses the XML document at the given URL. The stream is closed in all cases.
     *
     * @param xmlUrl absolute URL of the XML file
     * @return the parsed document
     * @throws IOException if the URL is malformed or cannot be read
     * @throws ParserConfigurationException if no suitable parser can be created
     * @throws SAXException if the content is not well-formed XML
     */
    private static Document loadXml(String xmlUrl) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // The document comes from the network: ask the parser to apply its secure-processing limits.
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        try (InputStream in = new URL(xmlUrl).openStream()) {
            return factory.newDocumentBuilder().parse(in);
        }
    }

    /**
     * Derives the year from the text of the document's first {@code date} element.
     *
     * @param dateText raw text of the element, may be {@code null}
     * @return the cleaned text after the first six characters; the cleaned whole text if it is
     *         shorter than six characters; {@code null} if {@code dateText} is {@code null}
     */
    private static String extractYear(String dateText) {
        if (dateText == null) {
            return null;
        }
        // NOTE(review): offset 6 presumably skips a "dd.MM." prefix - confirm against real data.
        return cleanText(dateText.length() >= 6 ? dateText.substring(6) : dateText);
    }

    /**
     * Builds a BibTeX entry from the key/value pairs of a COinS title attribute and stores it in
     * the context. The entry type is {@code @article}, {@code @book} or {@code @misc} depending on
     * {@code rft_val_fmt} and {@code rft.genre}; without {@code rft_val_fmt} no entry is built.
     *
     * @param scrapingContext context receiving the BibTeX result
     * @param pageContent full page content, searched for the abstract
     * @param coinsTitle content of the COinS span's title attribute
     * @return {@code true} once the entry has been stored
     * @throws ScrapingFailureException if no entry could be built
     */
    private static boolean scrapeCoins(ScrapingContext scrapingContext, String pageContent, String coinsTitle) throws ScrapingFailureException {
        Map<String, String> coins = parseCoins(coinsTitle);
        StringBuffer bibtex = new StringBuffer();

        StringBuilder authorList = new StringBuilder();
        if (coins.containsKey(RFT_AU)) {
            authorList.append(coins.get(RFT_AU));
        }
        if (coins.containsKey(AUTHOR_FIRST_KEY) || coins.containsKey(AUTHOR_LAST_KEY)) {
            // The author given by aufirst/aulast is placed in front of the rft.au authors.
            String leadingAuthor = getAuthorFirstLast(coins.get(AUTHOR_FIRST_KEY), coins.get(AUTHOR_LAST_KEY));
            if (authorList.length() == 0) {
                authorList.append(leadingAuthor);
            } else {
                authorList.insert(0, PersonNameUtils.PERSON_NAME_DELIMITER).insert(0, leadingAuthor);
            }
        }
        String authors = authorList.toString();

        // Prefer the article title; fall back to the generic title. Either may be missing.
        String rawTitle = coins.containsKey(A_TITLE) ? coins.get(A_TITLE) : coins.get(TITLE);
        String title = rawTitle == null ? null : cleanText(rawTitle.replace(SerializerConstants.ENTITY_CRLF, WHITESPACE_REPLACEMENT));

        String year = null;
        if (coins.containsKey(DATE_KEY)) {
            Matcher yearMatcher = PATTERN_DATE.matcher(coins.get(DATE_KEY));
            if (yearMatcher.find()) {
                year = yearMatcher.group(1);
            }
        }

        String pages = null;
        if (coins.containsKey(PAGES_KEY)) {
            pages = coins.get(PAGES_KEY);
        } else if (coins.containsKey(START_PAGE_KEY) && coins.containsKey(END_PAGE_KEY)) {
            pages = coins.get(START_PAGE_KEY) + "--" + coins.get(END_PAGE_KEY);
        }

        String doi = null;
        if (coins.containsKey(DOI)) {
            Matcher doiMatcher = PATTERN_DOI.matcher(coins.get(DOI));
            if (doiMatcher.find()) {
                doi = doiMatcher.group(1);
            }
        }

        Matcher abstractMatcher = PATTERN_ABSTRACT.matcher(pageContent);
        String abstractText = abstractMatcher.find() ? cleanText(abstractMatcher.group(1)) : "";

        if (coins.containsKey(ENTRY_TYPE_KEY)) {
            String format = coins.get(ENTRY_TYPE_KEY);
            // The genre is optional; a missing genre simply does not select the article type.
            String genre = coins.get(GENRE);
            boolean articleGenre = genre != null && (genre.contains(BibTexUtils.ARTICLE) || genre.contains("bookitem"));
            if (format.contains(":journal") || articleGenre) {
                String journal = get(coins, TITLE);
                bibtex.append("@article{").append(BibTexUtils.generateBibtexKey(authors, (String) null, year, title)).append(",\n");
                if (journal != null) {
                    append("journal", journal, bibtex);
                } else {
                    append("journal", get(coins, "rft.series"), bibtex);
                }
            } else if (format.contains(":book")) {
                String bookTitle = get(coins, "rft.btitle");
                bibtex.append("@book{").append(BibTexUtils.generateBibtexKey(authors, (String) null, year, bookTitle)).append(",\n");
                if (bookTitle != null) {
                    append("booktitle", bookTitle, bibtex);
                }
            } else {
                bibtex.append("@misc{").append(BibTexUtils.generateBibtexKey(authors, (String) null, year, title)).append(",\n");
            }
            append(TITLE_NAME, title, bibtex);
            append("author", authors, bibtex);
            append("year", year, bibtex);
            append("volume", get(coins, "rft.volume"), bibtex);
            append("number", get(coins, "rft.issue"), bibtex);
            append("pages", pages, bibtex);
            append(ABSTRACT_NAME, abstractText, bibtex);
            append("doi", doi, bibtex);
            bibtex.append("\n}\n");
        }
        return returnBibTeX(scrapingContext, bibtex);
    }

    /**
     * Splits the COinS title attribute into decoded key/value pairs. Pairs with an empty key or
     * value are dropped. Repeated {@code rft.au} values are joined with the person name delimiter;
     * for any other repeated key the last value wins.
     *
     * @param coinsTitle content of the COinS span's title attribute
     * @return the decoded pairs; never {@code null}, never containing {@code null} values
     */
    private static Map<String, String> parseCoins(String coinsTitle) {
        Map<String, String> coins = new HashMap<>();
        Matcher pairMatcher = PATTERN_KEY_VALUE.matcher(coinsTitle);
        while (pairMatcher.find()) {
            String key = UrlUtils.safeURIDecode(pairMatcher.group(1));
            String value = UrlUtils.safeURIDecode(pairMatcher.group(2));
            if (ValidationUtils.present(key) && ValidationUtils.present(value)) {
                if (key.equals(RFT_AU) && coins.containsKey(RFT_AU)) {
                    value = coins.get(RFT_AU) + PersonNameUtils.PERSON_NAME_DELIMITER + value;
                }
                coins.put(key, value);
            }
        }
        return coins;
    }

    /**
     * Normalizes extracted text: collapses whitespace runs, trims, strips HTML tags and replaces
     * the {@code &nbsp;} entity and the library-defined "less than"/"greater than" entities.
     *
     * @param str text to clean, may be {@code null}
     * @return the cleaned text, or {@code null} if {@code str} is {@code null}
     */
    private static String cleanText(String str) {
        if (str == null) {
            return null;
        }
        return str.replaceAll("\\s+", WHITESPACE_REPLACEMENT)
                .trim()
                .replaceAll(PATTERN_HTML_TAG, "")
                .replaceAll("&nbsp;", WHITESPACE_REPLACEMENT)
                .replaceAll(SerializerConstants.ENTITY_LT, Tags.symLT)
                .replaceAll(SerializerConstants.ENTITY_GT, Tags.symGT);
    }

    /**
     * Returns the text content of the node at {@code index}.
     *
     * @param nodes node list to read from
     * @param index zero-based position
     * @return the node's text content, or {@code null} if the list has no node at that position
     */
    private static String textAt(NodeList nodes, int index) {
        if (nodes == null || index < 0 || index >= nodes.getLength()) {
            return null;
        }
        return nodes.item(index).getTextContent();
    }

    /**
     * Evaluates an XPath expression and returns the cleaned text of the {@code i}-th match.
     *
     * @param document document to query
     * @param xPath XPath instance used for the evaluation
     * @param str XPath expression
     * @param i zero-based index of the wanted match
     * @return the cleaned text, or {@code null} if there is no such match or the expression
     *         cannot be evaluated (the latter is logged)
     */
    private static String getXMLElement(Document document, XPath xPath, String str, int i) {
        try {
            NodeList matches = (NodeList) xPath.compile(str).evaluate(document, XPathConstants.NODESET);
            return cleanText(textAt(matches, i));
        } catch (XPathExpressionException e) {
            log.error("XPath not valid or reachable", e);
            return null;
        }
    }

    /**
     * Stores the BibTeX buffer in the context, adding a {@code url} field with the scraped URL
     * unless the entry already has one.
     *
     * @param scrapingContext context receiving the result
     * @param stringBuffer BibTeX entry built so far
     * @return always {@code true}
     * @throws ScrapingFailureException if the buffer is not present (no entry was built)
     */
    private static boolean returnBibTeX(ScrapingContext scrapingContext, StringBuffer stringBuffer) throws ScrapingFailureException {
        if (!ValidationUtils.present((CharSequence) stringBuffer)) {
            throw new ScrapingFailureException("span does not contain a book or journal");
        }
        BibTexUtils.addFieldIfNotContained(stringBuffer, "url", scrapingContext.getUrl().toString());
        scrapingContext.setBibtexResult(stringBuffer.toString());
        return true;
    }

    /**
     * Combines a first and a last name into {@code "Last, First"}.
     *
     * @param str first name, may be absent
     * @param str2 last name, may be absent
     * @return {@code "Last, First"} if both are present, the present one if only one is, and the
     *         empty string if neither is
     */
    private static String getAuthorFirstLast(String str, String str2) {
        boolean hasFirst = ValidationUtils.present(str);
        boolean hasLast = ValidationUtils.present(str2);
        if (hasFirst && hasLast) {
            return str2 + ", " + str;
        }
        if (hasFirst) {
            return str;
        }
        return hasLast ? str2 : "";
    }

    /**
     * Appends {@code name = {value},} to the buffer; does nothing if the value is not present.
     *
     * @param str field name
     * @param str2 field value, may be absent
     * @param stringBuffer buffer to append to
     */
    private static void append(String str, String str2, StringBuffer stringBuffer) {
        if (ValidationUtils.present(str2)) {
            stringBuffer.append(str).append(" = {").append(str2).append("},\n");
        }
    }

    /**
     * Looks up a key in the map.
     *
     * @param map map to read from
     * @param str key
     * @return the mapped value, or {@code null} if the key is not contained
     */
    private static String get(Map<String, String> map, String str) {
        return map.containsKey(str) ? map.get(str) : null;
    }

    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.Scraper
    public Collection<Scraper> getScraper() {
        return Collections.singleton(this);
    }

    /**
     * Reports whether the page of the given context can be handled.
     *
     * <p>NOTE(review): only the page content is inspected here; the host is checked later in
     * {@link #scrapeInternal(ScrapingContext)}. Confirm that this is intended.
     *
     * @param scrapingContext context providing URL and page content
     * @return {@code true} if the context has a URL and its page contains a COinS span or an XML
     *         link; {@code false} otherwise, including when the page content cannot be obtained
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.Scraper
    public boolean supportsScrapingContext(ScrapingContext scrapingContext) {
        if (!ValidationUtils.present(scrapingContext.getUrl())) {
            return false;
        }
        try {
            String pageContent = scrapingContext.getPageContent();
            if (pageContent == null) {
                return false;
            }
            return PATTERN_COINS.matcher(pageContent).find() || PATTERN_TEI_XML.matcher(pageContent).find();
        } catch (ScrapingException e) {
            // A page that cannot be loaded is simply not supported; keep the cause visible for debugging.
            log.debug("could not get page content to check scraper support", e);
            return false;
        }
    }

    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return URL_PATTERNS;
    }
}
