package org.bibsonomy.scraper.url.kde.citebase;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xalan.templates.Constants;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.XmlUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.44.jar:org/bibsonomy/scraper/url/kde/citebase/CiteBaseScraper.class */
/**
 * URL scraper for publication pages hosted on citebase.org.
 *
 * <p>The scraper parses the page content into a DOM, looks for the anchor whose
 * text is {@code "BibTeX"}, downloads the BibTeX record that anchor points to
 * and, if the page contains a {@code div} with the abstract class, appends the
 * abstract text as an additional BibTeX field.
 */
public class CiteBaseScraper extends AbstractUrlScraper {
    private static final String CITEBASE_HOST_NAME = "http://www.citebase.org";
    private static final String CITEBASE_HOST = "citebase.org";
    private static final String BIBTEX_STRING_ON_ARXIV = "BibTeX";
    private static final String BIBTEX_ABSTRACT_TAG = "div";
    private static final String SITE_URL = "http://www.citebase.org/";
    private static final String SITE_NAME = "Citebase";

    // NOTE(review): the two constants below come from unrelated libraries (Xalan and
    // Spring). Presumably they equal "href" and "abstract" - confirm, and consider
    // replacing them with local string literals to drop the accidental dependency.
    /** Name of the anchor attribute that holds the link target. */
    private static final String LINK_TARGET_ATTRIBUTE = Constants.ATTRNAME_HREF;
    /** Used both as the CSS class of the abstract container and as the BibTeX field name. */
    private static final String ABSTRACT_NAME = BeanDefinitionParserDelegate.ABSTRACT_ATTRIBUTE;

    private static final String INFO = "This scraper parses a publication page from " + href(SITE_URL, SITE_NAME) + ".";
    private static final Log log = LogFactory.getLog(CiteBaseScraper.class);
    private static final List<Pair<Pattern, Pattern>> URL_PATTERNS = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*" + CITEBASE_HOST), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Scrapes the BibTeX record linked from the page held by the scraping context.
     *
     * @param scrapingContext context providing the page content and receiving the result
     * @return {@code true} once a BibTeX result has been stored in the context
     * @throws ScrapingException if the page has no BibTeX link or the BibTeX download
     *         yields no content; an {@link InternalFailureException} wraps any
     *         {@link IOException} raised while parsing or downloading
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            Document dom = XmlUtils.getDOM(scrapingContext.getPageContent());
            String abstractText = extractAbstract(dom, BIBTEX_ABSTRACT_TAG);

            String bibtexPath = extractUrlFromElementByTagNameAndValue(dom, "a", BIBTEX_STRING_ON_ARXIV, LINK_TARGET_ATTRIBUTE);
            if (bibtexPath == null) {
                // Without this check the literal text "null" would be appended to the host name.
                throw new ScrapingException("no '" + BIBTEX_STRING_ON_ARXIV + "' link found on the " + SITE_NAME + " page");
            }

            URL url = new URL(CITEBASE_HOST_NAME + bibtexPath);
            log.debug("bibtex url = " + url);

            String bibtex = WebUtils.getContentAsString(url);
            if (bibtex == null) {
                // new StringBuffer((String) null) would fail with a NullPointerException.
                throw new ScrapingException("no content received from " + url);
            }

            StringBuffer result = new StringBuffer(bibtex);
            if (abstractText != null) {
                BibTexUtils.addField(result, ABSTRACT_NAME, abstractText);
            }
            scrapingContext.setBibtexResult(result.toString());
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * @return a short HTML description of this scraper
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Finds the first element with the given tag name whose first child node has
     * exactly the given value and which carries the requested attribute.
     *
     * @param document      DOM to search
     * @param tagName       tag name of the candidate elements
     * @param linkText      node value the element's first child must equal
     * @param attributeName attribute whose value is returned
     * @return the attribute value, or {@code null} if no element matches
     * @throws DOMException if reading a node value fails
     */
    private static String extractUrlFromElementByTagNameAndValue(Document document, String tagName, String linkText, String attributeName) throws DOMException {
        NodeList candidates = document.getElementsByTagName(tagName);
        for (int i = 0; i < candidates.getLength(); i++) {
            Node candidate = candidates.item(i);
            Node firstChild = candidate.getFirstChild();
            if (firstChild == null || !linkText.equals(firstChild.getNodeValue())) {
                continue;
            }
            // A matching element without the attribute is skipped instead of dereferencing null.
            Node attribute = candidate.getAttributes().getNamedItem(attributeName);
            if (attribute != null) {
                return attribute.getNodeValue();
            }
        }
        return null;
    }

    /**
     * Returns the node value of the first child of the first element with the
     * given tag name whose {@code class} attribute equals the abstract class name.
     *
     * @param document DOM to search
     * @param tagName  tag name of the candidate elements
     * @return the abstract text, or {@code null} if no such element exists, the
     *         element is empty, or its first child has no node value
     */
    private static String extractAbstract(Document document, String tagName) {
        NodeList candidates = document.getElementsByTagName(tagName);
        for (int i = 0; i < candidates.getLength(); i++) {
            Node candidate = candidates.item(i);
            Node classAttribute = candidate.getAttributes().getNamedItem("class");
            if (classAttribute != null && classAttribute.getNodeValue().equals(ABSTRACT_NAME)) {
                // An empty container has no first child; treat that as "no abstract".
                Node firstChild = candidate.getFirstChild();
                String abstractText = firstChild == null ? null : firstChild.getNodeValue();
                log.debug("abstract = " + abstractText);
                return abstractText;
            }
        }
        return null;
    }

    /**
     * @return the host/path pattern pairs this scraper is registered for
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return URL_PATTERNS;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the base URL of the supported site (without trailing slash)
     */
    @Override
    public String getSupportedSiteURL() {
        return CITEBASE_HOST_NAME;
    }
}
