package org.bibsonomy.scraper.generic;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.Collections;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.scraper.Scraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.XmlUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.16.jar:org/bibsonomy/scraper/generic/UnAPIScraper.class */
/**
 * Generic scraper for pages that advertise an unAPI service.
 *
 * <p>A page is considered scrapable when its markup contains a
 * {@code <link rel="unapi-server" href="...">} element naming the service endpoint and an
 * {@code <abbr class="unapi-id" title="...">} element naming the record. The scraper then
 * requests {@code <endpoint>?format=bibtex&id=<record>} and stores the response body as the
 * BibTeX result of the scraping context.
 */
public class UnAPIScraper implements Scraper {
    private static final String SITE_NAME = "UnAPIScraper";
    private static final String SITE_URL = "http://unapi.info/";
    private static final String INFO = "Scrapes pages providing BibTeX (format=bibtex) via <a href=\"http://unapi.info/\">UN-API</a>.";

    /** Marker of the {@code <link>} element pointing at the unAPI endpoint. */
    private static final String UNAPI_SERVER = "unapi-server";
    /** Marker of the {@code <abbr>} element carrying the record identifier. */
    private static final String UNAPI_ID = "unapi-id";

    private static final Log log = LogFactory.getLog(UnAPIScraper.class);

    /**
     * @return a singleton collection containing only this scraper
     */
    @Override
    public Collection<Scraper> getScraper() {
        return Collections.singleton(this);
    }

    /**
     * Tries to fetch BibTeX for the page held by the given context via unAPI.
     *
     * @param scrapingContext the context providing the page URL and page content; on success
     *                        its scraper and BibTeX result are set
     * @return {@code true} if a non-empty response was obtained from the unAPI service and stored
     *         in the context; {@code false} if the page is not an unAPI page, the endpoint or the
     *         record identifier could not be found, or the service returned nothing
     * @throws ScrapingException an {@link InternalFailureException} if building the request URL
     *                           or querying the service fails with an {@link IOException};
     *                           otherwise whatever the context throws while loading the page
     */
    @Override
    public boolean scrape(ScrapingContext scrapingContext) throws ScrapingException {
        final URL pageUrl = scrapingContext.getUrl();
        if (pageUrl == null) {
            return false;
        }
        final String pageContent = scrapingContext.getPageContent();
        if (!mentionsUnApi(pageContent)) {
            return false;
        }
        final Document dom = XmlUtils.getDOM(pageContent, true);
        if (dom == null) {
            // Defensive: the parser's behaviour on unparsable input is not visible from here.
            return false;
        }
        final String apiHref = getApiHref(dom);
        if (apiHref == null) {
            return false;
        }
        log.debug("found server id " + apiHref);
        final String recordIdentifier = getRecordIdentifier(dom);
        if (recordIdentifier == null) {
            return false;
        }
        log.debug("found record id " + recordIdentifier);
        try {
            final URL url = buildRecordUrl(pageUrl, apiHref, recordIdentifier);
            log.debug("querying service at " + url);
            final String contentAsString = WebUtils.getContentAsString(url);
            // An empty body is not a usable BibTeX record, so do not report success for it.
            if (contentAsString == null || contentAsString.trim().isEmpty()) {
                return false;
            }
            log.debug("got bibtex (" + contentAsString.length() + " characters)");
            scrapingContext.setScraper(this);
            scrapingContext.setBibtexResult(contentAsString);
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Builds the URL that asks the unAPI endpoint for the BibTeX form of a record.
     *
     * @param pageUrl          URL of the scraped page; used as base for a relative {@code apiHref}
     * @param apiHref          endpoint as found in the page, absolute or relative
     * @param recordIdentifier unAPI record identifier; URL-encoded as UTF-8 before use
     * @return the request URL
     * @throws IOException if the resulting URL is malformed or UTF-8 is unsupported
     */
    private static URL buildRecordUrl(URL pageUrl, String apiHref, String recordIdentifier) throws IOException {
        final String endpoint = apiHref.trim();
        final String separator;
        if (endpoint.indexOf('?') < 0) {
            separator = "?";
        } else if (endpoint.endsWith("?") || endpoint.endsWith("&")) {
            // The endpoint already ends with a query delimiter.
            separator = "";
        } else {
            // The endpoint carries its own query parameters; append ours to them.
            separator = "&";
        }
        final String spec = endpoint + separator + "format=bibtex&id=" + URLEncoder.encode(recordIdentifier, "UTF-8");
        // With an absolute spec the context URL is ignored, so absolute endpoints behave as before.
        return new URL(pageUrl, spec);
    }

    /**
     * Cheap textual pre-check for the two unAPI markers.
     *
     * @param pageContent raw page markup, may be {@code null}
     * @return {@code true} if both {@code unapi-server} and {@code unapi-id} occur in the markup
     */
    private static boolean mentionsUnApi(String pageContent) {
        return pageContent != null && pageContent.contains(UNAPI_SERVER) && pageContent.contains(UNAPI_ID);
    }

    /**
     * @param document parsed page, may be {@code null}
     * @return the {@code href} of the first {@code <link>} whose {@code rel} contains
     *         {@code unapi-server} and that has an {@code href}, or {@code null} if there is none
     */
    private String getApiHref(Document document) {
        return findAttributeValue(document, "link", "rel", UNAPI_SERVER, "href");
    }

    /**
     * @param document parsed page, may be {@code null}
     * @return the {@code title} of the first {@code <abbr>} whose {@code class} contains
     *         {@code unapi-id} and that has a {@code title}, or {@code null} if there is none
     */
    private String getRecordIdentifier(Document document) {
        if (document != null) {
            log.debug("found " + document.getElementsByTagName("abbr").getLength() + " abbr nodes.");
        }
        return findAttributeValue(document, "abbr", "class", UNAPI_ID, "title");
    }

    /**
     * Scans all elements with the given tag name and returns an attribute value of the first one
     * that is marked with the given token.
     *
     * @param document        parsed page, may be {@code null}
     * @param tagName         element name to look for
     * @param markerAttribute attribute that must contain {@code markerToken}
     * @param markerToken     token identifying the wanted element
     * @param valueAttribute  attribute whose value is returned
     * @return the attribute value, or {@code null} if no matching element carries it
     */
    private static String findAttributeValue(Document document, String tagName, String markerAttribute, String markerToken, String valueAttribute) {
        if (document == null) {
            return null;
        }
        final NodeList elements = document.getElementsByTagName(tagName);
        for (int i = 0; i < elements.getLength(); i++) {
            final NamedNodeMap attributes = elements.item(i).getAttributes();
            if (attributes == null || !hasToken(attributes.getNamedItem(markerAttribute), markerToken)) {
                continue;
            }
            final Node value = attributes.getNamedItem(valueAttribute);
            if (value != null) {
                return value.getNodeValue();
            }
        }
        return null;
    }

    /**
     * Checks whether a whitespace-separated attribute value (such as {@code class} or
     * {@code rel}) contains the given token.
     *
     * @param attribute attribute node, may be {@code null}
     * @param token     token to look for
     * @return {@code true} if one of the attribute's tokens equals {@code token}
     */
    private static boolean hasToken(Node attribute, String token) {
        if (attribute == null) {
            return false;
        }
        final String value = attribute.getNodeValue();
        if (value == null) {
            return false;
        }
        for (String candidate : value.trim().split("\\s+")) {
            if (token.equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reports whether the page of the given context looks like an unAPI page.
     *
     * @param scrapingContext the context providing the page URL and page content
     * @return {@code true} if the context has a URL and its page content mentions both
     *         {@code unapi-server} and {@code unapi-id}; {@code false} otherwise, including when
     *         loading the page content fails
     */
    @Override
    public boolean supportsScrapingContext(ScrapingContext scrapingContext) {
        if (scrapingContext.getUrl() == null) {
            return false;
        }
        try {
            return mentionsUnApi(scrapingContext.getPageContent());
        } catch (ScrapingException e) {
            // A page that cannot be loaded is simply reported as unsupported.
            return false;
        }
    }

    /**
     * @return a scraping context for a fixed sample URL, or {@code null} if that URL is rejected
     *         as malformed
     */
    public static ScrapingContext getTestContext() {
        ScrapingContext scrapingContext = null;
        try {
            scrapingContext = new ScrapingContext(new URL("http://canarydatabase.org/record/488"));
        } catch (MalformedURLException e) {
            log.debug(e);
        }
        return scrapingContext;
    }

    /**
     * @return a short HTML description of what this scraper does
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the display name of this scraper
     */
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the URL of the unAPI home page
     */
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
