package org.bibsonomy.scraper.url.kde.spires;

import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.soap.Constants;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.UrlScraper;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/spires/SpiresScraper.class */
/**
 * URL scraper for pages served from {@code slac.stanford.edu}.
 *
 * <p>The scraper looks for an anchor labelled {@code BibTeX} on the current page, downloads the
 * page that anchor points to (relative to {@code http://slac.stanford.edu}) and takes the text of
 * a {@code <pre>} element of that page as the BibTeX result.
 */
public class SpiresScraper extends UrlScraper {
    private static final String SPIRES_HOST = "slac.stanford.edu";
    private static final String info = "Spires Scraper: Gets publications from " + href(SPIRES_HOST, "SLAC National Accelerator Laboratory");
    // The host is quoted so that its dots only match literal dots.
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(new Tuple<Pattern, Pattern>(Pattern.compile(".*" + Pattern.quote(SPIRES_HOST)), UrlScraper.EMPTY_PATTERN));

    /**
     * Follows the page's {@code BibTeX} link and stores the BibTeX text found behind it in the
     * scraping context.
     *
     * @param scrapingContext context providing the page content and receiving the result
     * @return {@code true} when a BibTeX string was stored in the context
     * @throws ScrapingException if the page has no usable {@code BibTeX} link, if no BibTeX text
     *         could be extracted from the linked page, or if the link does not form a valid URL
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            final Document page = getDOM(scrapingContext.getPageContent());
            final String bibtexPath = extractUrlFromElementByTagNameAndValue(page, "a", "BibTeX", Constants.ATTR_REFERENCE);
            if (bibtexPath == null) {
                // Without this guard the concatenation below would silently build "...edunull".
                throw new ScrapingFailureException("BibTeX link not found");
            }

            final URL bibtexUrl = new URL("http://" + SPIRES_HOST + bibtexPath);
            final NodeList preElements = getDOM(scrapingContext.getContentAsString(bibtexUrl)).getElementsByTagName("pre");

            // The last <pre> that yields a text value wins; <pre> elements without a usable
            // first child must not wipe out a value found earlier.
            String bibtex = null;
            for (int i = 0; i < preElements.getLength(); i++) {
                final String value = firstChildValue(preElements.item(i));
                if (value != null) {
                    bibtex = value;
                }
            }

            if (bibtex == null || bibtex.isEmpty()) {
                throw new ScrapingFailureException("getting bibtex failed");
            }
            scrapingContext.setBibtexResult(bibtex);
            return true;
        } catch (MalformedURLException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * @return the static description text of this scraper
     */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /**
     * Parses markup into a DOM document using a quiet {@link Tidy} instance.
     *
     * @param content markup to parse; must not be {@code null}
     * @return the parsed document
     */
    private Document getDOM(String content) {
        final Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        // NOTE(review): getBytes() encodes with the platform default charset and no input
        // encoding is configured on Tidy here - confirm that non-ASCII entries survive.
        return tidy.parseDOM(new ByteArrayInputStream(content.getBytes()), (OutputStream) null);
    }

    /**
     * Finds the first element with the given tag name whose first child node has exactly the
     * given value and that carries the requested attribute, and returns that attribute's value.
     *
     * @param document document to search
     * @param tagName tag name of the candidate elements
     * @param nodeValue required value of a candidate's first child node
     * @param attributeName name of the attribute to read from the matching element
     * @return the attribute value, or {@code null} if no element matches
     * @throws DOMException if the underlying DOM implementation fails to provide a node value
     */
    private String extractUrlFromElementByTagNameAndValue(Document document, String tagName, String nodeValue, String attributeName) throws DOMException {
        final NodeList candidates = document.getElementsByTagName(tagName);
        for (int i = 0; i < candidates.getLength(); i++) {
            final Node candidate = candidates.item(i);
            if (!nodeValue.equals(firstChildValue(candidate))) {
                continue;
            }
            // getNamedItem returns null for a missing attribute; skip such elements instead of
            // failing with a NullPointerException.
            final Node attribute = candidate.getAttributes().getNamedItem(attributeName);
            if (attribute != null) {
                return attribute.getNodeValue();
            }
        }
        return null;
    }

    /**
     * Returns the node value of the first child of {@code node}, or {@code null} if the node has
     * no children or the first child has no value.
     */
    private static String firstChildValue(Node node) {
        final NodeList children = node.getChildNodes();
        if (children.getLength() == 0) {
            return null;
        }
        return children.item(0).getNodeValue();
    }

    /**
     * @return the single host/path pattern pair this scraper is registered for
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }
}
