package org.bibsonomy.scraper.url.kde.spires;

import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.XmlUtils;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/spires/SpiresScraper.class */
/**
 * URL scraper that extracts a BibTeX entry from SPIRES pages.
 *
 * <p>If the scraped URL does not already request the brief BibTeX format,
 * the page content is searched for a link that does; the BibTeX text is then
 * read from a {@code <pre>} element of the document behind that URL.
 */
public class SpiresScraper extends AbstractUrlScraper {
    /** Query fragment marking a URL that already requests the brief BibTeX format. */
    private static final String FORMAT_WWWBRIEFBIBTEX = "FORMAT=WWWBRIEFBIBTEX";
    private static final String SITE_URL = "http://slac.stanford.edu/";
    private static final String SITE_NAME = "SLAC National Accelerator Laboratory";
    private static final String info = "Gets publications from " + href(SITE_URL, SITE_NAME) + ".";

    /** Matches the BibTeX download link in a page; group 1 is the host-relative link target. */
    private static final Pattern DOWNLOAD_LINK_PATTERN =
            Pattern.compile("<a href=\"?(/spires/find/hep/www\\?.*?\\&FORMAT=WWWBRIEFBIBTEX)\"?>");

    /** Host/path pattern pairs reported by {@link #getUrlPatterns()}. */
    private static final List<Tuple<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        patterns.add(new Tuple<>(Pattern.compile(".*slac.stanford.edu"), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Tuple<>(Pattern.compile(".*www-library.desy.de"), AbstractUrlScraper.EMPTY_PATTERN));
    }

    /**
     * Scrapes the BibTeX entry for the URL held by the given context and stores it
     * as the context's BibTeX result.
     *
     * @param scrapingContext context providing the URL and page content; receives the result
     * @return {@code true} when a BibTeX entry was stored in the context
     * @throws ScrapingFailureException if no download link is found in the page, or the
     *         resulting BibTeX is {@code null} or empty
     * @throws InternalFailureException if an {@link IOException} occurs while scraping
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            final URL url = scrapingContext.getUrl();
            URL bibtexUrl = url;
            // URL.getQuery() is null for URLs without a query string; treat that the
            // same as a query lacking the BibTeX format instead of failing with an NPE.
            final String query = url.getQuery();
            if (query == null || !query.contains(FORMAT_WWWBRIEFBIBTEX)) {
                final Matcher matcher = DOWNLOAD_LINK_PATTERN.matcher(scrapingContext.getPageContent());
                if (!matcher.find()) {
                    throw new ScrapingFailureException("no download link found");
                }
                // The matched link is host-relative, so prefix protocol and host of the scraped URL.
                bibtexUrl = new URL(url.getProtocol() + "://" + url.getHost() + matcher.group(1));
            }

            // Take the value of the first child of a <pre> element; if several <pre>
            // elements have children, the last one wins.
            String bibtex = null;
            final NodeList preElements = XmlUtils.getDOM(bibtexUrl).getElementsByTagName("pre");
            for (int i = 0; i < preElements.getLength(); i++) {
                final Node pre = preElements.item(i);
                if (pre.hasChildNodes()) {
                    bibtex = pre.getChildNodes().item(0).getNodeValue();
                }
            }

            // The url field is filled with the originally scraped URL, not the download URL.
            final String result = BibTexUtils.addFieldIfNotContained(bibtex, "url", url.toString());
            if (result == null || "".equals(result)) {
                throw new ScrapingFailureException("getting bibtex failed");
            }
            scrapingContext.setBibtexResult(result);
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /** Returns the description of this scraper, including a link to the supported site. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /** Returns the URL pattern pairs registered for this scraper. */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /** Returns the display name of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** Returns the base URL of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
