package org.bibsonomy.scraper.url.kde.prola;

import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.CitedbyScraper;
import org.bibsonomy.scraper.ReferencesScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.generic.GenericBibTeXURLScraper;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.8.15.jar:org/bibsonomy/scraper/url/kde/prola/ProlaScraper.class */
/**
 * URL scraper for the APS journal sites (hosts matching {@code .*.aps.org}).
 *
 * <p>BibTeX is obtained by rewriting the article URL into its "export" variant
 * (see {@link #getDownloadURL(URL, String)}); afterwards the abstract is read
 * from the page's {@code description} meta tag and handed to
 * {@code BibTexUtils.addFieldIfNotContained}. The class additionally scrapes
 * the "cited-by" and "references" sub-pages of an article.
 */
public class ProlaScraper extends GenericBibTeXURLScraper implements ReferencesScraper, CitedbyScraper {
    private static final String PROLA_APS_URL_BASE = "http://prola.aps.org";
    private static final String PROLA_APS_HOST = ".aps.org";
    private static final Log log = LogFactory.getLog(ProlaScraper.class);
    private static final String SITE_URL = PROLA_APS_URL_BASE + "/";
    private static final String SITE_NAME = "PROLA";
    private static final String INFO = "For selected BibTeX snippets and articles from " + href(SITE_URL, SITE_NAME) + ".";

    /** Text in an article URL that is swapped for {@link #EXPORT_URL_PART} to reach the BibTeX export. */
    private static final String ABSTRACT_URL_PART = "abstract";
    /** Replacement for {@link #ABSTRACT_URL_PART} in the download URL. */
    private static final String EXPORT_URL_PART = "export";
    /** Name of the BibTeX field that receives the scraped abstract. */
    private static final String ABSTRACT_FIELD = "abstract";

    // The resulting regex is ".*.aps.org" - the dots are NOT escaped, so each one
    // matches any character. Kept as-is to preserve which hosts are accepted.
    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*" + PROLA_APS_HOST), AbstractUrlScraper.EMPTY_PATTERN));

    private static final Pattern PATTERN_ABSTRACT = Pattern.compile("<meta name=\"description\" content=\"(.*)\">");
    private static final Pattern PATTERN_URL = Pattern.compile("<meta content=\"(http.*?)\" property=\"og:url\" />");
    private static final Pattern PATTERN_CITEDBY = Pattern.compile("(?s)<div class=\"large-9 columns\">(.*)</div><div class=\"pagination-centered\">");

    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    /**
     * Builds the download URL by replacing every occurrence of "abstract" in
     * the given URL with "export".
     *
     * @param url the article URL
     * @param str not used by this implementation
     * @return the rewritten URL as a string
     */
    @Override // org.bibsonomy.scraper.generic.AbstractGenericFormatURLScraper
    public String getDownloadURL(URL url, String str) throws ScrapingException {
        return url.toString().replace(ABSTRACT_URL_PART, EXPORT_URL_PART);
    }

    /**
     * Passes the abstract scraped from the context's URL, together with the
     * downloaded BibTeX, to {@code BibTexUtils.addFieldIfNotContained}.
     *
     * @param scrapingContext context whose URL is fetched to look for the abstract
     * @param str the downloaded BibTeX
     * @return the result of {@code addFieldIfNotContained}, or {@code str}
     *         unchanged when no abstract could be extracted
     */
    @Override // org.bibsonomy.scraper.generic.AbstractGenericFormatURLScraper
    protected String postProcessScrapingResult(ScrapingContext scrapingContext, String str) {
        final String abstractText = abstractParser(scrapingContext.getUrl());
        if (abstractText == null) {
            // Nothing to add; do not rely on how the helper treats a null value.
            return str;
        }
        return BibTexUtils.addFieldIfNotContained(str, ABSTRACT_FIELD, abstractText);
    }

    /**
     * Fetches the page at {@code url} and returns the content of its
     * {@code description} meta tag.
     *
     * @param url the page to fetch
     * @return the captured text, or {@code null} if the tag is not found or
     *         fetching/matching throws (the exception is logged)
     */
    private static String abstractParser(URL url) {
        try {
            final Matcher matcher = PATTERN_ABSTRACT.matcher(WebUtils.getContentAsString(url));
            return matcher.find() ? matcher.group(1) : null;
        } catch (Exception e) {
            // Best effort: a missing abstract must not fail the whole scrape.
            log.error("error while getting abstract for " + url, e);
            return null;
        }
    }

    /**
     * Fetches the page behind the context's URL and extracts the address
     * announced in its {@code og:url} meta tag.
     *
     * @param scrapingContext context providing the URL to fetch
     * @return the captured URL, or {@code null} if the page has no matching tag
     * @throws IOException if fetching the page fails
     */
    private static String fetchOgUrl(ScrapingContext scrapingContext) throws IOException {
        final Matcher matcher = PATTERN_URL.matcher(WebUtils.getContentAsString(scrapingContext.getUrl()));
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Scrapes the "cited-by" page of the article and stores the matched HTML
     * section in the context.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return {@code true} if a cited-by section was found and stored;
     *         {@code false} if a pattern did not match or an
     *         {@link IOException} occurred (logged)
     */
    @Override // org.bibsonomy.scraper.CitedbyScraper
    public boolean scrapeCitedby(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final String ogUrl = fetchOgUrl(scrapingContext);
            if (ogUrl == null) {
                return false;
            }
            final String citedByUrl = ogUrl.replaceAll("pdf|abstract|article|export", "cited-by");
            final Matcher matcher = PATTERN_CITEDBY.matcher(WebUtils.getContentAsString(citedByUrl));
            if (!matcher.find()) {
                return false;
            }
            scrapingContext.setCitedBy(matcher.group(1));
            return true;
        } catch (IOException e) {
            log.error("error while getting cited by articles for " + scrapingContext.getUrl(), e);
            return false;
        }
    }

    /**
     * Scrapes the "references" section page of the article and stores its
     * complete content in the context.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return {@code true} if the references page was fetched and stored;
     *         {@code false} if the {@code og:url} tag was not found or an
     *         {@link IOException} occurred (logged)
     */
    @Override // org.bibsonomy.scraper.ReferencesScraper
    public boolean scrapeReferences(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final String ogUrl = fetchOgUrl(scrapingContext);
            if (ogUrl == null) {
                return false;
            }
            // NOTE(review): BibTexUtils.ARTICLE is presumably the literal "article" - confirm.
            final String referencesUrl = ogUrl.replaceAll("pdf|abstract|export", BibTexUtils.ARTICLE) + "/section/references";
            scrapingContext.setReferences(WebUtils.getContentAsString(referencesUrl));
            return true;
        } catch (IOException e) {
            log.error("error while getting references for " + scrapingContext.getUrl(), e);
            return false;
        }
    }
}
