package org.bibsonomy.scraper.url.kde.cshlp;

import com.hp.hpl.jena.util.FileManager;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.generic.GenericBibTeXURLScraper;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.9.2.jar:org/bibsonomy/scraper/url/kde/cshlp/CSHLPScraper.class */
/**
 * Scraper that turns an article URL under {@code /content/} on one of the
 * supported hosts into the URL of that article's BibTeX export.
 *
 * <p>Two strategies are used, depending on the host:
 * <ul>
 *   <li>{@code cshperspectives.cshlp.org} and {@code jbc.org}: the download URL is
 *       built directly from the article id found in the URL path.</li>
 *   <li>{@code cancerres.aacrjournals.org} and {@code jimmunol.org}: the article
 *       page is downloaded and the target of its "BibTeX" link is used.</li>
 * </ul>
 */
public class CSHLPScraper extends GenericBibTeXURLScraper {
    private static final String CSHLP_HOST = "cshperspectives.cshlp.org";
    private static final String JBC_HOST = "jbc.org";
    private static final String CANCERRES_AACRJOURNALS_HOST = "cancerres.aacrjournals.org";
    private static final String JIMMUNOL_HOST = "jimmunol.org";
    private static final String HTTP = "http://";
    private static final String CONTENT_SUBPATH = "/content/";

    /*
     * Separator between the journal code and the article id in the "gca" query
     * parameter. The decompiled code referenced Jena's FileManager.PATH_DELIMITER
     * here, which is the same ";" constant; a local literal avoids coupling the
     * URL format to an unrelated library.
     */
    private static final String GCA_DELIMITER = ";";

    /** Captures everything between {@code /content/} and the first following dot of the URL path. */
    private static final Pattern PATTERN_FROM_URL = Pattern.compile("/content/(.+?)\\.");

    /** Captures the {@code href} value of an anchor whose text is exactly {@code BibTeX}. */
    private static final Pattern BIBTEX_PATTERN = Pattern.compile("<a.*href=\"([^\"]+)\".*>BibTeX</a>");

    private static final String DOWNLOAD_URL_CSHLP_HOST = citationManagerPrefix(CSHLP_HOST, "cshperspect");
    private static final String DOWNLOAD_URL_JBC_HOST = citationManagerPrefix(JBC_HOST, "jbc");

    private static final String SITE_URL = "http://cshperspectives.cshlp.org/";
    private static final String SITE_NAME = "Cold Spring Harbor Perspectives in Biology";
    private static final String INFO = "This scraper parses a publication page of citations from " + href(SITE_URL, SITE_NAME) + ".";

    /** (host pattern, path pattern) pairs returned by {@link #getUrlPatterns()}. */
    private static final List<Pair<Pattern, Pattern>> PATTERNS = new LinkedList<>();

    static {
        registerHost(CSHLP_HOST);
        registerHost(JBC_HOST);
        registerHost(CANCERRES_AACRJOURNALS_HOST);
        registerHost(JIMMUNOL_HOST);
    }

    /**
     * Adds a URL pattern pair for {@code host}: any prefix followed by the literal
     * host name, combined with the {@code /content/} path pattern.
     *
     * @param host literal host name; it is quoted so that its dots only match dots
     */
    private static void registerHost(String host) {
        PATTERNS.add(new Pair<>(Pattern.compile(".*" + Pattern.quote(host)), Pattern.compile(CONTENT_SUBPATH)));
    }

    /**
     * Builds the fixed part of a citation-manager BibTeX URL; the article id is
     * appended by the caller.
     *
     * @param host        host name the request is sent to
     * @param journalCode journal code placed in front of the article id in {@code gca}
     * @return {@code http://<host>/citmgr?type=bibtex&gca=<journalCode>;}
     */
    private static String citationManagerPrefix(String host, String journalCode) {
        return HTTP + host + "/citmgr?type=bibtex&gca=" + journalCode + GCA_DELIMITER;
    }

    /**
     * Determines the BibTeX download URL for an article page.
     *
     * @param url     the article URL
     * @param cookies forwarded unchanged as the second argument of
     *                {@code WebUtils.getContentAsString} when the page has to be
     *                fetched (presumably request cookies; confirm against the superclass)
     * @return the download URL, or {@code null} if the path contains no article id
     *         or no download URL could be determined for the host
     * @throws ScrapingException if fetching the article page fails with an I/O error
     */
    @Override // org.bibsonomy.scraper.generic.AbstractGenericFormatURLScraper
    protected String getDownloadURL(URL url, String cookies) throws ScrapingException {
        final Matcher idMatcher = PATTERN_FROM_URL.matcher(url.getPath());
        if (!idMatcher.find()) {
            return null;
        }
        final String articleId = idMatcher.group(1);
        final String host = url.getHost();

        if (host.contains(CSHLP_HOST)) {
            return DOWNLOAD_URL_CSHLP_HOST + articleId;
        }
        if (host.contains(JBC_HOST)) {
            return DOWNLOAD_URL_JBC_HOST + articleId;
        }
        // Remaining hosts expose the export link only on the article page itself.
        return getDownloadURLFromPage(url, cookies);
    }

    /**
     * Fetches the article page and returns the target of its "BibTeX" link,
     * prefixed with the canonical {@code http://} host of the journal.
     *
     * @param url     the article URL to download
     * @param cookies forwarded unchanged to {@code WebUtils.getContentAsString}
     * @return the download URL, or {@code null} if the host is not handled here,
     *         the page content is unavailable, or no BibTeX link is found
     * @throws ScrapingException if downloading the page fails with an I/O error
     */
    private String getDownloadURLFromPage(URL url, String cookies) throws ScrapingException {
        // Decide on the host first so that no request is made for hosts we cannot handle.
        final String host = url.getHost();
        final String baseHost;
        if (host.contains(CANCERRES_AACRJOURNALS_HOST)) {
            baseHost = CANCERRES_AACRJOURNALS_HOST;
        } else if (host.contains(JIMMUNOL_HOST)) {
            baseHost = JIMMUNOL_HOST;
        } else {
            return null;
        }

        final String page;
        try {
            page = WebUtils.getContentAsString(url.toString(), cookies);
        } catch (IOException e) {
            throw new ScrapingException(e);
        }
        if (page == null) {
            // Defensive: Pattern.matcher(null) would throw a NullPointerException.
            return null;
        }

        final Matcher linkMatcher = BIBTEX_PATTERN.matcher(page);
        if (!linkMatcher.find()) {
            return null;
        }
        return HTTP + baseHost + linkMatcher.group(1);
    }

    /** @return the display name of the supported site */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** @return the home page URL of the supported site */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    /** @return a short description of this scraper containing a link to the supported site */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    /** @return the (host pattern, path pattern) pairs of the URLs this scraper handles */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return PATTERNS;
    }
}
