package org.bibsonomy.scraper.url.kde.apha;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.converter.RisToBibtexConverter;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.8.2.jar:org/bibsonomy/scraper/url/kde/apha/APHAScraper.class */
/**
 * URL scraper for abstract pages ({@code /doi/abs/...}) hosted on
 * {@code ajph.aphapublications.org}, {@code nrcresearchpress.com} and
 * {@code emeraldinsight.com}.
 *
 * <p>The DOI is taken from the page URL and POSTed, together with the cookies
 * obtained for the page, to the host's {@code /action/downloadCitation}
 * endpoint. For the AJPH host the response is run through
 * {@link RisToBibtexConverter}; for the other two hosts the request carries
 * {@code format=bibtex} and the response is stored as the BibTeX result
 * without conversion.
 */
public class APHAScraper extends AbstractUrlScraper {
    private static final String SITE_NAME = "American Journal of PUBLIC HEALTH";
    private static final String SITE_URL = "http://ajph.aphapublications.org/";
    private static final String info = "This scraper parses a publication page of citations from " + href(SITE_URL, SITE_NAME) + ".";

    private static final String HTTP = "http://";
    private static final String AJPH_HOST = "ajph.aphapublications.org";
    private static final String NRCRESEARCHPRESS_HOST = "nrcresearchpress.com";
    private static final String EMERALDINSIGHT_HOST = "emeraldinsight.com";

    /** Path of the citation export endpoint; it is the same on all supported hosts. */
    private static final String DOWNLOAD_PATH = "/action/downloadCitation";
    private static final String AJPH_DOWNLOAD_URL = HTTP + AJPH_HOST + DOWNLOAD_PATH;
    private static final String NRCRESEARCHPRESS_DOWNLOAD_URL = HTTP + NRCRESEARCHPRESS_HOST + DOWNLOAD_PATH;
    private static final String EMERALDINSIGHT_DOWNLOAD_URL = HTTP + EMERALDINSIGHT_HOST + DOWNLOAD_PATH;

    /** Appended to the POST data when the endpoint is asked for BibTeX directly. */
    private static final String BIBTEX_FORMAT_PARAMETER = "&format=bibtex";

    /**
     * Captures everything after {@code /abs/} up to (but excluding) a query
     * string or fragment, so that e.g. {@code ?journalCode=...} is not sent
     * as part of the DOI.
     */
    private static final Pattern DOI_PATTERN_FROM_URL = Pattern.compile("/abs/([^?#]+)");

    /** Host/path pattern pairs describing the URLs this scraper supports. */
    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        patterns.add(new Pair<>(Pattern.compile(".*ajph.aphapublications.org"), Pattern.compile("/doi/abs")));
        patterns.add(new Pair<>(Pattern.compile(".*nrcresearchpress.com"), Pattern.compile("/doi/abs")));
        patterns.add(new Pair<>(Pattern.compile(".*emeraldinsight.com"), Pattern.compile("/doi/abs")));
    }

    private final RisToBibtexConverter ris = new RisToBibtexConverter();

    /**
     * Extracts the DOI from the context URL, downloads the citation for it and
     * stores the resulting BibTeX in the context.
     *
     * @param scrapingContext context providing the page URL and receiving the result
     * @return {@code true} if a BibTeX result was stored; {@code false} if the URL
     *         contains no DOI, no cookies could be obtained, the download returned
     *         nothing, or the RIS conversion returned {@code null}
     * @throws ScrapingException if the download URL is malformed or an I/O error occurs
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final URL pageUrl = scrapingContext.getUrl();
        final String pageUrlString = pageUrl.toString();

        // Check for the DOI before touching the network: without it there is nothing to request.
        final Matcher doiMatcher = DOI_PATTERN_FROM_URL.matcher(pageUrlString);
        if (!doiMatcher.find()) {
            return false;
        }
        final String doiParameter = "doi=" + doiMatcher.group(1);

        // NOTE(review): both catch blocks drop the original exception; attach it as the
        // cause if ScrapingFailureException offers a (String, Throwable) constructor.
        try {
            final String cookies = WebUtils.getCookies(pageUrl);
            if (cookies == null) {
                return false;
            }
            try {
                return downloadAndStore(scrapingContext, pageUrlString, cookies, doiParameter);
            } catch (MalformedURLException malformedUrl) {
                throw new ScrapingFailureException("URL to scrape does not exist. It may be malformed.");
            }
        } catch (IOException ioFailure) {
            throw new ScrapingFailureException("An unexpected IO error has occurred. Maybe APHA or NRC Researchpress is down.");
        }
    }

    /**
     * Picks the download endpoint matching the page URL, POSTs the DOI to it and
     * stores the response (converted from RIS where necessary) as BibTeX result.
     *
     * @param scrapingContext context receiving the BibTeX result
     * @param pageUrlString   the page URL as string, used to select the endpoint
     * @param cookies         cookies to send with the request
     * @param doiParameter    the {@code doi=...} POST parameter
     * @return {@code true} if a BibTeX result was stored, {@code false} otherwise
     */
    private boolean downloadAndStore(ScrapingContext scrapingContext, String pageUrlString, String cookies, String doiParameter) throws IOException, ScrapingException {
        final String downloadUrl;
        final boolean bibtexRequested;
        if (pageUrlString.contains(AJPH_HOST)) {
            downloadUrl = AJPH_DOWNLOAD_URL;
            bibtexRequested = false;
        } else if (pageUrlString.contains(NRCRESEARCHPRESS_HOST)) {
            downloadUrl = NRCRESEARCHPRESS_DOWNLOAD_URL;
            bibtexRequested = true;
        } else {
            // Every URL that is neither AJPH nor NRC is sent to the Emerald endpoint.
            downloadUrl = EMERALDINSIGHT_DOWNLOAD_URL;
            bibtexRequested = true;
        }

        final String postData = bibtexRequested ? doiParameter + BIBTEX_FORMAT_PARAMETER : doiParameter;
        final String citation = WebUtils.getPostContentAsString(cookies, new URL(downloadUrl), postData);
        if (citation == null) {
            // Nothing was downloaded; do not hand null to the RIS converter.
            return false;
        }

        final String bibtex = bibtexRequested ? citation : this.ris.toBibtex(citation);
        if (bibtex == null) {
            return false;
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the base URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    /**
     * @return a short description of this scraper containing a link to the supported site
     */
    @Override
    public String getInfo() {
        return info;
    }

    /**
     * @return the host/path pattern pairs of the URLs this scraper supports
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }
}
