package org.bibsonomy.scraper.url.kde.pubmed;

import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.support.PropertiesBeanDefinitionReader;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.11.jar:org/bibsonomy/scraper/url/kde/pubmed/PubMedScraper.class */
/**
 * URL scraper for PubMed pages.
 *
 * <p>The scraper determines a PubMed id (PMID) from the request URL or from the
 * page content and then downloads the BibTeX export for that id from HubMed.
 * The {@code url} field of the downloaded BibTeX is replaced by the URL that
 * was actually scraped.
 */
public class PubMedScraper extends AbstractUrlScraper {
    private static final String HOST = "ncbi.nlm.nih.gov";
    private static final String PUBMED_EUTIL_HOST = "eutils.ncbi.nlm.nih.gov";
    private static final String UK_PUBMED_CENTRAL_HOST = "ukpmc.ac.uk";
    private static final String SITE_URL = "http://www.ncbi.nlm.nih.gov/";
    private static final String SITE_NAME = "PubMed";
    private static final String info = "This scraper parses a publication page of citations from " + href(SITE_URL, SITE_NAME) + ".";

    /** Prefix of the HubMed BibTeX export URL; the PMID is appended to it. */
    private static final String HUBMED_BIBTEX_URL = "http://www.hubmed.org/export/bibtex.cgi?uids=";

    /** Matches URLs that address a record through a {@code db=PubMed} parameter. */
    private static final Pattern PUBMED_DB_URL_PATTERN = Pattern.compile("(?im)^.+db=PubMed.+$");
    /** First run of digits; applied to the URL query to pick up the record id. */
    private static final Pattern DIGITS_PATTERN = Pattern.compile("\\d+");
    /** Extracts the id from a "PMID: 123 " marker in the page text (group 1). */
    private static final Pattern PAGE_PMID_PATTERN = Pattern.compile("(?ms)^.+PMID: (\\d*) .+$");
    /** Extracts the id from the {@code citation_pmid} meta tag (group 1). */
    private static final Pattern META_PMID_PATTERN = Pattern.compile("meta name=\"citation_pmid\" content=\"(\\d+)\"");
    /** Matches the {@code url} field of the downloaded BibTeX entry. */
    private static final Pattern BIBTEX_URL_FIELD_PATTERN = Pattern.compile("url = \".*\"");

    private static final List<Tuple<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        // Host patterns are built from the constants above; the resulting regex
        // text is identical to the previously hard-coded literals.
        patterns.add(new Tuple<>(Pattern.compile(".*" + HOST), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Tuple<>(Pattern.compile(".*" + PUBMED_EUTIL_HOST), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Tuple<>(Pattern.compile(".*" + UK_PUBMED_CENTRAL_HOST), AbstractUrlScraper.EMPTY_PATTERN));
    }

    /**
     * Scrapes the BibTeX entry for the PubMed record referenced by the context.
     *
     * @param scrapingContext context providing the URL and page content; receives
     *        the BibTeX result on success
     * @return {@code true} when a BibTeX entry was stored in the context
     * @throws ScrapingFailureException if no PMID could be determined or the
     *         download yielded no content
     * @throws InternalFailureException if the download fails with an I/O error
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final String pageContent = scrapingContext.getPageContent();
        final URL pageUrl = scrapingContext.getUrl();
        final String url = pageUrl.toString();

        final String pubMedId = findPubMedId(pageUrl, pageContent);
        if (pubMedId == null) {
            // Previously this path ran a regex over a null string and died with a
            // NullPointerException instead of reporting a scraping failure.
            throw new ScrapingFailureException("getting bibtex failed");
        }

        try {
            final String bibtex = WebUtils.getContentAsString(new URL(HUBMED_BIBTEX_URL + pubMedId));
            if (bibtex == null || bibtex.isEmpty()) {
                throw new ScrapingFailureException("getting bibtex failed");
            }
            // quoteReplacement escapes both '$' and '\' so the URL is inserted
            // literally; replaceFirst leaves the text untouched when no url field exists.
            final String replacement = Matcher.quoteReplacement("url = \"" + url + "\"");
            scrapingContext.setBibtexResult(BIBTEX_URL_FIELD_PATTERN.matcher(bibtex).replaceFirst(replacement));
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Determines the PMID, trying in order: the digits in the query of a
     * {@code db=PubMed} URL, a "PMID: n " marker in the page text, and the
     * {@code citation_pmid} meta tag. A strategy that yields nothing falls
     * through to the next one.
     *
     * @param url the scraped URL
     * @param pageContent the page text, may be {@code null}
     * @return the PMID, or {@code null} if none of the strategies found one
     */
    private static String findPubMedId(final URL url, final String pageContent) {
        if (PUBMED_DB_URL_PATTERN.matcher(url.toString()).matches()) {
            final String query = url.getQuery();
            if (query != null) {
                final Matcher digits = DIGITS_PATTERN.matcher(query);
                if (digits.find()) {
                    return digits.group();
                }
            }
        }
        if (pageContent == null) {
            return null;
        }
        final Matcher pageMarker = PAGE_PMID_PATTERN.matcher(pageContent);
        // The marker pattern allows zero digits; an empty capture identifies no record.
        if (pageMarker.find() && !pageMarker.group(1).isEmpty()) {
            return pageMarker.group(1);
        }
        final Matcher metaTag = META_PMID_PATTERN.matcher(pageContent);
        return metaTag.find() ? metaTag.group(1) : null;
    }

    /** Returns the HTML description of this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /** Returns the host/path pattern pairs this scraper is registered for. */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /** Returns the display name of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** Returns the home URL of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
