package org.bibsonomy.scraper.url.kde.arxiv;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.converter.OAIConverter;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.24.jar:org/bibsonomy/scraper/url/kde/arxiv/ArxivScraper.class */
/**
 * URL scraper for arXiv abstract pages.
 *
 * <p>The arXiv identifier is taken from the {@code abs/<id>} part of the page URL. The
 * matching record is then requested from {@code export.arxiv.org/oai2} with
 * {@code verb=GetRecord} and {@code metadataPrefix=oai_dc}, passed through
 * {@link OAIConverter#convert}, and stored as the BibTeX result of the scraping context
 * after a {@code cite arxiv:<id>} note has been added.
 */
public class ArxivScraper extends AbstractUrlScraper {
    private static final String SITE_URL = "http://arxiv.org/";
    private static final String SITE_NAME = "arXiv";
    private static final String INFO = "This scraper parses a publication page from " + href(SITE_URL, SITE_NAME) + ".";

    /**
     * Captures the identifier that follows {@code abs/}. The capture stops at the query
     * string ({@code ?}) and at the fragment ({@code #}): a fragment must not leak into the
     * identifier because it would cut off the remaining parameters of the OAI request URL.
     * At least one character is required so that an empty identifier counts as "not found".
     */
    private static final Pattern PATTERN_ID = Pattern.compile("abs/([^?#]+)");

    private static final String ARXIV_HOST = "arxiv.org";

    /** The two halves of the OAI GetRecord request URL; the arXiv identifier goes in between. */
    private static final String OAI_RECORD_URL_PREFIX = "http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:";
    private static final String OAI_RECORD_URL_SUFFIX = "&metadataPrefix=oai_dc";

    /** Start of a BibTeX note field as looked for in the converted record. */
    private static final String NOTE_FIELD_START = "note = {";

    private static final List<Pair<Pattern, Pattern>> URL_PATTERNS = Collections.singletonList(new Pair<Pattern, Pattern>(Pattern.compile(ARXIV_HOST), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Scrapes the publication behind an arXiv abstract URL.
     *
     * @param scrapingContext context providing the URL and receiving the scraper and the result
     * @return {@code false} if the context has no URL, the URL has no host, or the host does
     *         not end with {@code arxiv.org}; {@code true} once a BibTeX result has been set
     * @throws ScrapingFailureException if the URL contains no {@code abs/<id>} part
     * @throws InternalFailureException if an {@link IOException} occurs while fetching or
     *         converting the record
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        if (scrapingContext.getUrl() == null) {
            return false;
        }
        // A URL without a host cannot be an arXiv page; treat it as unsupported instead of failing with an NPE.
        final String host = scrapingContext.getUrl().getHost();
        // NOTE(review): this is a plain suffix match, so hosts such as "xarxiv.org" are accepted as well.
        // It is kept as is to stay in line with the host pattern returned by getUrlPatterns().
        if (host == null || !host.endsWith(ARXIV_HOST)) {
            return false;
        }
        try {
            scrapingContext.setScraper(this);
            final Matcher idMatcher = PATTERN_ID.matcher(scrapingContext.getUrl().toString());
            if (!idMatcher.find()) {
                throw new ScrapingFailureException("no arxiv id found in URL");
            }
            final String arxivId = idMatcher.group(1);
            final String record = WebUtils.getContentAsString(OAI_RECORD_URL_PREFIX + arxivId + OAI_RECORD_URL_SUFFIX);
            final String bibtex = OAIConverter.convert(record);
            scrapingContext.setBibtexResult(addArxivNote(bibtex, arxivId));
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Adds a {@code cite arxiv:<id>} remark to the given BibTeX text.
     *
     * <p>If the text already contains {@code note = {}, the remark (followed by a line break)
     * is inserted at the start of every such field. Otherwise a new note field is inserted
     * after the first occurrence of <code>},</code>.
     *
     * @param bibtex  the converted record
     * @param arxivId the arXiv identifier taken from the URL
     * @return the text with the remark added
     */
    private static String addArxivNote(final String bibtex, final String arxivId) {
        if (bibtex.contains(NOTE_FIELD_START)) {
            // String.replace works on literals, so the identifier needs no escaping here.
            return bibtex.replace(NOTE_FIELD_START, NOTE_FIELD_START + "cite arxiv:" + arxivId + "\n");
        }
        // replaceFirst interprets '$' and '\' in the replacement; the identifier comes from the
        // URL and may contain them, so the replacement has to be quoted.
        final String noteField = "},\n" + NOTE_FIELD_START + "cite arxiv:" + arxivId + "},";
        return bibtex.replaceFirst("},", Matcher.quoteReplacement(noteField));
    }

    /**
     * @return a short description of this scraper containing a link to the arXiv site
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return a single-element list pairing the arXiv host pattern with
     *         {@link AbstractUrlScraper#EMPTY_PATTERN}
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return URL_PATTERNS;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the base URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
