package org.bibsonomy.scraper.url.kde.iucr;

import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.PageNotSupportedException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.scraper.exceptions.UsageFailureException;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.6.0.jar:org/bibsonomy/scraper/url/kde/iucr/IucrScraper.class */
/**
 * URL scraper for publications hosted by the International Union of
 * Crystallography (hosts ending in {@code iucr.org}).
 *
 * <p>The scraper reads the hidden {@code cnor} form field from the publication
 * page, downloads the BibTeX record for that id from the IUCr "biblio" script
 * and, when available, adds the abstract found in the page's
 * {@code DC.description} meta tag.
 */
public class IucrScraper extends AbstractUrlScraper {
    private static final Log log = LogFactory.getLog(IucrScraper.class);

    private static final String USAGE_FAILURE_MESSAGE = "Please open the publication in a new browser tab and post it again.";
    private static final String HOST = "iucr.org";
    private static final String HOST_JOURNAL_PREFIX = "journal";
    private static final String HOST_SCRIPTS_PREFIX = "scripts";
    private static final String DOWNLOAD_LINK_PART = "http://scripts.iucr.org/cgi-bin/biblio?Action=download&saveas=BIBTeX&cnor=";
    private static final String SITE_URL = "http://www.iucr.org/";
    private static final String SITE_NAME = "International Union of Crystallography";
    private static final String INFO = "Scraper for journals from the " + href(SITE_URL, SITE_NAME) + ".";

    /**
     * Name of the BibTeX field that receives the abstract. Kept as a local
     * constant so the scraper does not borrow an unrelated Spring XML constant
     * that merely happens to have the same value.
     */
    private static final String ABSTRACT_FIELD = "abstract";

    /** Host pattern (dots taken literally) paired with the "any path" pattern. */
    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*" + Pattern.quote(HOST)), AbstractUrlScraper.EMPTY_PATTERN));

    /** Captures the value of the hidden {@code cnor} input of the publication page. */
    private static final Pattern cnorPattern = Pattern.compile("<input name=\"cnor\" value=\"([^\"]*)\" type=\"hidden\">");

    /**
     * Captures the content of the {@code DC.description} meta tag. The capture
     * stops at the closing quote so neither the quote itself nor any markup
     * following the tag ends up in the abstract.
     */
    private static final Pattern abstractPattern = Pattern.compile("<meta name=\"DC.description\" content=\"([^\"]*)\"");

    /**
     * @return a short HTML description of this scraper
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Scrapes the BibTeX record for the publication referenced by the context.
     *
     * @param scrapingContext context holding the URL and page content; receives
     *                        the BibTeX result on success
     * @return {@code true} once the BibTeX result has been stored in the context
     * @throws UsageFailureException     if the host starts with {@code journal}
     * @throws PageNotSupportedException if the host does not start with {@code scripts}
     * @throws ScrapingFailureException  if the {@code cnor} id is missing from the
     *                                   page or the BibTeX download returned nothing
     * @throws InternalFailureException  if downloading the BibTeX failed with an I/O error
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final URL url = scrapingContext.getUrl();
        final String host = url.getHost();
        if (host.startsWith(HOST_JOURNAL_PREFIX)) {
            throw new UsageFailureException(USAGE_FAILURE_MESSAGE);
        }
        if (!host.startsWith(HOST_SCRIPTS_PREFIX)) {
            throw new PageNotSupportedException(PageNotSupportedException.DEFAULT_ERROR_MESSAGE + getClass().getName());
        }

        try {
            final Matcher cnorMatcher = cnorPattern.matcher(scrapingContext.getPageContent());
            if (!cnorMatcher.find()) {
                throw new ScrapingFailureException("ID for download link is missing.");
            }

            String bibtex = WebUtils.getContentAsString(new URL(DOWNLOAD_LINK_PART + cnorMatcher.group(1)));
            if (bibtex == null) {
                throw new ScrapingFailureException("Bibtex download failed. Bibtex result is null.");
            }

            // The abstract is optional: only touch the record when one was found.
            final String abstractText = abstractParser(url);
            if (abstractText != null) {
                bibtex = BibTexUtils.addFieldIfNotContained(bibtex, ABSTRACT_FIELD, abstractText);
            }

            // Insert the comma that is missing in front of the keywords field
            // in the downloaded record.
            scrapingContext.setBibtexResult(bibtex.replace("}\nkeywords={", "},\nkeywords={"));
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Fetches the page behind {@code url} and extracts the abstract from its
     * {@code DC.description} meta tag.
     *
     * @param url the publication page
     * @return the abstract, or {@code null} if the page could not be read or
     *         contains no matching meta tag
     */
    private static String abstractParser(URL url) {
        try {
            final String page = WebUtils.getContentAsString(url);
            if (page == null) {
                return null;
            }
            final Matcher abstractMatcher = abstractPattern.matcher(page);
            return abstractMatcher.find() ? abstractMatcher.group(1) : null;
        } catch (Exception e) {
            // Best effort: a missing abstract must not fail the whole scrape.
            log.error("error while getting abstract for " + url, e);
            return null;
        }
    }

    /**
     * @return the host/path pattern pairs this scraper is responsible for
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the home page URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
