package org.bibsonomy.scraper.url.kde.acl;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import net.didion.jwnl.dictionary.database.DatabaseManagerImpl;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.PageNotSupportedException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.25.jar:org/bibsonomy/scraper/url/kde/acl/AclScraper.class */
/**
 * URL scraper for PDF links of the Association for Computational Linguistics
 * (aclweb.org).
 *
 * <p>A URL is handled when its host matches {@code .*aclweb.org} and its path
 * matches {@code ^/anthology-new.*\.pdf$}. The BibTeX record is downloaded from
 * the location obtained by replacing the {@code .pdf} suffix of the requested
 * URL with {@code .bib}.
 */
public class AclScraper extends AbstractUrlScraper {
    /** Marker found in the response body when the server cannot serve the requested BibTeX file. */
    private static final String ERROR_CODE_300 = "<TITLE>300 Multiple Choices</TITLE>";
    private static final String SITE_URL = "http://aclweb.org/";
    private static final String SITE_NAME = "Association for Computational Linguistics";
    private static final String INFO = "Scraper for (PDF) references from " + href(SITE_URL, SITE_NAME) + ".";

    /** Suffix of the document URLs this scraper accepts. */
    private static final String PDF_SUFFIX = ".pdf";
    /** Suffix of the BibTeX file that sits next to each PDF. */
    private static final String BIBTEX_SUFFIX = ".bib";
    /** Name of the BibTeX field that receives the scraped URL. */
    private static final String URL_FIELD = "url";

    private static final Pattern hostPattern = Pattern.compile(".*aclweb.org");
    private static final Pattern pathPattern = Pattern.compile("^/anthology-new.*\\.pdf$");
    private static final List<Pair<Pattern, Pattern>> patterns =
            Collections.singletonList(new Pair<Pattern, Pattern>(hostPattern, pathPattern));

    /**
     * Returns a short description of this scraper, including a link to the supported site.
     *
     * @return the info text
     */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    /**
     * Downloads the BibTeX file that belongs to the PDF URL held by the scraping
     * context and stores it as the scraping result.
     *
     * @param scrapingContext context providing the URL and receiving the BibTeX result
     * @return {@code true} once a BibTeX result has been stored in the context
     * @throws ScrapingFailureException if the download yields no content
     * @throws PageNotSupportedException if the URL does not point to a {@code .pdf}
     *         document or the server reports that no BibTeX is available
     * @throws InternalFailureException if the download fails with an I/O error
     * @throws ScrapingException declared supertype of the failures listed above
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    public boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String pdfUrl = scrapingContext.getUrl().toString();
        final String bibtexUrl = toBibtexUrl(pdfUrl);
        if (bibtexUrl == null) {
            throw new PageNotSupportedException("This aclweb.org page is not supported. The URL does not point to a PDF file.");
        }

        try {
            final String bibtex = WebUtils.getContentAsString(bibtexUrl);
            if (bibtex == null) {
                throw new ScrapingFailureException("getting bibtex failed");
            }
            if (bibtex.contains(ERROR_CODE_300)) {
                throw new PageNotSupportedException("This aclweb.org page is not supported. BibTeX is not available.");
            }
            // Attach the originally requested URL to the record via the "url" field.
            scrapingContext.setBibtexResult(BibTexUtils.addFieldIfNotContained(bibtex, URL_FIELD, pdfUrl));
            return true;
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so a single handler covers both.
            throw new InternalFailureException(e);
        }
    }

    /**
     * Derives the address of the BibTeX file from the address of a PDF document.
     *
     * <p>The URL patterns only constrain the path, so the textual URL may still
     * carry a query string or a fragment (for example {@code paper.pdf#page=3}).
     * Anything from the first {@code ?} or {@code #} onwards is therefore dropped
     * before the suffix is exchanged.
     *
     * @param pdfUrl textual form of the PDF URL
     * @return the URL with {@code .pdf} replaced by {@code .bib}, or {@code null}
     *         if the path part does not end in {@code .pdf}
     */
    private static String toBibtexUrl(final String pdfUrl) {
        int pathEnd = pdfUrl.length();
        final int queryStart = pdfUrl.indexOf('?');
        if (queryStart >= 0) {
            pathEnd = queryStart;
        }
        final int fragmentStart = pdfUrl.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < pathEnd) {
            pathEnd = fragmentStart;
        }

        final String withoutQueryAndFragment = pdfUrl.substring(0, pathEnd);
        if (!withoutQueryAndFragment.endsWith(PDF_SUFFIX)) {
            return null;
        }
        final int stemLength = withoutQueryAndFragment.length() - PDF_SUFFIX.length();
        return withoutQueryAndFragment.substring(0, stemLength) + BIBTEX_SUFFIX;
    }

    /**
     * Returns the host/path pattern pairs identifying the URLs this scraper handles.
     *
     * @return an immutable single-element list of (host pattern, path pattern)
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * Returns the human-readable name of the supported site.
     *
     * @return the site name
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * Returns the base URL of the supported site.
     *
     * @return the site URL
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
