package org.bibsonomy.scraper.url.kde.worldcat;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.didion.jwnl.dictionary.database.DatabaseManagerImpl;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.converter.RisToBibtexConverter;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.id.ISBNUtils;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.19.jar:org/bibsonomy/scraper/url/kde/worldcat/WorldCatScraper.class */
/**
 * URL scraper for WorldCat record pages.
 *
 * <p>For a supported URL the scraper downloads the page, optionally follows the
 * first search result link found in it, requests the record's EndNote export
 * and converts that export to BibTeX with {@link RisToBibtexConverter}.
 * Static helpers allow looking up a publication by ISBN or ISSN through the
 * WorldCat search URL.
 */
public class WorldCatScraper extends AbstractUrlScraper {
    /** Search URL prefix; the ISBN/ISSN is appended as the value of the {@code q} parameter. */
    private static final String WORLDCAT_URL = "http://www.worldcat.org/search?qt=worldcat_org_all&q=";
    private static final String SITE_URL = "http://www.worldcat.org/";
    private static final String SITE_NAME = "Worldcat";
    private static final String INFO = "Scraper for publications from " + href(SITE_URL, SITE_NAME) + ".";

    /**
     * Host/path pattern pair announcing which URLs this scraper supports.
     * NOTE(review): the dot in the host pattern is not escaped, so it matches any
     * character at that position - confirm whether that is intended.
     */
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(
            new Tuple<Pattern, Pattern>(Pattern.compile(".*worldcat.org"), Pattern.compile("/oclc/")));

    /** Captures the href of a link ending in {@code brief_results}. */
    private static final Pattern PATTERN_GET_FIRST_SEARCH_RESULT = Pattern.compile("<a href=\"([^\\\"]*brief_results)\">");
    private static final RisToBibtexConverter converter = new RisToBibtexConverter();

    /** Parameters appended to a record path to request its EndNote export. */
    private static final String ENDNOTE_EXPORT_QUERY = "?page=endnote&client=worldcat.org-detailed_record";

    /** Name of the BibTeX field that receives the source URL. */
    private static final String BIBTEX_URL_FIELD = "url";

    /**
     * @return a short description of this scraper containing a link to the site
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Scrapes the URL held by the given context and stores the resulting BibTeX in it.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return {@code true} when BibTeX was stored in the context
     * @throws ScrapingFailureException when no BibTeX could be obtained
     * @throws InternalFailureException when an {@link IOException} occurs while downloading
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final String bibtex;
        try {
            bibtex = getBibtex(scrapingContext.getUrl(), false);
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
        if (!ValidationUtils.present(bibtex)) {
            throw new ScrapingFailureException("getting bibtex failed");
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * Searches WorldCat for the given ISBN (after {@link ISBNUtils#cleanISBN}) and
     * returns the BibTeX of the first result.
     *
     * @param str the ISBN to search for
     * @return the BibTeX, or {@code null} when no export could be obtained
     * @throws IOException when the URL is malformed or a download fails
     * @throws ScrapingException declared for callers; see {@code getBibtex}
     */
    public static String getBibtexByISBN(String str) throws IOException, ScrapingException {
        return getBibtex(new URL(WORLDCAT_URL + ISBNUtils.cleanISBN(str)), true);
    }

    /**
     * Searches WorldCat for the given ISSN (appended to the search URL as is) and
     * returns the BibTeX of the first result.
     *
     * @param str the ISSN to search for
     * @return the BibTeX, or {@code null} when no export could be obtained
     * @throws IOException when the URL is malformed or a download fails
     * @throws ScrapingException declared for callers; see {@code getBibtex}
     */
    public static String getBibtexByISSN(String str) throws IOException, ScrapingException {
        return getBibtex(new URL(WORLDCAT_URL + str), true);
    }

    /**
     * Builds the WorldCat search URL for the ISBN extracted from the given text.
     *
     * @param str text from which {@link ISBNUtils#extractISBN} extracts an ISBN
     * @return the search URL, or {@code null} when no ISBN is present
     * @throws MalformedURLException when the resulting URL is invalid
     */
    public static URL getUrlForIsbn(String str) throws MalformedURLException {
        String isbn = ISBNUtils.extractISBN(str);
        if (!ValidationUtils.present(isbn)) {
            return null;
        }
        return new URL(WORLDCAT_URL + isbn);
    }

    /**
     * Downloads the page, resolves the record URL, fetches its EndNote export and
     * converts it to BibTeX.
     *
     * @param pageUrl URL of a record page or of a search result page
     * @param search  when {@code true}, the query of the resolved record URL is
     *                appended to the export request
     * @return the BibTeX with the original {@code pageUrl} added as url field when
     *         not already contained, or {@code null} when a page could not be
     *         fetched or the export does not start with {@code TY}
     * @throws IOException when a URL is malformed or a download fails
     * @throws ScrapingException kept in the signature for the public callers
     */
    private static String getBibtex(URL pageUrl, boolean search) throws IOException, ScrapingException {
        String pageContent = WebUtils.getContentAsString(pageUrl);
        if (pageContent == null) {
            // Nothing was fetched; report "no result" rather than failing with a NullPointerException.
            return null;
        }

        // If the page contains a link to a first search result, use that record instead of the page itself.
        Matcher firstResult = PATTERN_GET_FIRST_SEARCH_RESULT.matcher(pageContent);
        URL recordUrl = pageUrl;
        if (firstResult.find()) {
            recordUrl = new URL(pageUrl.getProtocol() + "://" + pageUrl.getHost() + firstResult.group(1));
        }

        String exportUrl = recordUrl.getProtocol() + "://" + recordUrl.getHost() + recordUrl.getPath() + ENDNOTE_EXPORT_QUERY;
        String recordQuery = recordUrl.getQuery();
        if (search && recordQuery != null) {
            // Skip a missing query so that the literal text "null" never ends up in the request.
            exportUrl = exportUrl + "&" + recordQuery;
        }

        String exportContent = WebUtils.getContentAsString(new URL(exportUrl));
        if (exportContent == null || !exportContent.startsWith("TY")) {
            // Only content starting with "TY" is handed to the RIS converter.
            return null;
        }
        return BibTexUtils.addFieldIfNotContained(converter.RisToBibtex(exportContent), BIBTEX_URL_FIELD, pageUrl.toString());
    }

    /**
     * @return the host/path pattern pairs of URLs supported by this scraper
     */
    @Override
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the base URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
