package org.bibsonomy.scraper.url.kde.karlsruhe;

import com.hp.hpl.jena.sparql.ARQConstants;
import com.hp.hpl.jena.sparql.sse.Tags;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.didion.jwnl.dictionary.database.DatabaseManagerImpl;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.PageNotSupportedException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.23.jar:org/bibsonomy/scraper/url/kde/karlsruhe/UBKAScraper.class */
/**
 * Scraper for search result pages of the University Library (UB) Karlsruhe
 * catalogue ({@code /hylib-bin/suche.cgi}).
 *
 * <p>If the scraped URL already requests the BibTeX view ({@code bibtex=1}),
 * the BibTeX entry is extracted from the page content of the scraping
 * context. Otherwise the value of the {@code nd} query parameter is used to
 * build the BibTeX view URL, which is then downloaded and parsed.
 */
public class UBKAScraper extends AbstractUrlScraper {
    private static final String UBKA_HOST_NAME = "http://www.ubka.uni-karlsruhe.de";
    private static final String UBKA_HOST = "ubka.uni-karlsruhe.de";
    private static final String UBKA_SEARCH_PATH = "/hylib-bin/suche.cgi";
    private static final String UBKA_SEARCH_NAME = UBKA_HOST_NAME + UBKA_SEARCH_PATH;
    private static final String UBKA_PARAM_BIBTEX = "bibtex=1";
    private static final String UBKA_PARAM_OPACDB = "opacdb=UBKA_OPAC";
    private static final String UBKA_PARAM_PRINTMAB = "printMAB=1";
    private static final String UBKA_PARAM_ND = "nd";
    private static final String UBKA_BIB_PATTERN = ".*<td valign=\"top\"\\s*>\\s*(@[A-Za-z]+&nbsp;\\s*\\{.+}\\s).*";
    // The original terminator was "\},?<br>", but every "<br>" is removed from
    // the page before this pattern is applied, so it could never match and the
    // keyword normalisation was dead code. Only the closing brace is required now.
    private static final String UBKA_COMMA_PATTERN = "(.*keywords\\s*=\\s*\\{)(.*?)(\\}.*)";
    private static final String UBKA_SPACE_PATTERN = "&nbsp;";
    private static final String UBKA_BREAK_PATTERN = "<br>";
    private static final String SITE_URL = "http://www.ubka.uni-karlsruhe.de/";
    private static final String SITE_NAME = "University Library (UB) Karlsruhe";
    private static final String CHARSET = "UTF-8";
    private static final String INFO = "This scraper parses a publication page from the " + href(SITE_URL, SITE_NAME) + ".";

    /** Both regular expressions need '.' to span lines (DOTALL); MULTILINE is kept from the original flags (8 | 32 == 40). */
    private static final int REGEX_FLAGS = Pattern.MULTILINE | Pattern.DOTALL;
    private static final Pattern BIB_PATTERN = Pattern.compile(UBKA_BIB_PATTERN, REGEX_FLAGS);
    private static final Pattern KEYWORDS_PATTERN = Pattern.compile(UBKA_COMMA_PATTERN, REGEX_FLAGS);

    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(
            new Tuple<Pattern, Pattern>(Pattern.compile(".*" + UBKA_HOST), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Extracts the BibTeX entry for a UBKA search page and stores it in the
     * scraping context.
     *
     * @param scrapingContext context holding the URL (and page content) to scrape
     * @return {@code true} once a BibTeX result has been stored in the context
     * @throws PageNotSupportedException if the URL path is not the UBKA search
     *         script or the URL carries no query string
     * @throws ScrapingFailureException if no BibTeX entry could be obtained
     * @throws InternalFailureException if downloading the BibTeX page fails
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String query = scrapingContext.getUrl().getQuery();
        // getQuery() returns null for a URL without '?', which previously caused a NullPointerException.
        if (!UBKA_SEARCH_PATH.equals(scrapingContext.getUrl().getPath()) || query == null) {
            throw new PageNotSupportedException("This UBKA URL is not supported!");
        }

        final String pageContent;
        if (query.contains(UBKA_PARAM_BIBTEX)) {
            // The current page already is the BibTeX view.
            pageContent = scrapingContext.getPageContent();
        } else {
            pageContent = downloadBibtexPage(query);
        }

        final String bibtex = extractBibtexFromUBKA(pageContent);
        if (bibtex == null) {
            throw new ScrapingFailureException("getting bibtex failed");
        }

        scrapingContext.setBibtexResult(
                BibTexUtils.addFieldIfNotContained(bibtex, "url", scrapingContext.getUrl().toString()));
        return true;
    }

    /**
     * Downloads the BibTeX view of the record identified by the {@code nd}
     * parameter of the given query string.
     *
     * @param query query string of the scraped URL (not {@code null})
     * @return content of the BibTeX view page
     * @throws ScrapingFailureException if the query has no usable {@code nd} value
     * @throws InternalFailureException if the page cannot be downloaded
     */
    private String downloadBibtexPage(String query) throws ScrapingException {
        final String nd = extractQueryParamValue(query, UBKA_PARAM_ND);
        if (nd == null || nd.length() == 0) {
            // Without a record id the request would be sent with "nd=null" and could only fail later.
            throw new ScrapingFailureException("getting bibtex failed: UBKA URL has no '" + UBKA_PARAM_ND + "' parameter");
        }
        try {
            // The value was URL-decoded while parsing, so it is encoded again before being put into a URL.
            final String bibtexUrl = UBKA_SEARCH_NAME + "?" + UBKA_PARAM_OPACDB
                    + "&" + UBKA_PARAM_ND + "=" + URLEncoder.encode(nd, CHARSET)
                    + "&" + UBKA_PARAM_PRINTMAB
                    + "&" + UBKA_PARAM_BIBTEX;
            return WebUtils.getContentAsString(new URL(bibtexUrl));
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Cuts the BibTeX entry out of the HTML of a UBKA BibTeX view page.
     *
     * <p>All {@code <br>} tags are removed, the entry is located with
     * {@link #UBKA_BIB_PATTERN}, {@code &nbsp;} entities become spaces, and
     * commas inside the {@code keywords} field are replaced by spaces.
     *
     * @param str HTML content of the page, may be {@code null}
     * @return the extracted BibTeX entry, or {@code null} if the page contains none
     * @throws ScrapingException declared for compatibility with existing callers
     */
    private String extractBibtexFromUBKA(String str) throws ScrapingException {
        if (str == null) {
            return null;
        }
        final Matcher bibMatcher = BIB_PATTERN.matcher(str.replace(UBKA_BREAK_PATTERN, ""));
        if (!bibMatcher.matches()) {
            return null;
        }
        final String bibtex = bibMatcher.group(1).replace(UBKA_SPACE_PATTERN, " ");

        final Matcher keywordsMatcher = KEYWORDS_PATTERN.matcher(bibtex);
        if (!keywordsMatcher.matches()) {
            return bibtex;
        }
        return keywordsMatcher.group(1) + keywordsMatcher.group(2).replace(',', ' ') + keywordsMatcher.group(3);
    }

    /**
     * Returns the URL-decoded value of a parameter from a query string of the
     * form {@code k1=v1&k2=v2}.
     *
     * <p>Pairs without a key or without {@code '='} are ignored. If the
     * parameter occurs several times, the last occurrence wins. A malformed
     * percent escape makes {@link URLDecoder} throw an
     * {@link IllegalArgumentException}, which is not caught here.
     *
     * @param str the query string, may be {@code null}
     * @param str2 the (decoded) name of the parameter to look up
     * @return the decoded value, an empty string for {@code name=}, or
     *         {@code null} if the parameter is absent
     * @throws InternalFailureException if UTF-8 is not supported
     */
    private String extractQueryParamValue(String str, String str2) throws ScrapingException {
        if (str == null || str2 == null) {
            return null;
        }
        String value = null;
        try {
            for (String pair : str.split("&")) {
                final int separator = pair.indexOf('=');
                if (separator <= 0) {
                    // no '=' at all or an empty key: nothing to look up
                    continue;
                }
                if (str2.equals(URLDecoder.decode(pair.substring(0, separator), CHARSET))) {
                    value = URLDecoder.decode(pair.substring(separator + 1), CHARSET);
                }
            }
        } catch (UnsupportedEncodingException e) {
            throw new InternalFailureException(e);
        }
        return value;
    }

    /**
     * @return a short HTML description of this scraper
     */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the host/path pattern pairs this scraper registers for
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the home page URL of the supported site
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
