package org.bibsonomy.scraper.url.kde.aip;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.ConnectException;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.TagStringUtils;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/aip/AipScitationScraper.class */
/**
 * URL scraper for publications hosted on AIP Scitation (hosts ending in {@code aip.org}).
 *
 * <p>BibTeX is obtained in one of three ways, tried in this order:
 * <ol>
 *   <li>the text selected by the user, when the current page is already the BibTeX
 *       citation view ({@code fn=view_bibtex2});</li>
 *   <li>the page content itself, when it does not look like an HTML document;</li>
 *   <li>a download URL assembled from the hidden inputs of the citation download form,
 *       falling back to the page reached through the article's DOI.</li>
 * </ol>
 * In every case the {@code keywords} field of the BibTeX is passed through
 * {@link #cleanKeywords(String)} before it is stored in the scraping context.
 */
public class AipScitationScraper extends AbstractUrlScraper {
    private static final String URL_AIP_CITATION_BIBTEX_PAGE_PATH = "/getabs/servlet/GetCitation";
    private static final String URL_AIP_CITATION_BIBTEX_PAGE = "http://scitation.aip.org/getabs/servlet/GetCitation?";
    private static final String URL_SPIE_AIP_CITATION_BIBTEX_PAGE = "http://spiedl.aip.org/getabs/servlet/GetCitation?";
    private static final String URL_AIP_DOWNLOAD_CITATION_FORM = "http://scitation.aip.org/journals/help_system/getabs/actions/download_citation_form.jsp";
    private static final String URL_DOI = "http://dx.doi.org/";
    private static final String HTML_INPUT_NAME_FN_AND_VALUE = "fn=view_bibtex2";
    private static final String HTML_INPUT_NAME_DOWNLOADCITATION_AND_VALUE = "downloadcitation=+Go+";
    private static final String HTML_INPUT_NAME_SOURCE = "source";
    private static final String HTML_INPUT_NAME_PREFTYPE = "PrefType";
    private static final String HTML_INPUT_NAME_PREFACTION = "PrefAction";
    private static final String HTML_INPUT_NAME_SELECTCHECK = "SelectCheck";
    private static final String LINK_BEFORE_DOI = "<a href=\"http://scitation.aip.org/jhtml/doi.jsp\">doi:</a>";
    private static final String SITE_URL = "http://scitation.aip.org/";
    private static final String SITE_NAME = "AIP Scitation";
    private static final String INFO = "Extracts publications from " + href(SITE_URL, SITE_NAME) + ". Publications can be entered as a selected BibTeX snippet or by posting the page of the reference.";
    private static final String KEYWORDS_PREFIX = "keywords = {";
    private static final String ENCODING = "UTF-8";

    // The dot is escaped so that it only matches a literal '.' in the host name.
    private static final Pattern hostPattern = Pattern.compile(".*aip\\.org");
    private static final Pattern pathPattern = AbstractUrlScraper.EMPTY_PATTERN;
    private static final String AIP_CONTENT_TYPE_HTML = "<!DOCTYPE html";
    private static final Pattern AIP_CONTENT_TYPE_HTML_PATTERN = Pattern.compile(AIP_CONTENT_TYPE_HTML);
    private static final Pattern inputPattern = Pattern.compile("<input(.*)>");
    private static final Pattern valuePattern = Pattern.compile("value=\"([^\"]*)\"");
    private static final Pattern namePattern = Pattern.compile("name=\"([^\"]*)\"");
    private static final Pattern keywordsPattern = Pattern.compile("keywords = \\{[^\\}]*\\}");
    // Compiled once instead of on every scrape.
    private static final Pattern selectCheckIdPattern = Pattern.compile("name=\"SelectCheck\"\\s+value=\"(\\w+)");
    private static final Pattern downloadIdPattern = Pattern.compile("[(]'Download',\\W{0,}'(\\w+)'[)]");
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(new Tuple<Pattern, Pattern>(hostPattern, pathPattern));

    /**
     * Scrapes BibTeX for the URL held by the given context and stores it as the
     * context's BibTeX result.
     *
     * @param scrapingContext context providing the URL and, optionally, the selected text
     * @return {@code true} once a BibTeX result has been stored
     * @throws ScrapingException an {@link InternalFailureException} when fetching a page
     *         fails with an I/O error, or a {@link ScrapingFailureException} when no
     *         BibTeX download URL could be determined
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        // Case 1: the user selected text on the BibTeX citation view itself.
        final String selectedText = scrapingContext.getSelectedText();
        if (selectedText != null
                && scrapingContext.getUrl().getPath().startsWith(URL_AIP_CITATION_BIBTEX_PAGE_PATH)
                && scrapingContext.getUrl().toString().contains(HTML_INPUT_NAME_FN_AND_VALUE)) {
            scrapingContext.setBibtexResult(cleanKeywords(selectedText));
            return true;
        }

        try {
            final String pageContent = WebUtils.getContentAsString(scrapingContext.getUrl().toString());

            // Case 2: content without an HTML doctype is taken to be the BibTeX itself.
            if (!AIP_CONTENT_TYPE_HTML_PATTERN.matcher(pageContent).find()) {
                scrapingContext.setBibtexResult(cleanKeywords(pageContent));
                return true;
            }

            // Case 3: build the download URL from the citation form's hidden inputs.
            final String articleId = extractArticleId(pageContent);
            String downloadUrl = getBibTeXDownloadURL(WebUtils.getContentAsString(URL_AIP_DOWNLOAD_CITATION_FORM), URL_AIP_CITATION_BIBTEX_PAGE, articleId);
            if (downloadUrl == null) {
                // Fallback: follow the DOI and read the inputs from the page it leads to.
                final String doi = extractDoi(pageContent);
                if (doi == null) {
                    throw new ScrapingFailureException("getting bibtex failed");
                }
                downloadUrl = getBibTeXDownloadURL(WebUtils.getContentAsString(URL_DOI + doi), URL_SPIE_AIP_CITATION_BIBTEX_PAGE, articleId);
            }
            if (downloadUrl == null) {
                throw new ScrapingFailureException("getting bibtex failed");
            }
            scrapingContext.setBibtexResult(cleanKeywords(WebUtils.getContentAsString(downloadUrl)));
            return true;
        } catch (IOException e) {
            // Also covers ConnectException, which is an IOException.
            throw new InternalFailureException(e);
        }
    }

    /**
     * Looks up the article identifier in the page, preferring the argument of a
     * {@code ('Download', '<id>')} call over the value of a {@code SelectCheck} input.
     *
     * @param pageContent HTML of the article page
     * @return the identifier, or {@code null} when neither pattern matches
     */
    private static String extractArticleId(String pageContent) {
        final Matcher downloadMatcher = downloadIdPattern.matcher(pageContent);
        if (downloadMatcher.find()) {
            return downloadMatcher.group(1);
        }
        final Matcher selectCheckMatcher = selectCheckIdPattern.matcher(pageContent);
        if (selectCheckMatcher.find()) {
            return selectCheckMatcher.group(1);
        }
        return null;
    }

    /**
     * Extracts the text that follows the {@code doi:} link up to the end of that line.
     *
     * @param pageContent HTML of the article page
     * @return the trimmed text after the link (up to the next line break, or to the end
     *         of the content when there is none), or {@code null} when the link is not
     *         present or nothing follows it
     */
    private static String extractDoi(String pageContent) {
        final int linkStart = pageContent.indexOf(LINK_BEFORE_DOI);
        if (linkStart < 0) {
            // Without this guard the offset below would point into unrelated content.
            return null;
        }
        final int doiStart = linkStart + LINK_BEFORE_DOI.length();
        final int lineEnd = pageContent.indexOf('\n', doiStart);
        final String rawDoi = lineEnd < 0 ? pageContent.substring(doiStart) : pageContent.substring(doiStart, lineEnd);
        // Trimming removes a trailing '\r' or spaces that must not end up in the URL.
        final String doi = rawDoi.trim();
        return doi.length() == 0 ? null : doi;
    }

    /**
     * Builds a {@code name=value} query parameter from the first {@code value="..."}
     * attribute found in the given input-tag fragment.
     *
     * @param inputAttributes attribute text of an {@code <input>} tag
     * @param parameterName name to use on the left-hand side of the parameter
     * @return the parameter with a URL-encoded value, or {@code null} when the fragment
     *         has no {@code value} attribute
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private static String getInputValue(String inputAttributes, String parameterName) throws UnsupportedEncodingException {
        final Matcher valueMatcher = valuePattern.matcher(inputAttributes);
        if (!valueMatcher.find()) {
            return null;
        }
        return parameterName + "=" + URLEncoder.encode(valueMatcher.group(1), ENCODING);
    }

    /**
     * Rewrites the first {@code keywords = {...}} field of the BibTeX by passing its
     * content through {@code TagStringUtils.cleanTags} (called with {@code true},
     * {@code ";"} and {@code "_"}).
     *
     * @param bibtex BibTeX text
     * @return the BibTeX with the rewritten keywords field, or the input unchanged when
     *         it contains no such field
     */
    private static String cleanKeywords(String bibtex) {
        final Matcher keywordsMatcher = keywordsPattern.matcher(bibtex);
        if (!keywordsMatcher.find()) {
            return bibtex;
        }
        final String field = keywordsMatcher.group();
        // Strip the leading "keywords = {" and the closing "}".
        final String rawKeywords = field.substring(KEYWORDS_PREFIX.length(), field.length() - 1);
        return bibtex.substring(0, keywordsMatcher.start())
                + KEYWORDS_PREFIX
                + TagStringUtils.cleanTags(rawKeywords, true, ";", "_")
                + "}"
                + bibtex.substring(keywordsMatcher.end());
    }

    /**
     * Assembles the BibTeX download URL from the {@code PrefAction}, {@code PrefType},
     * {@code SelectCheck} and {@code source} inputs found in the given HTML.
     *
     * @param formHtml HTML containing the {@code <input>} tags of the download form
     * @param baseUrl URL prefix (ending in {@code ?}) the query string is appended to
     * @param articleId identifier used for {@code SelectCheck} when the form supplies
     *        none or the literal value {@code null}
     * @return the download URL, or {@code null} when one of {@code PrefAction},
     *         {@code PrefType} or {@code source} is missing
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private static String getBibTeXDownloadURL(String formHtml, String baseUrl, String articleId) throws UnsupportedEncodingException {
        String prefAction = null;
        String prefType = null;
        String selectCheck = null;
        String source = null;

        final Matcher inputMatcher = inputPattern.matcher(formHtml);
        while (inputMatcher.find()) {
            final String attributes = inputMatcher.group(1);
            final Matcher nameMatcher = namePattern.matcher(attributes);
            if (!nameMatcher.find()) {
                continue;
            }
            final String inputName = nameMatcher.group(1);
            if (inputName.contains(HTML_INPUT_NAME_PREFACTION)) {
                prefAction = getInputValue(attributes, HTML_INPUT_NAME_PREFACTION);
            } else if (inputName.contains(HTML_INPUT_NAME_PREFTYPE)) {
                prefType = getInputValue(attributes, HTML_INPUT_NAME_PREFTYPE);
            } else if (inputName.contains(HTML_INPUT_NAME_SELECTCHECK)) {
                selectCheck = getInputValue(attributes, HTML_INPUT_NAME_SELECTCHECK);
            } else if (inputName.contains(HTML_INPUT_NAME_SOURCE)) {
                source = getInputValue(attributes, HTML_INPUT_NAME_SOURCE);
            }
        }

        if (selectCheck == null || selectCheck.equals("SelectCheck=null")) {
            // NOTE(review): with a null articleId this yields the literal
            // "SelectCheck=null" - confirm that this is intended.
            selectCheck = "SelectCheck=" + articleId;
        }

        if (source == null || prefType == null || prefAction == null) {
            return null;
        }
        return new StringBuilder(baseUrl)
                .append(HTML_INPUT_NAME_FN_AND_VALUE)
                .append("&").append(prefAction)
                .append("&").append(prefType)
                .append("&").append(selectCheck)
                .append("&").append(source)
                .append("&").append(HTML_INPUT_NAME_DOWNLOADCITATION_AND_VALUE)
                .toString();
    }

    /**
     * @return a short HTML description of what this scraper supports
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the single host/path pattern pair this scraper is registered for
     */
    @Override
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the home URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
