package org.bibsonomy.scraper.url.kde.jstor;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.xml.serializer.SerializerConstants;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.46.jar:org/bibsonomy/scraper/url/kde/jstor/JStorScraper.class */
/**
 * URL scraper for JSTOR pages.
 *
 * <p>The scraper downloads the requested page, locates the citation export
 * form (fetching a dedicated export page first if the requested page does not
 * contain the form), submits that form via HTTP POST and stores the response
 * as the BibTeX result of the scraping context.
 */
public class JStorScraper extends AbstractUrlScraper {
    private static final String JSTOR_HOST = "jstor.org";
    private static final String JSTOR_HOST_NAME = "http://www.jstor.org";
    private static final String JSTOR_ABSTRACT_PATH = "/pss/";
    private static final String JSTOR_EXPORT_PATH = "/action/exportSingleCitation";
    private static final String JSTOR_STABLE_PATH = "/stable/";
    private static final String JSTOR_DISCOVER_PATH = "/discover/";
    private static final String EXPORT_PAGE_URL = "https://www.jstor.org/action/exportSingleCitation?singleCitation=true&doi=";
    private static final String info = "This Scraper parses a publication from " + href("http://www.jstor.org/", "JSTOR");

    /** HTML-escaped ampersand as it appears inside href attributes. */
    private static final String HTML_ENTITY_AMP = "&amp;";
    /** Plain ampersand used as query parameter separator. */
    private static final String AMP = "&";

    private static final Pattern PAGE_CONTENT_DOI_PATTERN = Pattern.compile("(?m)<div id=\"doi\" class=\"hide\">([^>]+?)<");
    private static final Pattern EXPORT_URL_DOI_PATTERN = Pattern.compile("doi=([^\\&]++)");
    private static final Pattern EXPORT_LINK_PATTERN = Pattern.compile("href=\"([^\"]++).*?id=\"export\"");
    private static final Pattern SUBMIT_ACTION_NODOI_PATTERN = Pattern.compile("<input.*?id=\"noDoi\".*?value=\"([^\"]++)\"");
    private static final Pattern SUBMIT_ACTION_DOI_PATTERN = Pattern.compile("<input.*?name=\"doi\".*?value=\"([^\"]*+)\"");
    private static final Pattern NUMBER_CITS_EXPORTED_PATTERN = Pattern.compile("NUMBER OF CITATIONS : (\\d++)");
    private static final Pattern SUBMIT_ACTION_PATTERN = Pattern.compile("href=\"javascript:submitActionInNewWindow[^']++'([^']++)");

    /** (host pattern, path pattern) pairs of the URLs this scraper supports. */
    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        // The resulting regex strings are identical to the previously inlined
        // literals (".*jstor.org", "/pss/.*", ...).
        final Pattern hostPattern = Pattern.compile(".*" + JSTOR_HOST);
        patterns.add(new Pair<>(hostPattern, Pattern.compile(JSTOR_ABSTRACT_PATH + ".*")));
        patterns.add(new Pair<>(hostPattern, Pattern.compile(JSTOR_EXPORT_PATH + ".*")));
        patterns.add(new Pair<>(hostPattern, Pattern.compile(JSTOR_STABLE_PATH + ".*")));
        patterns.add(new Pair<>(hostPattern, Pattern.compile(JSTOR_DISCOVER_PATH + ".*")));
    }

    /**
     * @return a short description of this scraper
     */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /**
     * Scrapes the URL of the given context.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return {@code true} once the exported citation has been stored in the
     *         context; every failure is reported by exception instead
     * @throws ScrapingException if a page cannot be downloaded, an expected
     *         element cannot be found in the page content, the export form
     *         cannot be submitted, or the export contains no citation
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final HttpClient httpClient = WebUtils.getHttpClient();
        try {
            final String url = scrapingContext.getUrl().toExternalForm();

            // 'getMethod' and 'pageContent' always refer to the page that
            // contains the export form: the requested page, or the export page
            // if the form had to be fetched separately.
            GetMethod getMethod = new GetMethod(url);
            String pageContent = WebUtils.getContentAsString(httpClient, getMethod);
            if (pageContent == null) {
                throw new ScrapingException("Cannot access requested location");
            }

            final String submitAction;
            final Matcher directAction = SUBMIT_ACTION_PATTERN.matcher(pageContent);
            if (directAction.find()) {
                submitAction = directAction.group(1);
            } else {
                final String exportPageUrl = findExportPageUrl(url, pageContent);
                if (!ValidationUtils.present(exportPageUrl)) {
                    throw new ScrapingException("Cannot continue, finally not having submit action");
                }
                getMethod = new GetMethod(exportPageUrl);
                pageContent = WebUtils.getContentAsString(httpClient, getMethod);
                // The first download is null-checked, so the helper can return
                // null; without this guard the matcher below would fail with a
                // NullPointerException instead of a ScrapingException.
                if (pageContent == null) {
                    throw new ScrapingException("Cannot access export page");
                }
                final Matcher exportPageAction = SUBMIT_ACTION_PATTERN.matcher(pageContent);
                if (!exportPageAction.find()) {
                    throw new ScrapingException("Downloaded export page but didn't find submit action");
                }
                submitAction = exportPageAction.group(1);
            }

            final Matcher noDoiMatcher = SUBMIT_ACTION_NODOI_PATTERN.matcher(pageContent);
            final Matcher doiMatcher = SUBMIT_ACTION_DOI_PATTERN.matcher(pageContent);
            if (!noDoiMatcher.find() || !doiMatcher.find()) {
                throw new ScrapingException("Couldn't get required data for export form");
            }
            final String noDoi = noDoiMatcher.group(1);

            // If the form's noDoi value is literally "noDoi" (ignoring case) the
            // form is posted back to the page it was found on, otherwise to the
            // submit action extracted from the page.
            final URI postUri = "noDoi".equalsIgnoreCase(noDoi) ? getMethod.getURI() : new URI(submitAction, true);
            final PostMethod postMethod = new PostMethod();
            postMethod.setURI(postUri);
            postMethod.addParameter("redirectUri", getMethod.getPath());
            postMethod.addParameter("noDoi", noDoi);
            postMethod.addParameter("doi", doiMatcher.group(1));

            final String exportResult = WebUtils.getPostContentAsString(httpClient, postMethod);
            if (!ValidationUtils.present(exportResult)) {
                throw new ScrapingException("Could not submit export form");
            }

            final Matcher citationCountMatcher = NUMBER_CITS_EXPORTED_PATTERN.matcher(exportResult);
            if (!citationCountMatcher.find()) {
                throw new ScrapingException("no citations received");
            }
            final int citationCount;
            try {
                citationCount = Integer.parseInt(citationCountMatcher.group(1));
            } catch (NumberFormatException e) {
                // The pattern accepts arbitrarily long digit runs; report an
                // unparsable count as a scraping failure rather than letting an
                // unchecked exception escape.
                throw new ScrapingException("invalid number of citations: " + citationCountMatcher.group(1));
            }
            if (citationCount < 1) {
                throw new ScrapingException("received " + citationCount + " citations");
            }
            scrapingContext.setBibtexResult(exportResult);
            return true;
        } catch (IOException e) {
            throw new ScrapingException(e);
        }
    }

    /**
     * Determines the URL of the export page for a page that does not itself
     * contain the export form. The candidates are tried in this order: a
     * {@code doi=} parameter in the requested URL, the hidden DOI element in
     * the page content, and finally the export link in the page content.
     *
     * @param url the requested URL in external form
     * @param pageContent the content downloaded from {@code url}, not null
     * @return the export page URL
     * @throws ScrapingException if none of the candidates is present
     */
    private static String findExportPageUrl(String url, String pageContent) throws ScrapingException {
        final Matcher urlDoiMatcher = EXPORT_URL_DOI_PATTERN.matcher(url);
        if (urlDoiMatcher.find()) {
            return EXPORT_PAGE_URL + urlDoiMatcher.group(1);
        }
        final Matcher contentDoiMatcher = PAGE_CONTENT_DOI_PATTERN.matcher(pageContent);
        if (contentDoiMatcher.find()) {
            return EXPORT_PAGE_URL + contentDoiMatcher.group(1);
        }
        final Matcher exportLinkMatcher = EXPORT_LINK_PATTERN.matcher(pageContent);
        if (!exportLinkMatcher.find()) {
            throw new ScrapingException("Cannot continue. JStor Scraper must get updated");
        }
        // The link is taken from an HTML attribute, so its ampersands are escaped.
        return exportLinkMatcher.group(1).replace(HTML_ENTITY_AMP, AMP);
    }

    /**
     * @return the (host pattern, path pattern) pairs of the supported URLs
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the name of the supported site
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return "JSTOR";
    }

    /**
     * @return the URL of the supported site
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return JSTOR_HOST_NAME;
    }
}
