package org.bibsonomy.scraper.url.kde.jstor;

import com.hp.hpl.jena.sparql.sse.Tags;
import com.hp.hpl.jena.util.FileManager;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.ws.rs.core.HttpHeaders;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.BeanFactory;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/bibsonomy-scraper-2.0.29.jar:org/bibsonomy/scraper/url/kde/jstor/JStorScraper.class
 */
/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.30.jar:org/bibsonomy/scraper/url/kde/jstor/JStorScraper.class */
/**
 * URL scraper for JSTOR publication pages.
 *
 * <p>Handles URLs whose text contains {@code /stable/}, {@code /pss/} or
 * {@code /action/exportSingleCitation}. For each of them the scraper derives the JSTOR export
 * page URL, opens a cookie-based session by following the initial redirect manually, loads the
 * export form, and posts that form to obtain the BibTeX record.
 */
public class JStorScraper extends AbstractUrlScraper {
    private static final String JSTOR_HOST = "jstor.org";
    private static final String JSTOR_HOST_NAME = "http://www.jstor.org";
    private static final String JSTOR_ABSTRACT_PATH = "/pss/";
    private static final String JSTOR_EXPORT_PATH = "/action/exportSingleCitation";
    private static final String JSTOR_STABLE_PATH = "/stable/";
    private static final String JSTOR_DOWNLOAD_SUBMIT_ACTION_YESDOI = "https://www.jstor.org/action/downloadSingleCitationSec?format=bibtex&include=abs&singleCitation=true";
    private static final String EXPORT_PAGE_URL = "https://www.jstor.org/action/exportSingleCitation?singleCitation=true&suffix=";
    private static final String info = "This Scraper parses a publication from " + href("http://www.jstor.org/", "JSTOR");
    private static final Pattern INDEX_PATTERN_FOR_STABLE_PATH = Pattern.compile("/stable/((\\d{2}+\\.\\d++/)?\\d++)");
    private static final Pattern INDEX_PATTERN_FOR_ABSTRACT_PATH = Pattern.compile("/pss/(\\d++)");
    private static final Pattern EXPORT_LINK_PATTERN = Pattern.compile("href=\"([^\"]++).*?id=\"export\"");
    private static final Pattern SUBMIT_ACTION_NODOI_PATTERN = Pattern.compile("<input.*?id=\"noDoi\".*?value=\"([^\"]++)\"");
    private static final Pattern SUBMIT_ACTION_SUFFIX_PATTERN = Pattern.compile("<input.*?name=\"suffix\".*?value=\"([^\"]++)\"");
    private static final Pattern SUBMIT_ACTION_FILENAME_PATTERN = Pattern.compile("<input.*?name=\"downloadFileName\".*?value=\"([^\"]++)\"");
    private static final Pattern NUMBER_CITS_EXPORTED_PATTERN = Pattern.compile("NUMBER OF CITATIONS : (\\d++)");
    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    /** User agent sent by the plain GET helpers. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)";
    /** Fixed cookie that {@link #getCookies(URL)} sends and appends to its result. */
    private static final String FIXED_COOKIE = "I2KBRCK=1";

    // NOTE(review): the three constants below come from unrelated libraries; the decompiler
    // appears to have mapped inlined string literals onto them (presumably ";", "=" and "&").
    // They are referenced unchanged so the compiled values stay identical - confirm the values
    // and replace them with plain literals.
    private static final String COOKIE_ATTRIBUTE_DELIMITER = FileManager.PATH_DELIMITER;
    private static final String COOKIE_NAME_VALUE_SEPARATOR = Tags.symEQ;
    private static final String HTML_UNESCAPED_AMPERSAND = BeanFactory.FACTORY_BEAN_PREFIX;

    @Override
    public String getInfo() {
        return info;
    }

    /**
     * Scrapes the BibTeX record for the JSTOR URL held by the scraping context.
     *
     * @param scrapingContext context providing the URL and receiving the BibTeX result
     * @return {@code true} once a BibTeX result with at least one citation has been stored
     * @throws ScrapingException if the URL carries no usable id, the export page URL cannot be
     *         built, the HTTP exchange fails, or the response reports no citation
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final Map<String, String> cookies = new HashMap<String, String>();
        final String url = scrapingContext.getUrl().toString();
        URL exportPageUrl = null;

        if (url.contains(JSTOR_STABLE_PATH)) {
            final Matcher idMatcher = INDEX_PATTERN_FOR_STABLE_PATH.matcher(url);
            if (!idMatcher.find()) {
                throw new ScrapingException("/stable/ path without id");
            }
            try {
                // the stable id may contain a '/' (DOI style prefix), hence the encoding
                exportPageUrl = new URL(EXPORT_PAGE_URL + URLEncoder.encode(idMatcher.group(1), "UTF-8"));
            } catch (UnsupportedEncodingException | MalformedURLException e) {
                // previously a malformed URL was swallowed and surfaced later as a NullPointerException
                throw new ScrapingException(e);
            }
            startSessionForURL(exportPageUrl, cookies);
        }

        if (url.contains(JSTOR_ABSTRACT_PATH)) {
            final Matcher idMatcher = INDEX_PATTERN_FOR_ABSTRACT_PATH.matcher(url);
            if (!idMatcher.find()) {
                throw new ScrapingException("/pss/ path without id");
            }
            try {
                exportPageUrl = new URL(EXPORT_PAGE_URL + idMatcher.group(1));
            } catch (MalformedURLException e) {
                throw new ScrapingException(e);
            }
            startSessionForURL(exportPageUrl, cookies);
        }

        if (url.contains(JSTOR_EXPORT_PATH)) {
            exportPageUrl = scrapingContext.getUrl();
            if (exportPageUrl.getProtocol().equalsIgnoreCase("http")) {
                try {
                    exportPageUrl = new URL("https", exportPageUrl.getHost(), exportPageUrl.getFile());
                } catch (MalformedURLException ignored) {
                    // best effort: keep the original http URL when the https variant cannot be built
                }
            }
            startSessionForURL(exportPageUrl, cookies);
        }

        if (exportPageUrl == null) {
            // none of the supported path markers was present
            throw new ScrapingException("unsupported JSTOR url: " + url);
        }

        final String bibtex = submitExportPage(exportPageUrl, cookies);
        if (bibtex == null) {
            throw new ScrapingException("no citations received");
        }
        final Matcher countMatcher = NUMBER_CITS_EXPORTED_PATTERN.matcher(bibtex);
        if (!countMatcher.find()) {
            throw new ScrapingException("no citations received");
        }
        final int citationCount;
        try {
            citationCount = Integer.parseInt(countMatcher.group(1));
        } catch (NumberFormatException e) {
            // the pattern accepts arbitrarily long digit runs, which may not fit into an int
            throw new ScrapingException("received invalid citation count: " + countMatcher.group(1));
        }
        if (citationCount < 1) {
            throw new ScrapingException("received " + citationCount + " citations");
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * Fetches the body of {@code url} with a GET request.
     *
     * @param url resource to fetch
     * @param str value sent as the {@code Cookie} request header
     * @return the response body, one character per received byte
     * @throws IOException if connecting or reading fails
     */
    private String getContent(URL url, String str) throws IOException {
        final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setAllowUserInteraction(false);
            connection.setDoInput(true);
            connection.setDoOutput(false);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setRequestProperty(HttpHeaders.COOKIE, str);
            connection.connect();
            final InputStream in = connection.getInputStream();
            try {
                return readAll(in);
            } finally {
                closeQuietly(in);
            }
        } finally {
            // release the connection on the failure path as well
            connection.disconnect();
        }
    }

    /**
     * Requests {@code url} and builds a {@code Cookie} header value from the cookies the
     * response sets, followed by the fixed cookie that was sent with the request.
     *
     * @param url resource to request
     * @return the name/value part of every received cookie, each terminated by the attribute
     *         delimiter and a space, followed by the fixed cookie
     * @throws IOException if connecting fails
     */
    private String getCookies(URL url) throws IOException {
        final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setAllowUserInteraction(false);
            connection.setDoInput(true);
            connection.setDoOutput(false);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setRequestProperty(HttpHeaders.COOKIE, FIXED_COOKIE);
            connection.connect();
            final StringBuilder cookieHeader = new StringBuilder();
            for (final String setCookie : headerValues(connection, HttpHeaders.SET_COOKIE)) {
                if (setCookie == null) {
                    continue;
                }
                final int attributesStart = setCookie.indexOf(COOKIE_ATTRIBUTE_DELIMITER);
                // a cookie without attributes has no delimiter; keep it instead of dropping it
                final String nameValue = attributesStart < 0 ? setCookie : setCookie.substring(0, attributesStart);
                cookieHeader.append(nameValue).append(COOKIE_ATTRIBUTE_DELIMITER).append(' ');
            }
            cookieHeader.append(FIXED_COOKIE);
            return cookieHeader.toString();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Serialises a cookie map into a {@code Cookie} request header value.
     *
     * @param map cookie names mapped to their values
     * @return every entry rendered as {@code name=value; } in the map's iteration order
     */
    private static String cookiesMap2String(Map<String, String> map) {
        final StringBuilder header = new StringBuilder();
        for (final Map.Entry<String, String> cookie : map.entrySet()) {
            header.append(cookie.getKey()).append('=').append(cookie.getValue()).append("; ");
        }
        return header.toString();
    }

    /**
     * Opens a session for {@code url}: requests it without following redirects, stores the
     * cookies it sets, then requests the redirect target with those cookies and stores the
     * cookies of that response too.
     *
     * @param url page for which the session is started
     * @param map cookie store; updated in place
     * @throws ScrapingException if an I/O error occurs or the first response names no redirect
     *         location
     */
    private static void startSessionForURL(URL url, Map<String, String> map) throws ScrapingException {
        HttpURLConnection firstConnection = null;
        HttpURLConnection redirectConnection = null;
        try {
            firstConnection = (HttpURLConnection) url.openConnection();
            firstConnection.setInstanceFollowRedirects(false);
            firstConnection.connect();
            storeCookies(firstConnection, map);

            final List<String> locations = headerValues(firstConnection, HttpHeaders.LOCATION);
            if (locations.isEmpty() || locations.get(0) == null) {
                throw new ScrapingException("no redirect location received from " + url);
            }
            // resolving against the request URL also accepts relative redirect targets
            redirectConnection = (HttpURLConnection) new URL(url, locations.get(0)).openConnection();
            redirectConnection.setInstanceFollowRedirects(false);
            redirectConnection.addRequestProperty(HttpHeaders.COOKIE, cookiesMap2String(map));
            redirectConnection.connect();
            storeCookies(redirectConnection, map);
            // The location of this second response is not needed; reading and discarding it
            // could only fail when the server stops redirecting, so it is no longer read.
        } catch (IOException e) {
            throw new ScrapingException(e);
        } finally {
            if (firstConnection != null) {
                firstConnection.disconnect();
            }
            if (redirectConnection != null) {
                redirectConnection.disconnect();
            }
        }
    }

    /**
     * Loads the export form at {@code url}, extracts its hidden fields and posts the form.
     *
     * @param url export page URL
     * @param map cookie store; sent with the requests and updated with newly set cookies
     * @return the body returned for the posted form
     * @throws ScrapingException if an I/O error occurs or a required form field is missing
     */
    private static String submitExportPage(URL url, Map<String, String> map) throws ScrapingException {
        HttpURLConnection connection = null;
        InputStream in = null;
        try {
            connection = (HttpURLConnection) url.openConnection();
            connection.setInstanceFollowRedirects(false);
            connection.addRequestProperty(HttpHeaders.COOKIE, cookiesMap2String(map));
            connection.connect();
            storeCookies(connection, map);

            in = connection.getInputStream();
            final String exportPage = readAll(in);

            final Matcher noDoiMatcher = SUBMIT_ACTION_NODOI_PATTERN.matcher(exportPage);
            final Matcher suffixMatcher = SUBMIT_ACTION_SUFFIX_PATTERN.matcher(exportPage);
            final Matcher fileNameMatcher = SUBMIT_ACTION_FILENAME_PATTERN.matcher(exportPage);
            if (!noDoiMatcher.find() || !suffixMatcher.find() || !fileNameMatcher.find()) {
                throw new ScrapingException("noDoi flag not found");
            }

            final String noDoi = noDoiMatcher.group(1);
            final URL submitUrl = "noDoi".equalsIgnoreCase(noDoi) ? url : new URL(JSTOR_DOWNLOAD_SUBMIT_ACTION_YESDOI);
            final String postData = "redirectUri=" + URLEncoder.encode(url.getFile(), "UTF-8")
                    + "&noDoi=" + noDoi
                    + "&suffix=" + suffixMatcher.group(1)
                    + "&downloadFileName=" + fileNameMatcher.group(1);
            return WebUtils.getPostContentAsString(cookiesMap2String(map), submitUrl, postData);
        } catch (IOException e) {
            throw new ScrapingException(e);
        } finally {
            // runs on every path, so the stream and connection are also released on failure
            closeQuietly(in);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Starts a session for {@code url}, loads the page and extracts the link of the element
     * with id {@code export}.
     *
     * @param url page containing the export link
     * @param map cookie store; updated in place
     * @return the export link with {@code &amp;} entities unescaped
     * @throws ScrapingException if an I/O error occurs or the page contains no export link
     */
    private static URL getExportLinkAsURL(URL url, Map<String, String> map) throws ScrapingException {
        HttpURLConnection connection = null;
        InputStream in = null;
        try {
            startSessionForURL(url, map);
            connection = (HttpURLConnection) url.openConnection();
            connection.setInstanceFollowRedirects(false);
            connection.addRequestProperty(HttpHeaders.COOKIE, cookiesMap2String(map));
            connection.connect();
            storeCookies(connection, map);

            in = connection.getInputStream();
            final Matcher linkMatcher = EXPORT_LINK_PATTERN.matcher(readAll(in));
            if (!linkMatcher.find()) {
                throw new ScrapingException("Exportlink not found");
            }
            return new URL(linkMatcher.group(1).replace("&amp;", HTML_UNESCAPED_AMPERSAND));
        } catch (IOException e) {
            throw new ScrapingException(e);
        } finally {
            closeQuietly(in);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Collects all values of a response header, comparing the header name case-insensitively.
     *
     * @return the values in the order the connection reports them; empty when the header is absent
     */
    private static List<String> headerValues(HttpURLConnection connection, String name) {
        final List<String> values = new LinkedList<String>();
        for (final Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
            if (name.equalsIgnoreCase(header.getKey()) && header.getValue() != null) {
                values.addAll(header.getValue());
            }
        }
        return values;
    }

    /** Stores the name/value pair of every {@code Set-Cookie} response header in {@code cookies}. */
    private static void storeCookies(HttpURLConnection connection, Map<String, String> cookies) {
        for (final String setCookie : headerValues(connection, HttpHeaders.SET_COOKIE)) {
            if (setCookie == null) {
                continue;
            }
            // everything after the first ';' is cookie attributes, not part of the value
            final int attributesStart = setCookie.indexOf(';');
            final String nameValue = attributesStart < 0 ? setCookie : setCookie.substring(0, attributesStart);
            // split at the FIRST separator only, so values containing it stay intact
            final int separator = nameValue.indexOf(COOKIE_NAME_VALUE_SEPARATOR);
            if (separator <= 0) {
                continue; // no cookie name present
            }
            cookies.put(nameValue.substring(0, separator),
                    nameValue.substring(separator + COOKIE_NAME_VALUE_SEPARATOR.length()));
        }
    }

    /**
     * Reads the stream to its end. Every byte becomes one character, i.e. no charset decoding
     * is applied (unchanged from the previous inline loops).
     */
    private static String readAll(InputStream in) throws IOException {
        final StringWriter content = new StringWriter();
        final BufferedInputStream buffered = new BufferedInputStream(in);
        int next;
        while ((next = buffered.read()) >= 0) {
            content.write(next);
        }
        return content.toString();
    }

    /** Closes the stream if it is non-null, ignoring a failure to close. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // nothing useful can be done when closing fails
        }
    }

    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    @Override
    public String getSupportedSiteName() {
        return "JSTOR";
    }

    @Override
    public String getSupportedSiteURL() {
        return JSTOR_HOST_NAME;
    }

    static {
        // one shared host pattern combined with each supported path pattern
        final Pattern hostPattern = Pattern.compile(".*jstor.org");
        patterns.add(new Pair<>(hostPattern, Pattern.compile("/pss/.*")));
        patterns.add(new Pair<>(hostPattern, Pattern.compile("/action/exportSingleCitation.*")));
        patterns.add(new Pair<>(hostPattern, Pattern.compile("/stable/.*")));
    }
}
