package org.bibsonomy.scraper.url.kde.ssrn;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.XmlUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.8.0.jar:org/bibsonomy/scraper/url/kde/ssrn/SSRNScraper.class */
/**
 * URL scraper for publications hosted on SSRN ({@code papers.ssrn.com}).
 *
 * <p>Given an SSRN abstract page URL or an SSRN reference-export URL, the scraper extracts the
 * {@code abstract_id}, downloads the BibTeX export page, reads the BibTeX record from the
 * {@code value} attribute of the page's {@code input} elements, inserts a generated BibTeX key
 * and adds the abstract text taken from the originally requested page.
 */
public class SSRNScraper extends AbstractUrlScraper {
    private static final Log log = LogFactory.getLog(SSRNScraper.class);

    private static final String SITE_NAME = "SSRN";
    private static final String SITE_URL = "http://papers.ssrn.com/";
    private static final String INFO = "This Scraper parses a publication from " + href(SITE_URL, SITE_NAME) + " and extracts the adequate BibTeX entry.";

    private static final String HOST = "ssrn.com";
    private static final String SSRN_HOST_NAME = "http://papers.ssrn.com";
    private static final String SSRN_ABSTRACT_PATH = "/sol3/papers.cfm?abstract_id=";
    private static final String SSRN_BIBTEX_PATH = "/sol3/RefExport.cfm";
    private static final String SSRN_BIBTEX_PARAMS = "?function=download&format=2&abstract_id=";

    /** Matches the value of the {@code abstract_id} query parameter wherever it occurs in the query. */
    private static final Pattern ABSTRACT_ID_PATTERN = Pattern.compile("[?&]abstract_id=([^&#]+)");

    // BibTeX field patterns are compiled once instead of on every key generation.
    private static final Pattern AUTHOR_PATTERN = Pattern.compile("author\\s*=\\s*[{]+(.+)[}]+");
    private static final Pattern EDITOR_PATTERN = Pattern.compile("editor\\s*=\\s*[{]+(.+)[}]+");
    private static final Pattern TITLE_PATTERN = Pattern.compile("title\\s*=\\s*[{]+(.+)[}]+");
    private static final Pattern YEAR_PATTERN = Pattern.compile("year\\s*=\\s*[{]+(.+)[}]+");

    // No DOTALL flag: only an abstract that sits on a single line of the page is captured.
    private static final Pattern ABSTRACT_PATTERN = Pattern.compile("<div id=\"abstract\">(.*)</div>");

    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        patterns.add(new Pair<>(Pattern.compile(".*" + HOST), AbstractUrlScraper.EMPTY_PATTERN));
    }

    /**
     * Scrapes the BibTeX entry for the SSRN publication referenced by the context URL.
     *
     * @param scrapingContext context providing the URL and receiving the BibTeX result
     * @return {@code true} if a BibTeX entry was stored in the context; {@code false} if the URL
     *         does not start with the SSRN host or the export page carried no BibTeX value
     * @throws ScrapingException if no abstract id can be found in the URL, the cookies cannot be
     *         obtained, the download URL is malformed, or the download fails
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        final String url = scrapingContext.getUrl().toString();
        if (!url.startsWith(SSRN_HOST_NAME)) {
            return false;
        }

        final String abstractId = extractAbstractId(url);
        if (abstractId == null) {
            throw new ScrapingFailureException("ID for download link is missing.");
        }
        final String downloadUrl = SSRN_HOST_NAME + SSRN_BIBTEX_PATH + SSRN_BIBTEX_PARAMS + abstractId;

        // Cookie retrieval gets its own try block so that a failure here is reported as a cookie
        // problem and not as a failed BibTeX download.
        final String cookies;
        try {
            cookies = getCookies(scrapingContext.getUrl());
        } catch (IOException e) {
            log.debug("could not get cookies from " + scrapingContext.getUrl(), e);
            throw new InternalFailureException("Could not store cookies from " + scrapingContext.getUrl());
        }

        try {
            final String exportPage = WebUtils.getContentAsString(new URL(downloadUrl), cookies);
            final String bibtex = extractBibtex(XmlUtils.getDOM(exportPage).getElementsByTagName("input"));
            if (bibtex == null) {
                return false;
            }
            scrapingContext.setBibtexResult(BibTexUtils.addFieldIfNotContained(bibtex, "abstract", abstractParser(scrapingContext.getUrl())));
            scrapingContext.setScraper(this);
            return true;
        } catch (MalformedURLException e) {
            log.debug("invalid download url " + downloadUrl, e);
            throw new InternalFailureException("The url " + downloadUrl + " is not valid");
        } catch (IOException e) {
            log.debug("BibTeX download from " + downloadUrl + " failed", e);
            throw new ScrapingFailureException("BibTex download failed. Result is null!");
        }
    }

    /**
     * Extracts the {@code abstract_id} query parameter from an SSRN abstract or export URL.
     *
     * @param url the full URL as a string
     * @return the id, or {@code null} if the URL is neither an abstract page nor an export page,
     *         or carries no non-empty {@code abstract_id} parameter
     */
    private static String extractAbstractId(String url) {
        final boolean abstractPage = url.startsWith(SSRN_HOST_NAME + SSRN_ABSTRACT_PATH);
        final boolean exportPage = url.startsWith(SSRN_HOST_NAME + SSRN_BIBTEX_PATH);
        if (!abstractPage && !exportPage) {
            return null;
        }
        // Matching the parameter itself avoids index arithmetic that breaks when the parameter
        // order differs or further parameters follow the id.
        final Matcher matcher = ABSTRACT_ID_PATTERN.matcher(url);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Builds the BibTeX string from the {@code value} attributes of the given {@code input} nodes.
     * If several nodes carry a {@code value} attribute, the last one wins.
     *
     * @param inputs the {@code input} elements of the export page
     * @return the BibTeX string with a generated key inserted, or {@code null} if no node has a
     *         {@code value} attribute
     */
    private String extractBibtex(NodeList inputs) {
        String bibtex = null;
        for (int i = 0; i < inputs.getLength(); i++) {
            final NamedNodeMap attributes = inputs.item(i).getAttributes();
            final org.w3c.dom.Node valueAttribute = attributes.getNamedItem("value");
            if (valueAttribute == null) {
                continue;
            }
            // Put every field on its own line so the field patterns cannot span several fields.
            final String rawEntry = valueAttribute.getNodeValue().replace("},", "},\n");
            // The first blank is replaced by the key; quoting keeps '$' and '\' in it literal.
            bibtex = rawEntry.replaceFirst(" ", Matcher.quoteReplacement(generateBibtexKey(rawEntry) + ",\n "));
        }
        return bibtex;
    }

    /**
     * Downloads the page at {@code url} and returns the content of its abstract {@code div}.
     *
     * @param url the page to read the abstract from
     * @return the abstract text, or {@code null} if none was found or the page could not be read
     */
    private static String abstractParser(URL url) {
        try {
            final Matcher matcher = ABSTRACT_PATTERN.matcher(WebUtils.getContentAsString(url));
            return matcher.find() ? matcher.group(1) : null;
        } catch (Exception e) {
            // Best effort: a missing abstract must not fail the whole scrape.
            log.error("error while getting abstract for " + url, e);
            return null;
        }
    }

    /**
     * Generates a BibTeX key from the author, editor, year and title fields of the given entry.
     *
     * @param bibtex the BibTeX entry text, one field per line
     * @return the key produced by {@link BibTexUtils#generateBibtexKey}
     */
    private String generateBibtexKey(String bibtex) {
        final String authors = firstGroup(AUTHOR_PATTERN, bibtex);
        final String editors = firstGroup(EDITOR_PATTERN, bibtex);
        final String year = firstGroup(YEAR_PATTERN, bibtex);
        final String title = firstGroup(TITLE_PATTERN, bibtex);
        return BibTexUtils.generateBibtexKey(authors, editors, year, title);
    }

    /** Returns capture group 1 of the first match of {@code pattern} in {@code text}, or {@code null}. */
    private static String firstGroup(Pattern pattern, String text) {
        final Matcher matcher = pattern.matcher(text);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Builds the cookie header for the export download: the cookies reported for {@code url}
     * followed by a fixed set of SSRN login cookies.
     *
     * @param url the URL whose cookies are fetched
     * @return the combined cookie string
     * @throws IOException if the cookies cannot be fetched
     */
    private String getCookies(URL url) throws IOException {
        final String siteCookies = WebUtils.getCookies(url);
        // Guard against a null result, which would otherwise cause a NullPointerException here.
        final StringBuilder cookies = new StringBuilder(siteCookies == null ? "" : siteCookies);
        // NOTE(review): hard-coded login/session values are embedded in the source; they look like
        // credentials of a fixed account and should be moved to configuration - confirm with owner.
        cookies.append(" ; CFCLIENT_SSRN=loginexpire%3D%7Bts%20%272009%2D12%2D12%2012%3A35%3A00%27%7D%23blnlogedin%3D1401777%23;domain=hq.ssrn.com;path=/; ");
        cookies.append("SSRN_LOGIN=092026079048019002070010027035037114047052089011063088001026083003082103106066127064089084103; ");
        cookies.append("SSRN_PW=002008020074048016097064090009116110016084070087029069024; ");
        return cookies.toString();
    }

    /** Returns the description of this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    /** Returns the host/path pattern pairs registered for this scraper. */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /** Returns the name of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** Returns the URL of the supported site. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
