package org.bibsonomy.scraper.url.kde.ingenta;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.UrlScraper;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.1.jar:org/bibsonomy/scraper/url/kde/ingenta/IngentaconnectScraper.class */
/**
 * URL scraper for publication pages on ingentaconnect.com.
 *
 * <p>The scraper looks for the "BibText Export" link in the page content, requests a
 * session cookie from the Ingentaconnect start page, downloads the export with that
 * cookie and rewrites the result so that the repeated {@code author} fields of the
 * export are merged into a single {@code author = "A and B"} field.
 */
public class IngentaconnectScraper extends UrlScraper {
    private static final String INGENTA_HOST = "ingentaconnect.com";
    private static final String INGENTA_CITATION_URL = "http://www.ingentaconnect.com/";
    /** User-Agent header sent with every request issued by this scraper. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)";
    private static final String info = "Ingentaconnect Scraper: This scraper parses a publication page from " + href(INGENTA_CITATION_URL, "Ingentaconnect");
    /**
     * Finds the export link; group 1 is the href value. The value stops at the first
     * closing quote so that further attributes on the same line are not captured.
     */
    private static final Pattern exportPattern = Pattern.compile("BibText Export\" href=\"([^\"]*)\"");
    /** Recognises a line that starts an {@code author} field (as opposed to a line that merely mentions the word). */
    private static final Pattern authorLinePattern = Pattern.compile("^\\s*author\\s*=", Pattern.CASE_INSENSITIVE);
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(new Tuple<Pattern, Pattern>(Pattern.compile(".*" + INGENTA_HOST), UrlScraper.EMPTY_PATTERN));

    /**
     * Extracts the BibTeX export link from the page content, downloads the export and
     * stores the normalised entry in the scraping context.
     *
     * @param scrapingContext context providing the page content and receiving the result
     * @return {@code true} if a BibTeX result was set, {@code false} if the page
     *         contains no export link
     * @throws ScrapingException an {@link InternalFailureException} wrapping any
     *         {@link IOException} (including a malformed export URL) raised while
     *         contacting Ingentaconnect
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final Matcher exportMatcher = exportPattern.matcher(scrapingContext.getPageContent());
        if (!exportMatcher.find()) {
            return false;
        }

        // Drop the trailing slash of the base URL before the captured href is appended.
        final String baseUrl = INGENTA_CITATION_URL.substring(0, INGENTA_CITATION_URL.length() - 1);
        try {
            // MalformedURLException is an IOException, so one handler covers both.
            final URL exportUrl = new URL(baseUrl + exportMatcher.group(1));
            final String rawBibtex = getBibTexFromIngenta(exportUrl, getCookieFromIngenta());
            scrapingContext.setBibtexResult(mergeAuthorFields(rawBibtex));
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Rewrites the downloaded export into a single-line BibTeX entry: blanks are removed
     * from the first line, HTML is stripped from every non-author line, each such line
     * is terminated with a comma, and all author lines are merged into one trailing
     * {@code author} field joined by {@code " and "}.
     *
     * @param rawBibtex the export exactly as downloaded
     * @return the rewritten entry, ending in {@code author = "..."}}
     */
    private String mergeAuthorFields(String rawBibtex) {
        final String[] lines = rawBibtex.split("\n");
        lines[0] = lines[0].replaceAll(" ", "");

        final StringBuilder entry = new StringBuilder();
        final StringBuilder authors = new StringBuilder("author = \"");
        boolean firstAuthor = true;

        // The last line is skipped on purpose; the closing brace is re-appended after
        // the merged author field (presumably the export ends with that brace).
        for (int i = 0; i < lines.length - 1; i++) {
            final String line = lines[i];

            if (!authorLinePattern.matcher(line).find()) {
                final String cleaned = removeHTML(line);
                entry.append(cleaned);
                if (!cleaned.endsWith(",")) {
                    entry.append(',');
                }
                continue;
            }

            final int openQuote = line.indexOf('"');
            final int closeQuote = line.lastIndexOf('"');
            if (openQuote < 0 || closeQuote <= openQuote) {
                // Author line without a quoted value: nothing usable to extract.
                continue;
            }
            if (!firstAuthor) {
                authors.append(" and ");
            }
            authors.append(line, openQuote + 1, closeQuote);
            firstAuthor = false;
        }

        authors.append("\"}");
        return entry.append(authors).toString();
    }

    /**
     * Strips HTML from one line of the export: a fixed set of formatting tags, numeric
     * character references, image tags (the text following {@code alt="} is kept) and
     * finally anything still enclosed in angle brackets.
     *
     * @param str line to clean
     * @return the line without the matched markup
     */
    private String removeHTML(String str) {
        return str
                .replaceAll("<.?p>|<.?P>", "")
                .replaceAll("<.?b>|<.?B>", "")
                .replaceAll("<.?i>|<.?I>", "")
                .replaceAll("<.?u>|<.?U>", "")
                .replaceAll("<.?hr>|<.?HR>", "")
                .replaceAll("<.?br>|<.?BR>", "")
                .replaceAll("<.?sup>|<.?SUP>", "")
                .replaceAll("<.?sub>|<.?SUB>", "")
                .replaceAll("&#[0-9]*;", "")
                .replaceAll("<[iI][mM][gG] .* [aA][lL][tT]=\"", "")
                .replaceAll("\">", "")
                .replaceAll("<[iI][mM][gG].*>", "")
                // Catch-all: the greedy match removes everything from the first '<' to the last '>'.
                .replaceAll("<.*>", "");
    }

    /**
     * Downloads the BibTeX export.
     *
     * @param url    export URL
     * @param cookie value for the {@code Cookie} request header, or {@code null} to
     *               send no cookie
     * @return the response body; every byte is mapped to the character with the same
     *         code, i.e. the body is not decoded with a multi-byte charset
     * @throws IOException if the connection or the read fails
     */
    private String getBibTexFromIngenta(URL url, String cookie) throws IOException {
        final HttpURLConnection connection = openConnection(url);
        if (cookie != null) {
            connection.setRequestProperty("Cookie", cookie);
        }
        try {
            connection.connect();
            final BufferedInputStream in = new BufferedInputStream(connection.getInputStream());
            try {
                final StringWriter body = new StringWriter();
                int b;
                while ((b = in.read()) >= 0) {
                    body.write(b);
                }
                return body.toString();
            } finally {
                in.close();
            }
        } finally {
            // Release the connection on success and on failure alike.
            connection.disconnect();
        }
    }

    /**
     * Requests the Ingentaconnect start page and returns the session cookie it sets.
     *
     * @return the complete value of the first {@code Set-Cookie} header that contains
     *         {@code JSESSIONID}, or {@code null} if the response carries no such header
     * @throws IOException if the connection fails
     */
    private String getCookieFromIngenta() throws IOException {
        final HttpURLConnection connection = openConnection(new URL(INGENTA_CITATION_URL));
        try {
            connection.connect();
            // The map has no entry when the server sends no Set-Cookie header at all.
            final List<String> setCookieHeaders = connection.getHeaderFields().get("Set-Cookie");
            if (setCookieHeaders != null) {
                for (String header : setCookieHeaders) {
                    if (header.contains("JSESSIONID")) {
                        return header;
                    }
                }
            }
            return null;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Opens (without connecting) an HTTP connection configured the way every request of
     * this scraper needs it: input only, no caching, no user interaction, fixed User-Agent.
     *
     * @param url target URL
     * @return the configured, not yet connected connection
     * @throws IOException if the connection cannot be opened
     */
    private static HttpURLConnection openConnection(URL url) throws IOException {
        final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setAllowUserInteraction(false);
        connection.setDoInput(true);
        connection.setDoOutput(false);
        connection.setUseCaches(false);
        connection.setRequestProperty("User-Agent", USER_AGENT);
        return connection;
    }

    /**
     * @return the description of this scraper, including a link to Ingentaconnect
     */
    @Override
    public String getInfo() {
        return info;
    }

    /**
     * @return the single host/path pattern pair this scraper is registered for
     */
    @Override
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }
}
