package org.bibsonomy.scraper.url.kde.springer;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.methods.GetMethod;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.scraper.url.kde.worldcat.WorldCatScraper;
import org.bibsonomy.util.UrlUtils;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.id.ISBNUtils;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/springer/SpringerLinkScraper.class */
/**
 * URL scraper for publication pages on SpringerLink.
 *
 * <p>Three strategies are tried in order on the downloaded publication page:
 * <ol>
 *   <li>follow the {@code /export-citation/} link on the page and download the BibTeX
 *       file linked from the export page;</li>
 *   <li>extract an ISBN from the page and ask {@link WorldCatScraper} for the BibTeX;</li>
 *   <li>derive a content id from the URL and submit the export form on
 *       {@code www.springerlink.com} (session-cookie based flow).</li>
 * </ol>
 */
public class SpringerLinkScraper extends AbstractUrlScraper {
    private static final String SPRINGER_CITATION_HOST_COM = "springerlink.com";
    private static final String SPRINGER_CITATION_HOST_DE = "springerlink.de";
    private static final String SPRINGER_CITATION_HOST_NEW = "link.springer.com";
    private static final String SPRINGER_LINK_METAPRESS = "springerlink.metapress.com";

    private static final Pattern CONTENT_PATTERN = Pattern.compile("content/(.+?)(/|$)");
    private static final Pattern ID_PATTERN = Pattern.compile("id=([^\\&]*)");
    private static final Pattern VIEW_STATE_PATTERN = Pattern.compile("id=\"__VIEWSTATE\" value=\"(.+?)\"");
    private static final Pattern EVENT_VALIDATION_PATTERN = Pattern.compile("id=\"__EVENTVALIDATION\" value=\"(.+?)\"");
    private static final Pattern SESSION_PATTERN = Pattern.compile("ASP\\.NET_SessionId=(\\w*+);");
    private static final Pattern YEAR_PATTERN_FOR_BIBTEX = Pattern.compile("(year[^\\{]*+\\{(.*?)\\})");
    private static final Pattern YEAR_PATTERN_FOR_PAGE = Pattern.compile("(?s)<div class=\"secondary\">.*?((20|19)\\d{2}+).*?</div>");
    private static final Pattern EXPORT_LINK_PATTERN = Pattern.compile("href=\"(/export-citation/[^\"]++)\"");
    private static final Pattern BIBTEX_LINK_PATTERN = Pattern.compile("class=\"bib\"[^>]*?href=\"([^\"]++)\"");
    private static final Pattern ABSTRACT_PATTERN_FOR_PAGE = Pattern.compile("(?ms)<div class=\"abstract-content formatted\" itemprop=\"description\">.*?<p class=\"a-plus-plus\">([^<]*+)");

    private static final String SITE_URL = "http://www.springerlink.com/";
    private static final String SITE_NAME = "SpringerLink";
    private static final String INFO = "This scraper parses a publication page from " + href(SITE_URL, SITE_NAME) + ".";

    /** Form fields sent between the view state and the event validation value of the export form. */
    private static final String EXPORT_FORM_FIELDS = "&ctl00%24ctl14%24cultureList=de-de&ctl00%24ctl14%24SearchControl%24BasicSearchForTextBox=&ctl00%24ctl14%24SearchControl%24BasicAuthorOrEditorTextBox=&ctl00%24ctl14%24SearchControl%24BasicPublicationTextBox=&ctl00%24ctl14%24SearchControl%24BasicVolumeTextBox=&ctl00%24ctl14%24SearchControl%24BasicIssueTextBox=&ctl00%24ctl14%24SearchControl%24BasicPageTextBox=&ctl00%24ContentPrimary%24ctl00%24ctl00%24Export=AbstractRadioButton&ctl00%24ContentPrimary%24ctl00%24ctl00%24CitationManagerDropDownList=BibTex&ctl00%24ContentPrimary%24ctl00%24ctl00%24ExportCitationButton=Zitierung+exportieren+&__EVENTVALIDATION=";

    /** Host/path pattern pairs of the supported sites; the path pattern is always {@code EMPTY_PATTERN}. */
    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        patterns.add(new Pair<>(hostPattern(SPRINGER_CITATION_HOST_COM), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Pair<>(hostPattern(SPRINGER_CITATION_HOST_DE), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Pair<>(hostPattern(SPRINGER_CITATION_HOST_NEW), AbstractUrlScraper.EMPTY_PATTERN));
        patterns.add(new Pair<>(hostPattern(SPRINGER_LINK_METAPRESS), AbstractUrlScraper.EMPTY_PATTERN));
    }

    /**
     * Builds a pattern matching any text that ends in (or, with {@code find()}, contains) the given host.
     * The host is quoted so that its dots only match literal dots.
     *
     * @param host literal host name
     * @return the compiled pattern {@code .*<host>}
     */
    private static Pattern hostPattern(final String host) {
        return Pattern.compile(".*" + Pattern.quote(host));
    }

    /**
     * Downloads the page behind the context URL and tries the export-link, ISBN and
     * form-based strategies in that order.
     *
     * @param scrapingContext context holding the URL to scrape; receives the BibTeX result
     * @return {@code true} if a BibTeX result was stored in the context, {@code false} if
     *         the ISBN lookup returned nothing or no content id could be derived from the URL
     * @throws ScrapingException if the page or an export resource is missing, or if an I/O error occurs
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final String url = scrapingContext.getUrl().toString();
        try {
            final HttpClient httpClient = WebUtils.getHttpClient();
            final GetMethod getMethod = new GetMethod(url);
            final String pageContent = WebUtils.getContentAsString(httpClient, getMethod);
            if (!ValidationUtils.present(pageContent)) {
                throw new ScrapingException("server did not return response code 200 for URL " + getMethod.getURI());
            }

            // strategy 1: the page links to an export page offering a BibTeX file
            final Matcher exportLinkMatcher = EXPORT_LINK_PATTERN.matcher(pageContent);
            if (exportLinkMatcher.find()) {
                final URI exportUri = new URI(getMethod.getURI(), exportLinkMatcher.group(1), true);
                final String bibtex = downloadBibtexViaExportPage(httpClient, exportUri, extractAbstract(pageContent));
                scrapingContext.setBibtexResult(bibtex);
                return true;
            }

            // strategy 2: the page contains an ISBN that WorldCat can resolve
            final String isbn = ISBNUtils.extractISBN(pageContent);
            if (ValidationUtils.present(isbn)) {
                final String bibtex = WorldCatScraper.getBibtexByISBNAndReplaceURL(isbn, url);
                if (!ValidationUtils.present(bibtex)) {
                    return false;
                }
                scrapingContext.setBibtexResult(bibtex);
                return true;
            }
        } catch (IOException e) {
            throw new ScrapingException(e);
        }

        // strategy 3: submit the export form of the session-based site
        return scrapeViaExportForm(scrapingContext, url);
    }

    /**
     * Extracts the abstract text from the publication page.
     *
     * @param pageContent HTML of the publication page
     * @return the abstract, or {@code null} if the page does not match the abstract pattern
     */
    private static String extractAbstract(final String pageContent) {
        final Matcher abstractMatcher = ABSTRACT_PATTERN_FOR_PAGE.matcher(pageContent);
        return abstractMatcher.find() ? abstractMatcher.group(1) : null;
    }

    /**
     * Loads the export page, follows its BibTeX link and returns the downloaded BibTeX,
     * enriched with the abstract if one is given and the entry has none.
     *
     * @param httpClient client used for both requests
     * @param exportUri URI of the export page
     * @param abstractText abstract taken from the publication page, may be {@code null}
     * @return the BibTeX entry
     * @throws IOException if a URI cannot be built or a request fails
     * @throws ScrapingException if the export page, the BibTeX link or the BibTeX file is missing
     */
    private static String downloadBibtexViaExportPage(final HttpClient httpClient, final URI exportUri, final String abstractText) throws IOException, ScrapingException {
        final String exportPage = WebUtils.getContentAsString(httpClient, exportUri);
        if (!ValidationUtils.present(exportPage)) {
            throw new ScrapingException("server did not return response code 200 for URL " + exportUri);
        }
        final Matcher bibtexLinkMatcher = BIBTEX_LINK_PATTERN.matcher(exportPage);
        if (!bibtexLinkMatcher.find()) {
            throw new ScrapingException("could not find link to BibTeX file");
        }
        // the BibTeX link is resolved relative to the export page
        final String bibtex = WebUtils.getContentAsString(httpClient, new URI(exportUri, bibtexLinkMatcher.group(1), true));
        if (!ValidationUtils.present(bibtex)) {
            throw new ScrapingException("BibTeX file not present");
        }
        if (ValidationUtils.present(abstractText)) {
            return BibTexUtils.addFieldIfNotContained(bibtex, "abstract", abstractText);
        }
        return bibtex;
    }

    /**
     * Derives the content id from the URL: the segment after {@code content/} if present,
     * otherwise the value of the {@code id=} parameter.
     *
     * @param url URL of the publication page
     * @return the content id, or {@code null} if the URL contains neither form
     */
    private static String extractContentId(final String url) {
        final Matcher contentMatcher = CONTENT_PATTERN.matcher(url);
        if (contentMatcher.find()) {
            return contentMatcher.group(1);
        }
        final Matcher idMatcher = ID_PATTERN.matcher(url);
        return idMatcher.find() ? idMatcher.group(1) : null;
    }

    /**
     * Obtains a session cookie for the URL, loads the export-citation page of the content id
     * and posts its form (view state and event validation values) to request BibTeX.
     *
     * @param scrapingContext context that receives the cleaned BibTeX result
     * @param url URL of the publication page
     * @return {@code true} if BibTeX was stored, {@code false} if no content id could be derived
     * @throws ScrapingException if no session cookie is available
     * @throws ScrapingFailureException if the export form or its response is unusable
     * @throws InternalFailureException if an I/O error occurs
     */
    private static boolean scrapeViaExportForm(final ScrapingContext scrapingContext, final String url) throws ScrapingException {
        final String contentId = extractContentId(url);
        if (contentId == null) {
            return false;
        }
        try {
            // NOTE(review): getCookies is not visible here; a null result is treated like a missing cookie
            final String cookies = WebUtils.getCookies(new URL(url));
            final Matcher sessionMatcher = cookies == null ? null : SESSION_PATTERN.matcher(cookies);
            if (sessionMatcher == null || !sessionMatcher.find()) {
                throw new ScrapingException("No Session Cookie!");
            }
            final String cookie = "ASP.NET_SessionId=" + sessionMatcher.group(1) + "; CookiesSupported=True; highlighterEnabled=true; MUD=MP";
            final String exportUrl = SITE_URL + "content/" + contentId + "/export-citation/";
            final String exportPage = WebUtils.getContentAsString(exportUrl, cookie);

            // a missing export page is reported as a scraping failure instead of failing on a null match input
            if (ValidationUtils.present(exportPage)) {
                final Matcher viewStateMatcher = VIEW_STATE_PATTERN.matcher(exportPage);
                final Matcher eventValidationMatcher = EVENT_VALIDATION_PATTERN.matcher(exportPage);
                if (viewStateMatcher.find() && eventValidationMatcher.find()) {
                    final String postData = "__VIEWSTATE=" + UrlUtils.safeURIEncode(viewStateMatcher.group(1))
                            + EXPORT_FORM_FIELDS + UrlUtils.safeURIEncode(eventValidationMatcher.group(1));
                    final String bibtex = WebUtils.getPostContentAsString(cookie, new URL(exportUrl), postData);
                    if (ValidationUtils.present(bibtex)) {
                        scrapingContext.setBibtexResult(insertYearIfNotContained(cleanEntry(bibtex), scrapingContext));
                        return true;
                    }
                }
            }
            throw new ScrapingFailureException("getting bibtex failed");
        } catch (IOException e) {
            // also covers MalformedURLException, which is an IOException
            throw new InternalFailureException(e);
        }
    }

    /**
     * Adds a year taken from the page content if the BibTeX has no four-character year value.
     *
     * @param str BibTeX entry
     * @param scrapingContext context providing the page content
     * @return the BibTeX entry, with the year inserted or replaced when one was found on the page
     * @throws ScrapingException if the page content cannot be obtained from the context
     */
    private static String insertYearIfNotContained(String str, ScrapingContext scrapingContext) throws ScrapingException {
        if (bibtexContainsYear(str)) {
            return str;
        }
        final Matcher pageYearMatcher = YEAR_PATTERN_FOR_PAGE.matcher(scrapingContext.getPageContent());
        return pageYearMatcher.find() ? insertYear(str, pageYearMatcher.group(1)) : str;
    }

    /**
     * Checks whether the BibTeX has a year field whose trimmed value is exactly four characters long.
     *
     * @param str BibTeX entry
     * @return {@code true} if such a year field exists
     */
    private static boolean bibtexContainsYear(String str) {
        final Matcher yearMatcher = YEAR_PATTERN_FOR_BIBTEX.matcher(str);
        return yearMatcher.find() && yearMatcher.group(2).trim().length() == 4;
    }

    /**
     * Sets the year of the BibTeX entry: replaces an existing year field, otherwise adds one.
     *
     * @param str BibTeX entry
     * @param str2 year to set
     * @return the updated BibTeX entry
     */
    private static String insertYear(String str, String str2) {
        final Matcher yearMatcher = YEAR_PATTERN_FOR_BIBTEX.matcher(str);
        if (!yearMatcher.find()) {
            return BibTexUtils.addFieldIfNotContained(str, "year", str2);
        }
        return str.replace(yearMatcher.group(1), "year={" + str2 + "}");
    }

    /**
     * Normalizes the BibTeX returned by the export form: renames the note field to doi, splits
     * "Springer Berlin" into publisher and address, and removes the space before the first brace.
     *
     * @param str BibTeX entry
     * @return the cleaned BibTeX entry
     */
    private static String cleanEntry(String str) {
        return str.replace("note = {", "doi = {")
                .replace("Springer Berlin", "Springer},\n   address = {Berlin")
                .replaceFirst(" \\{", "{");
    }

    @Override
    public String getInfo() {
        return INFO;
    }

    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
