package org.bibsonomy.scraper.url.kde.ingenta;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.json.util.JSONUtils;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.id.DOIUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.8.8.jar:org/bibsonomy/scraper/url/kde/ingenta/IngentaconnectScraper.class */
/**
 * URL scraper for publication pages of Ingentaconnect.
 *
 * <p>The scraper looks for the "BibText Export" link in the page content,
 * downloads the linked export and rewrites it: every field line is stripped of
 * HTML markup and terminated with a comma, and all lines carrying an author
 * are folded into one single {@code author = "..."} field that is placed at
 * the end of the entry.
 */
public class IngentaconnectScraper extends AbstractUrlScraper {
    private static final String INGENTA_HOST = "ingentaconnect.com";
    private static final String SITE_URL = "http://www.ingentaconnect.com/";
    private static final String SITE_NAME = "Ingentaconnect";
    private static final String INFO = "This scraper parses a publication page from " + href(SITE_URL, SITE_NAME) + ".";

    /** Finds the export link in the page; group 1 is the value of its {@code href} attribute. */
    private static final Pattern EXPORT_PATTERN = Pattern.compile("BibText Export\" href=\"([^\"]++)\"");

    /** Host pattern paired with the empty path pattern of {@link AbstractUrlScraper}. */
    private static final List<Pair<Pattern, Pattern>> URL_PATTERNS = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*" + INGENTA_HOST), AbstractUrlScraper.EMPTY_PATTERN));

    /** Line separator of the export; tolerates both LF and CRLF line endings. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

    /**
     * Markup patterns removed by {@link #removeHTML(String)}. The order is
     * significant: the specific tag patterns run before the greedy catch-all
     * at the end.
     */
    private static final Pattern[] HTML_PATTERNS = {
        Pattern.compile("<.?p>|<.?P>"),
        Pattern.compile("<.?b>|<.?B>"),
        Pattern.compile("<.?i>|<.?I>"),
        Pattern.compile("<.?u>|<.?U>"),
        Pattern.compile("<.?hr>|<.?HR>"),
        Pattern.compile("<.?br>|<.?BR>"),
        Pattern.compile("<.?sup>|<.?SUP>"),
        Pattern.compile("<.?sub>|<.?SUB>"),
        Pattern.compile("&#[0-9]*;"),
        Pattern.compile("<[iI][mM][gG] .* [aA][lL][tT]=\""),
        Pattern.compile("\">"),
        Pattern.compile("<[iI][mM][gG].*>"),
        Pattern.compile("<.*>")
    };

    /**
     * Scrapes the BibTeX export that is linked from the page held by the context.
     *
     * @param scrapingContext context providing the page content and receiving the result
     * @return {@code true} if a BibTeX result was stored in the context,
     *         {@code false} if the page has no content or no export link
     * @throws ScrapingException an {@link InternalFailureException} if the export
     *         URL is malformed or the download fails with an I/O error, a
     *         {@link ScrapingFailureException} if no usable export was obtained
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String pageContent = scrapingContext.getPageContent();
        if (pageContent == null) {
            return false;
        }
        final Matcher exportMatcher = EXPORT_PATTERN.matcher(pageContent);
        if (!exportMatcher.find()) {
            return false;
        }

        try {
            // Resolve the href against the site root: root-relative links yield the
            // same URL as plain concatenation, absolute and relative links work too.
            final URL exportUrl = new URL(new URL(SITE_URL), exportMatcher.group(1));
            final String exported = WebUtils.getContentAsString(exportUrl, WebUtils.getCookies(exportUrl));

            // A missing or blank export cannot be turned into an entry.
            final String bibtex = (exported == null || exported.trim().isEmpty())
                    ? null
                    : DOIUtils.cleanDOI(mergeAuthorLines(exported));
            if (bibtex == null) {
                throw new ScrapingFailureException("getting bibtex failed");
            }

            scrapingContext.setBibtexResult(bibtex);
            return true;
        } catch (IOException e) {
            // MalformedURLException is an IOException, so it is covered here as well.
            throw new InternalFailureException(e);
        }
    }

    /**
     * Rebuilds the downloaded export into a single entry string.
     *
     * <p>All spaces are removed from the first line. Every line except the last
     * one is then processed: lines carrying an author are collected, all other
     * non-blank lines are cleaned from HTML and terminated with a comma. The
     * collected authors are appended as the final field, followed by the closing
     * brace of the entry.
     *
     * @param exported the raw, non-null export text
     * @return the rewritten entry
     */
    private static String mergeAuthorLines(String exported) {
        final String[] lines = LINE_BREAK.split(exported);
        if (lines.length > 0) {
            lines[0] = lines[0].replace(" ", "");
        }

        final StringBuilder fields = new StringBuilder();
        final StringBuilder authors = new StringBuilder("author = \"");
        boolean firstAuthor = true;

        // The last line is not copied; the entry is closed explicitly below.
        // NOTE(review): presumably the last line of the export is the closing brace - confirm.
        for (int i = 0; i < lines.length - 1; i++) {
            final String line = lines[i];

            // NOTE(review): any line containing "author" is treated as an author line,
            // which may also hit other fields mentioning that word - confirm the export format.
            final String authorName = line.contains("author") ? quotedValue(line) : null;
            if (authorName != null) {
                if (!firstAuthor) {
                    authors.append(PersonNameUtils.PERSON_NAME_DELIMITER);
                }
                authors.append(authorName);
                firstAuthor = false;
                continue;
            }

            final String cleaned = removeHTML(line);
            if (cleaned.trim().isEmpty()) {
                // a blank line would otherwise end up as a lone comma in the entry
                continue;
            }
            fields.append(cleaned);
            if (!cleaned.endsWith(",")) {
                fields.append(',');
            }
            fields.append('\n');
        }

        authors.append("\"}");
        return fields.append(authors).toString();
    }

    /**
     * Returns the text between the first and the last double quote of a line.
     *
     * @param line the line to inspect
     * @return the quoted text (possibly empty), or {@code null} if the line does
     *         not contain two double quotes
     */
    private static String quotedValue(String line) {
        final int open = line.indexOf('"');
        final int close = line.lastIndexOf('"');
        return (open >= 0 && close > open) ? line.substring(open + 1, close) : null;
    }

    /**
     * Removes HTML markup and numeric character references from a line by
     * applying {@link #HTML_PATTERNS} in order.
     *
     * @param str the line to clean
     * @return the line with every pattern match removed
     */
    private static String removeHTML(String str) {
        String result = str;
        for (final Pattern markup : HTML_PATTERNS) {
            result = markup.matcher(result).replaceAll("");
        }
        return result;
    }

    /**
     * @return a short description of this scraper including a link to the site
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the host/path pattern pairs of the URLs this scraper supports
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return URL_PATTERNS;
    }

    /**
     * @return the name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
