package org.bibsonomy.scraper.url.kde.muse;

import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.didion.jwnl.dictionary.file.DictionaryFile;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ReferencesScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.springframework.web.servlet.tags.form.AbstractHtmlElementTag;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.7.0.jar:org/bibsonomy/scraper/url/kde/muse/ProjectmuseScraper.class */
public class ProjectmuseScraper extends AbstractUrlScraper implements ReferencesScraper {
    private static final String HOST = "muse.jhu.edu";
    private static final String PREFIX_DOWNLOAD_URL = "http://muse.jhu.edu/metadata/sgml/journals/";
    private static final Log log = LogFactory.getLog(ProjectmuseScraper.class);
    private static final String SITE_URL = "http://muse.jhu.edu/";
    private static final String SITE_NAME = "Project MUSE";
    private static final String INFO = "Scraper for citations from " + href(SITE_URL, SITE_NAME) + ".";
    private static final Pattern PATTERN_JOURNAL_ID = Pattern.compile("/journals/(.*)");
    private static final Pattern PATTERN_URL = Pattern.compile("<url>(.*)</url>");
    private static final Pattern PATTERN_JOURNAL = Pattern.compile("<journal>(.*)</journal>");
    private static final Pattern PATTERN_ISSN = Pattern.compile("<issn>(.*)</issn>");
    private static final Pattern PATTERN_VOLUME = Pattern.compile("<volume>(.*)</volume>");
    private static final Pattern PATTERN_ISSUE = Pattern.compile("<issue>(.*)</issue>");
    private static final Pattern PATTERN_YEAR = Pattern.compile("<year>(.*)</year>");
    private static final Pattern PATTERN_FPAGES = Pattern.compile("<fpage>(.*)</fpage>");
    private static final Pattern PATTERN_LPAGES = Pattern.compile("<lpage>(.*)</lpage>");
    private static final Pattern PATTERN_TITLE = Pattern.compile("<doctitle>(.*)</doctitle>");
    private static final Pattern PATTERN_AUTHOR = Pattern.compile("<docauthor>(.*)</docauthor>");
    private static final Pattern PATTERN_SURNAME = Pattern.compile("<surname>(.*)</surname>");
    private static final Pattern PATTERN_FNAME = Pattern.compile("<fname>(.*)</fname>");
    private static final Pattern PATTERN_ABSTRACT = Pattern.compile("<abstract>\\s*<p>([^<]*)</p>\\s*</abstract>");
    private static final Pattern references_pattern = Pattern.compile("(?s)<h3 class=\"references\">(.*)</div>");
    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(new Pair(Pattern.compile(".*muse.jhu.edu"), AbstractUrlScraper.EMPTY_PATTERN));

    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            String contentAsString = WebUtils.getContentAsString(new URL(PREFIX_DOWNLOAD_URL + getRegexResult(PATTERN_JOURNAL_ID, scrapingContext.getUrl().toString())));
            StringBuilder sb = new StringBuilder();
            StringBuilder sb2 = new StringBuilder();
            Matcher matcher = PATTERN_AUTHOR.matcher(contentAsString);
            while (matcher.find()) {
                String group = matcher.group(1);
                String regexResult = getRegexResult(PATTERN_SURNAME, group);
                String regexResult2 = getRegexResult(PATTERN_FNAME, group);
                if (sb2.length() > 0) {
                    sb2.append(PersonNameUtils.PERSON_NAME_DELIMITER);
                } else {
                    sb.append(regexResult.toLowerCase());
                }
                sb2.append(regexResult).append(", ").append(regexResult2);
            }
            String regexResult3 = getRegexResult(PATTERN_YEAR, contentAsString);
            if (ValidationUtils.present(regexResult3)) {
                sb.append(regexResult3);
            }
            StringBuilder sb3 = new StringBuilder("@inproceedings{");
            if (ValidationUtils.present((CharSequence) sb)) {
                sb3.append((CharSequence) sb).append(",\n");
            } else {
                sb3.append("noKey,\n");
            }
            appendValue(sb3, AbstractHtmlElementTag.TITLE_ATTRIBUTE, getRegexResult(PATTERN_TITLE, contentAsString));
            appendValue(sb3, "url", getRegexResult(PATTERN_URL, contentAsString));
            appendValue(sb3, "journal", getRegexResult(PATTERN_JOURNAL, contentAsString));
            appendValue(sb3, "issn", getRegexResult(PATTERN_ISSN, contentAsString));
            appendValue(sb3, "volume", getRegexResult(PATTERN_VOLUME, contentAsString));
            appendValue(sb3, "number", getRegexResult(PATTERN_ISSUE, contentAsString));
            appendValue(sb3, "author", sb2);
            appendValue(sb3, "year", regexResult3);
            appendValue(sb3, "pages", getPages(contentAsString));
            appendValue(sb3, BeanDefinitionParserDelegate.ABSTRACT_ATTRIBUTE, getRegexResult(PATTERN_ABSTRACT, contentAsString));
            sb3.append("}\n");
            scrapingContext.setBibtexResult(sb3.toString());
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    private String getPages(String str) {
        String regexResult = getRegexResult(PATTERN_FPAGES, str);
        String regexResult2 = getRegexResult(PATTERN_LPAGES, str);
        String str2 = null;
        if (regexResult != null && regexResult2 == null) {
            str2 = regexResult;
        } else if (regexResult == null && regexResult2 != null) {
            str2 = regexResult2;
        } else if (regexResult != null && regexResult2 != null) {
            str2 = regexResult + "--" + regexResult2;
        }
        return str2;
    }

    private void appendValue(StringBuilder sb, String str, CharSequence charSequence) {
        if (ValidationUtils.present(charSequence)) {
            sb.append(DictionaryFile.COMMENT_HEADER).append(str).append(" = {").append(charSequence).append("},\n");
        }
    }

    private static String getRegexResult(Pattern pattern, String str) {
        Matcher matcher = pattern.matcher(str);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    @Override // org.bibsonomy.scraper.UrlScraper
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    @Override // org.bibsonomy.scraper.ReferencesScraper
    public boolean scrapeReferences(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            Matcher matcher = references_pattern.matcher(WebUtils.getContentAsString(scrapingContext.getUrl()));
            if (!matcher.find()) {
                return false;
            }
            scrapingContext.setReferences(matcher.group(1));
            return true;
        } catch (Exception e) {
            log.error("error while scraping references " + scrapingContext.getUrl(), e);
            return false;
        }
    }
}
