package org.bibsonomy.scraper.url.kde.usenix;

import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.didion.jwnl.dictionary.database.DatabaseManagerImpl;
import org.apache.commons.lang.StringEscapeUtils;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.19.jar:org/bibsonomy/scraper/url/kde/usenix/UsenixScraper.class */
/**
 * URL scraper for paper pages on usenix.org.
 *
 * <p>Handles URLs whose path starts with {@code /events/} or matches
 * {@code /publications/library/proceedings/.*\.html}. Year and BibTeX key are
 * taken from the URL path; title, authors, event name and page range are
 * extracted from the page HTML with regular expressions. The result is an
 * {@code @inproceedings} entry stored on the {@link ScrapingContext}.
 */
public class UsenixScraper extends AbstractUrlScraper {
    private static final String HOST = "usenix.org";
    private static final String PATH_1 = "/events/";
    private static final String PATH_2 = "/publications/library/proceedings/.*\\.html";
    private static final String PATH_PROCEEDINGS = "/publications/library/proceedings/";

    /** Flags shared by patterns that must match across line breaks regardless of tag case. */
    private static final int HTML_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    // Patterns applied to the URL path.
    private static final Pattern PATTERN_YEAR_EVENTS = Pattern.compile("/events/.*(\\d{2})/");
    private static final Pattern PATTERN_YEAR_PROCEEDING = Pattern.compile("/publications/library/proceedings/\\D*(\\d{2})/");
    private static final Pattern PATTERN_KEY_EVENTS = Pattern.compile("/events/([^/]*)/");
    private static final Pattern PATTERN_KEY_PROCEEDING = Pattern.compile("/publications/library/proceedings/([^/]*)/");

    // Patterns applied to the page content.
    private static final Pattern CURRENT_PATTERN_GET_TITLE = Pattern.compile("<h2>(.*)</h2>", HTML_FLAGS);
    private static final Pattern CURRENT_PATTERN_GET_AUTHOR = Pattern.compile("</h2>(.*)<h3>", HTML_FLAGS);
    private static final Pattern CURRENT_WITH_BORDER_PATTERN_GET_AUTHOR = Pattern.compile("</h2>(.*)<h4>", HTML_FLAGS);
    private static final Pattern OLD_PATTERN_GET_AUTHOR = Pattern.compile("<PRE>\\s*(.*)", Pattern.CASE_INSENSITIVE);
    private static final Pattern CURRENT_PATTERN_GET_EVENT = Pattern.compile("sans-serif\"><b>([^<]*)</b></font>", HTML_FLAGS);
    private static final Pattern OLD_PATTERN_GET_EVENT = Pattern.compile("<title>(.*)</title>", HTML_FLAGS);
    private static final Pattern CURRENT_PATTERN_GET_PAGES = Pattern.compile("<b>Pp.(.*)</b>", Pattern.CASE_INSENSITIVE);

    // Patterns used while cleaning extracted fragments.
    /** An italic element including its content; an unclosed element extends to the end of the input. */
    private static final Pattern ITALIC_ELEMENT = Pattern.compile("<i>.*?(?:</i>|\\z)", HTML_FLAGS);
    /** A bare opening or closing italic tag. */
    private static final Pattern ITALIC_TAG = Pattern.compile("</?i>", Pattern.CASE_INSENSITIVE);
    /** Two or more consecutive whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s{2,}");
    /** A run of stand-alone "and" words; the word boundaries keep names such as "Anand" intact. */
    private static final Pattern REPEATED_AND = Pattern.compile("\\band(?:\\s+and)+\\b");

    /** Name of the BibTeX field that receives the scraped page URL. */
    private static final String BIBTEX_URL_FIELD = "url";
    /** BibTeX key used when none can be derived from the URL path. */
    private static final String DEFAULT_KEY = "usenix";

    private static final String SITE_URL = "http://usenix.org/";
    private static final String SITE_NAME = "USENIX";
    private static final String INFO = "Scraper for papers from events which are posted on " + href(SITE_URL, SITE_NAME) + ".";
    private static final List<Tuple<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        // Same host pattern for both supported path families.
        final Pattern hostPattern = Pattern.compile(".*" + HOST);
        patterns.add(new Tuple<>(hostPattern, Pattern.compile(PATH_1 + ".*")));
        patterns.add(new Tuple<>(hostPattern, Pattern.compile(PATH_2)));
    }

    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Builds an {@code @inproceedings} BibTeX entry from the context's URL and
     * page content and stores it on the context.
     *
     * @param scrapingContext context providing the URL and the page content
     * @return always {@code true}
     * @throws ScrapingException an {@link InternalFailureException} if cleaning
     *         a fragment reports an unsupported encoding
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            final String path = scrapingContext.getUrl().getPath();
            final String pageContent = scrapingContext.getPageContent();

            // Year and key come from the URL path; which patterns apply depends on the path family.
            String year = null;
            String key = null;
            if (path.startsWith(PATH_1)) {
                year = extractYear(PATTERN_YEAR_EVENTS, path);
                key = firstGroup(PATTERN_KEY_EVENTS, path);
            } else if (path.startsWith(PATH_PROCEEDINGS)) {
                year = extractYear(PATTERN_YEAR_PROCEEDING, path);
                key = firstGroup(PATTERN_KEY_PROCEEDING, path);
            }

            final String title = extractTitle(pageContent);
            final String rawAuthors = extractAuthors(pageContent);
            final String authors = rawAuthors == null ? null : normalizeAuthors(rawAuthors);
            final String event = extractEvent(pageContent);
            final String pages = extractPages(pageContent);

            final String bibtex = buildBibtex(key, authors, title, year, event, pages);
            scrapingContext.setBibtexResult(
                    BibTexUtils.addFieldIfNotContained(bibtex, BIBTEX_URL_FIELD, scrapingContext.getUrl().toString()));
            return true;
        } catch (UnsupportedEncodingException e) {
            throw new InternalFailureException(e);
        }
    }

    /** Returns capture group 1 of the first match of {@code pattern} in {@code input}, or {@code null} if there is no match. */
    private static String firstGroup(Pattern pattern, CharSequence input) {
        final Matcher matcher = pattern.matcher(input);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Extracts the two-digit year from the path and expands it, or returns {@code null} if the path has none. */
    private static String extractYear(Pattern yearPattern, String path) {
        final String twoDigits = firstGroup(yearPattern, path);
        return twoDigits == null ? null : expandYear(twoDigits);
    }

    /** Returns the cleaned content of the page's {@code <h2>} heading, or {@code null} if none is found. */
    private String extractTitle(String pageContent) throws UnsupportedEncodingException {
        final String raw = firstGroup(CURRENT_PATTERN_GET_TITLE, pageContent);
        return raw == null ? null : cleanup(raw, false);
    }

    /**
     * Returns the cleaned but not yet delimiter-normalized author block, or
     * {@code null} if no layout matches. Three layouts are tried in order:
     * text between {@code </h2>} and {@code <h3>}, text between {@code </h2>}
     * and {@code <h4>}, and, if the latter still contains {@code <PRE>}, the
     * text following {@code <PRE>}.
     */
    private String extractAuthors(String pageContent) throws UnsupportedEncodingException {
        final String current = firstGroup(CURRENT_PATTERN_GET_AUTHOR, pageContent);
        if (current != null) {
            return cleanup(current, true);
        }

        final String bordered = firstGroup(CURRENT_WITH_BORDER_PATTERN_GET_AUTHOR, pageContent);
        if (bordered == null) {
            return null;
        }
        String authors = cleanup(bordered, true)
                .replace("<HR>", "").replace("<hr>", "")
                .replace("<P>", "").replace("<p>", "");
        if (authors.contains("<PRE>")) {
            final String preformatted = firstGroup(OLD_PATTERN_GET_AUTHOR, pageContent);
            if (preformatted != null) {
                // In the <PRE> layout, runs of whitespace separate the names.
                authors = WHITESPACE_RUN.matcher(cleanup(preformatted, true))
                        .replaceAll(PersonNameUtils.PERSON_NAME_DELIMITER);
            }
        }
        return authors;
    }

    /**
     * Turns line breaks and commas into the person-name delimiter, collapses
     * repeated "and" words into one, and strips one leading and one trailing
     * delimiter.
     */
    private static String normalizeAuthors(String rawAuthors) {
        final String delimiter = PersonNameUtils.PERSON_NAME_DELIMITER;
        String authors = rawAuthors.replace("\n", delimiter).replace(",", delimiter);
        // Adjacent separators (e.g. ",\n") yield "and  and"; collapse them in a
        // single pass without touching names that merely end in "and".
        authors = REPEATED_AND.matcher(authors).replaceAll("and");
        if (authors.endsWith(delimiter)) {
            authors = authors.substring(0, authors.length() - delimiter.length());
        }
        if (authors.startsWith(delimiter)) {
            authors = authors.substring(delimiter.length());
        }
        return authors;
    }

    /** Returns the event name from the current layout, falling back to the page {@code <title>}; {@code null} if neither matches. */
    private String extractEvent(String pageContent) throws UnsupportedEncodingException {
        String raw = firstGroup(CURRENT_PATTERN_GET_EVENT, pageContent);
        if (raw == null) {
            raw = firstGroup(OLD_PATTERN_GET_EVENT, pageContent);
        }
        return raw == null ? null : cleanup(raw, false).replace("\n", "");
    }

    /** Returns the page range including its "Pp." prefix, or {@code null} if the page has none. */
    private String extractPages(String pageContent) throws UnsupportedEncodingException {
        final String raw = firstGroup(CURRENT_PATTERN_GET_PAGES, pageContent);
        return raw == null ? null : cleanup("Pp." + raw, false);
    }

    /**
     * Assembles the BibTeX entry. Fields whose value is {@code null} are
     * omitted; a {@code null} or empty key is replaced by the default key.
     */
    private static String buildBibtex(String key, String authors, String title, String year, String event, String pages) {
        final StringBuilder bibtex = new StringBuilder("@inproceedings{");
        bibtex.append(key == null || key.isEmpty() ? DEFAULT_KEY : key).append(",\n");
        appendField(bibtex, "author", authors);
        appendField(bibtex, "title", title);
        appendField(bibtex, "year", year);
        appendField(bibtex, "series", event);
        appendField(bibtex, "pages", pages);
        // Every line written so far ends with ",\n"; drop that from the last one before closing the entry.
        bibtex.setLength(bibtex.length() - 2);
        return bibtex.append("\n}\n").toString();
    }

    /** Appends a tab-indented {@code name = {value},} line if {@code value} is not {@code null}. */
    private static void appendField(StringBuilder bibtex, String name, String value) {
        if (value != null) {
            bibtex.append('\t').append(name).append(" = {").append(value).append("},\n");
        }
    }

    /**
     * Cleans an HTML fragment: replaces {@code &#150;} with a hyphen, unescapes
     * HTML entities, removes {@code <!-- CHANGE -->} markers, handles italic
     * markup, converts {@code <BR>}/{@code <br>} to line breaks and trims.
     *
     * @param html the fragment to clean
     * @param dropItalicContent if {@code true}, italic elements are removed
     *        together with their content (an unclosed element is removed up to
     *        the end of the fragment); if {@code false}, only the italic tags
     *        are removed and their content is kept
     * @return the cleaned fragment
     * @throws UnsupportedEncodingException declared for callers; not raised by
     *         the operations visible in this method
     */
    private String cleanup(String html, boolean dropItalicContent) throws UnsupportedEncodingException {
        final String unescaped = StringEscapeUtils.unescapeHtml(html.replace("&#150;", "-"))
                .replace("<!-- CHANGE -->", "");
        // The closing tag is only searched after its opening tag, so malformed
        // markup (a stray </i> before <i>) cannot make this step loop forever.
        final Pattern italics = dropItalicContent ? ITALIC_ELEMENT : ITALIC_TAG;
        final String withoutItalics = italics.matcher(unescaped).replaceAll("");
        return withoutItalics.replace("<BR>", "\n").replace("<br>", "\n").trim();
    }

    /**
     * Expands a two-digit year: values starting with {@code 9} are placed in
     * the 1900s, everything else in the 2000s.
     */
    private static String expandYear(String twoDigits) {
        return (twoDigits.startsWith("9") ? "19" : "20") + twoDigits;
    }

    @Override
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
