package org.bibsonomy.scraper.url.kde.ieee;

import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.XmlUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/ieee/IEEEXploreBookScraper.class */
/**
 * Scraper that produces a BibTeX {@code @book} entry for book pages on IEEE Xplore.
 *
 * <p>The scraper first tries the IEEE citation export: if the page URL carries an
 * {@code arnumber} or {@code bkn} parameter, that id is POSTed to the export URL and the
 * returned BibTeX is used. If no id is present or the export returns nothing, the entry is
 * assembled from the HTML of the page itself (see {@link #ieeeBookScrape(ScrapingContext)}).
 */
public class IEEEXploreBookScraper extends AbstractUrlScraper {
    private static final String IEEE_HOST = "ieeexplore.ieee.org";
    private static final String IEEE_BOOK_PATH = "xpl";
    private static final String IEEE_SEARCH_PATH = "search";
    private static final String IEEE_BOOK = "@book";

    /* Labels that prefix the individual values inside the details paragraph. */
    private static final String CONST_ISBN = "ISBN: ";
    private static final String CONST_PAGES = "Page(s): ";
    private static final String CONST_ON_PAGES = "On page(s): ";
    private static final String CONST_EDITION = "Edition: ";
    private static final String CONST_VOLUME = "Volume: ";
    private static final String CONST_DATE = "Publication Date: ";

    /* Markers used to cut values directly out of the raw page source. */
    private static final String TITLE_START = "<title>";
    private static final String TITLE_END = "</title>";
    private static final String ABSTRACT_START = "<span class=\"sectionHeaders\">Abstract</span>";
    private static final String ABSTRACT_END = "<td class=\"bodyCopyGrey\"><p class=\"bodyCopyGreySpaced\"><strong>";
    private static final String AUTHORS_START = "<font color=990000><b>";
    private static final String AUTHORS_END = "<br>";
    private static final String AUTHOR_SEPARATOR = " and ";
    private static final String DETAILS_CLASS = "bodyCopyBlackLargeSpaced";

    private static final String EXPORT_ARNUM_URL = "http://ieeexplore.ieee.org/xpl/downloadCitations";
    /** POST body of the export request; the record id is appended to it. */
    private static final String EXPORT_POST_PREFIX = "citations-format=citation-abstract&fromPage=&download-format=download-bibtex&recordIds=";

    private static final Log log = LogFactory.getLog(IEEEXploreBookScraper.class);
    private static final String SITE_URL = "http://ieeexplore.ieee.org/books/bkbrowse.jsp";
    private static final String SITE_NAME = "IEEEXplore Books";
    private static final String info = "This scraper creates a BibTeX entry for the books at " + href(SITE_URL, SITE_NAME);
    private static final Pattern URL_PATTERN_BKN = Pattern.compile("bkn=([^&]*)");
    private static final Pattern URL_PATTERN_ARNUMBER = Pattern.compile("arnumber=([^&]*)");
    private static final List<Pair<Pattern, Pattern>> patterns = new LinkedList<>();

    static {
        // Host pattern paired with a path pattern; both the "xpl" and the "search" paths are registered.
        patterns.add(new Pair<>(Pattern.compile(".*" + IEEE_HOST), Pattern.compile(IEEE_BOOK_PATH + ".*")));
        patterns.add(new Pair<>(Pattern.compile(".*" + IEEE_HOST), Pattern.compile(IEEE_SEARCH_PATH + ".*")));
    }

    /**
     * Scrapes the page referenced by the context and stores the BibTeX result in it.
     *
     * @param scrapingContext context providing the URL (and page content) and receiving the result
     * @return always {@code true}; failures are reported through exceptions
     * @throws ScrapingException an {@link InternalFailureException} if the export request fails
     *         with an {@link IOException}, or whatever {@link #ieeeBookScrape(ScrapingContext)} throws
     */
    @Override
    public boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final String pageUrl = scrapingContext.getUrl().toString();

        final String recordId = extractRecordId(pageUrl);
        if (recordId != null) {
            final String exported;
            try {
                exported = WebUtils.getPostContentAsString(new URL(EXPORT_ARNUM_URL), EXPORT_POST_PREFIX + recordId);
            } catch (IOException e) {
                throw new InternalFailureException(e);
            }
            if (exported != null && exported.length() > 0) {
                // The export may contain HTML line breaks; drop them before storing the BibTeX.
                scrapingContext.setBibtexResult(BibTexUtils.addFieldIfNotContained(exported.replace("<br>", ""), "url", pageUrl));
                return true;
            }
        }

        // No usable export result: fall back to parsing the HTML page.
        log.debug("IEEEXploreBookScraper use JTidy to get Bibtex from " + pageUrl);
        scrapingContext.setBibtexResult(ieeeBookScrape(scrapingContext));
        return true;
    }

    /**
     * Builds a BibTeX {@code @book} entry from the HTML content of the page.
     *
     * <p>Title, abstract and authors are cut out of the raw page source; publication date,
     * pages, edition/volume and ISBN are read from the text nodes of the first {@code <p>}
     * element whose class is {@code bodyCopyBlackLargeSpaced}. Values that cannot be found are
     * left empty. The BibTeX key is {@code <isbn>:<year>}; if ISBN or year is missing the
     * key is emitted as the text {@code null}, exactly as before.
     *
     * @param scrapingContext context providing the URL and the page content
     * @return the assembled BibTeX entry
     * @throws ScrapingException an {@link InternalFailureException} wrapping any exception
     *         raised while reading or parsing the page
     */
    public String ieeeBookScrape(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final String url = scrapingContext.getUrl().toString();
            final String page = scrapingContext.getPageContent();
            final Document dom = XmlUtils.getDOM(page);

            String title = "";
            final String rawTitle = between(page, TITLE_START, TITLE_END);
            if (rawTitle != null) {
                title = rawTitle.replaceAll("IEEEXplore#\\s", "");
            }

            String abstractText = "";
            final String rawAbstract = between(page, ABSTRACT_START, ABSTRACT_END);
            if (rawAbstract != null) {
                abstractText = stripMarkup(rawAbstract);
            }

            String authors = "";
            final String rawAuthors = between(page, AUTHORS_START, AUTHORS_END);
            if (rawAuthors != null) {
                // Authors are separated by two non-breaking spaces on the page.
                authors = stripMarkup(rawAuthors).replaceAll("&nbsp;&nbsp;", AUTHOR_SEPARATOR);
                if (authors.endsWith(AUTHOR_SEPARATOR)) {
                    authors = authors.substring(0, authors.length() - AUTHOR_SEPARATOR.length());
                }
            }

            String pages = "";
            String isbn = "";
            String month = "";
            String year = "";
            String edition = "";

            final NodeList paragraphs = dom.getElementsByTagName("p");
            for (int i = 0; i < paragraphs.getLength(); i++) {
                final Node paragraph = paragraphs.item(i);
                if (!(paragraph instanceof Element) || !paragraph.hasChildNodes()) {
                    continue;
                }
                // getAttribute returns "" for a missing attribute, so paragraphs without a class are safe.
                if (!DETAILS_CLASS.equals(((Element) paragraph).getAttribute("class"))) {
                    continue;
                }

                final NodeList children = paragraph.getChildNodes();
                for (int j = 0; j < children.getLength(); j++) {
                    final String text = children.item(j).getNodeValue();
                    if (text == null) {
                        // Element children (e.g. <br>) carry no node value.
                        continue;
                    }

                    final String date = valueAfter(text, CONST_DATE);
                    if (date != null) {
                        // The year is taken from the tail of the value, the month from what precedes it;
                        // the offsets are clamped so a value holding only a year does not fail.
                        year = date.substring(Math.max(0, date.length() - 5)).trim();
                        month = date.substring(0, Math.max(0, date.length() - 4)).trim();
                    }

                    String pageRange = valueAfter(text, CONST_PAGES);
                    if (pageRange == null) {
                        pageRange = valueAfter(text, CONST_ON_PAGES);
                    }
                    if (pageRange != null) {
                        pages = pageRange;
                    }

                    String editionOrVolume = valueAfter(text, CONST_EDITION);
                    if (editionOrVolume == null) {
                        editionOrVolume = valueAfter(text, CONST_VOLUME);
                    }
                    if (editionOrVolume != null) {
                        edition = editionOrVolume;
                    }

                    // Only the first ISBN found is kept.
                    if (isbn.isEmpty()) {
                        final String isbnValue = valueAfter(text, CONST_ISBN);
                        if (isbnValue != null) {
                            isbn = isbnValue;
                        }
                    }
                }
                // Only the first details paragraph is evaluated. Without this break the loop
                // index was never advanced after a match and the loop did not terminate.
                break;
            }

            String key = null;
            if (!isbn.isEmpty() && !year.isEmpty()) {
                key = isbn.replaceAll("[^0-9A-Za-z]", "") + ":" + year;
            }

            return IEEE_BOOK + " { " + key + ", \nauthor = {" + authors + "}, \ntitle = {" + title + "}, \nyear = {" + year + "}, \nurl = {" + url + "}, \npages = {" + pages + "}, \nedition = {" + edition + "}, \npublisher = {}, \nisbn = {" + isbn + "}, \nabstract = {" + abstractText + "}, \nmonth = {" + month + "}\n}";
        } catch (Exception e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Extracts the record id for the citation export from a page URL.
     * An {@code arnumber} parameter takes precedence over a {@code bkn} parameter.
     *
     * @param url the page URL as string
     * @return the id, or {@code null} if the URL carries neither parameter
     */
    private static String extractRecordId(String url) {
        final Matcher arnumber = URL_PATTERN_ARNUMBER.matcher(url);
        if (arnumber.find()) {
            return arnumber.group(1);
        }
        final Matcher bkn = URL_PATTERN_BKN.matcher(url);
        return bkn.find() ? bkn.group(1) : null;
    }

    /**
     * Returns the text between the first occurrence of {@code startMarker} and the first
     * occurrence of {@code endMarker} that follows it.
     *
     * @return the enclosed text, or {@code null} if either marker is missing
     */
    private static String between(String text, String startMarker, String endMarker) {
        final int markerPos = text.indexOf(startMarker);
        if (markerPos == -1) {
            return null;
        }
        final int from = markerPos + startMarker.length();
        final int to = text.indexOf(endMarker, from);
        return to == -1 ? null : text.substring(from, to);
    }

    /**
     * Returns the trimmed text that follows {@code label} in {@code text}.
     *
     * @return the value, or {@code null} if {@code text} does not contain the label
     */
    private static String valueAfter(String text, String label) {
        final int labelPos = text.indexOf(label);
        return labelPos == -1 ? null : text.substring(labelPos + label.length()).trim();
    }

    /** Removes runs of two or more whitespace characters and anything that looks like a tag, then trims. */
    private static String stripMarkup(String html) {
        return html.replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "").trim();
    }

    /** @return a short HTML description of this scraper including a link to the supported site */
    @Override
    public String getInfo() {
        return info;
    }

    /** @return the host/path pattern pairs of the URLs this scraper is registered for */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /** @return the display name of the supported site */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** @return the URL of the supported site */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
