package org.bibsonomy.scraper.url.kde.ieee;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.soap.Constants;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.UrlScraper;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.1.jar:org/bibsonomy/scraper/url/kde/ieee/IEEEXploreBookScraper.class */
/**
 * Scraper that creates a BibTeX {@code @book} entry for book pages on IEEE Xplore.
 *
 * <p>If the page URL carries a {@code bkn=...} parameter the scraper first tries to download
 * the BibTeX record from the site's citation action. If the URL has no such parameter, or the
 * download yields no content, the HTML of the page is parsed with JTidy and the entry is
 * assembled from the markup.
 */
public class IEEEXploreBookScraper extends UrlScraper {
    private static final String IEEE_HOST = "ieeexplore.ieee.org";
    private static final String IEEE_HOST_NAME = "http://ieeexplore.ieee.org/";
    private static final String IEEE_BOOK_PATH = "books";
    private static final String IEEE_BOOK = "@book";
    private static final String CONST_ISBN = "ISBN: ";
    private static final String CONST_PAGES = "Page(s): ";
    private static final String CONST_EDITION = "Edition: ";
    private static final String CONST_DATE = "Publication Date: ";

    /** Markers and CSS class names the HTML fallback looks for in the page. */
    private static final String TITLE_CLASS = "headNavBlueXLarge";
    private static final String DETAILS_CLASS = "bodyCopyBlackLargeSpaced";
    private static final String ABSTRACT_MARKER = "<strong>Abstract</strong>";
    private static final String TOC_MARKER = "<strong>Table of Contents </strong>";
    private static final String HARDCOVER_MARKER = "<td class=\"bodyCopyBlackLarge\" nowrap>Hardcover</td>";
    private static final String HARDCOVER_END_MARKER = "<td class=\"bodyCopyBlackLarge\" nowrap><span class=\"sectionHeaders\">&raquo;</span>";
    private static final String AUTHOR_LINK_MARKER = "<in>au)";

    /**
     * Index of the first anchor that is inspected for author links; earlier anchors are skipped.
     * NOTE(review): presumably these are navigation links of the page template - confirm.
     */
    private static final int FIRST_INSPECTED_ANCHOR = 39;

    private static final Logger log = Logger.getLogger(IEEEXploreBookScraper.class);
    private static final String info = "IEEEXplore Book Scraper: This scraper creates a BibTeX entry for the books at " + href("http://ieeexplore.ieee.org/books/bkbrowse.jsp", "IEEEXplore");
    private static final Pattern pattern = Pattern.compile("bkn=([^&]*)");
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(new Tuple<Pattern, Pattern>(Pattern.compile(".*" + IEEE_HOST), Pattern.compile("/" + IEEE_BOOK_PATH + ".*")));

    /** Mutable holder for the BibTeX fields collected while walking the page. */
    private static final class BookFields {
        String authors = "";
        String pages = "";
        String title = "";
        String isbn = "";
        String publisher = "";
        String month = "";
        String year = "";
        String edition = "";
        String bookAbstract = "";
        /** Name part of the citation key, taken from the first author; null until one is found. */
        String keyName = null;
    }

    /**
     * Scrapes the book behind the URL of the given context and stores the BibTeX in the context.
     *
     * @param scrapingContext context providing the URL (and, for the fallback, the page content)
     * @return always {@code true}; failures are reported via exceptions
     * @throws ScrapingException if the download or the HTML extraction fails
     */
    @Override // org.bibsonomy.scraper.UrlScraper
    public boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        final String pageUrl = scrapingContext.getUrl().toString();
        final Matcher matcher = pattern.matcher(pageUrl);
        if (matcher.find()) {
            final String bibtex = downloadBibtex(matcher.group(1));
            if (bibtex != null) {
                scrapingContext.setBibtexResult(bibtex);
                return true;
            }
            log.debug("IEEEXploreBookScraper: direct bibtex download failed. Use JTidy to get bibliographic data.");
        } else {
            log.debug("IEEEXploreBookScraper use JTidy to get Bibtex from " + pageUrl);
        }
        scrapingContext.setBibtexResult(ieeeBookScrape(scrapingContext));
        return true;
    }

    /**
     * Downloads the BibTeX record for a book number from the citation action.
     *
     * @return the response with {@code <br>} removed, or {@code null} if the response was
     *         missing or blank (the caller then falls back to HTML extraction)
     */
    private String downloadBibtex(String bookNumber) throws ScrapingException {
        final URL citeUrl;
        try {
            citeUrl = new URL(IEEE_HOST_NAME + IEEE_BOOK_PATH + "/bkCiteAction?dlSelect=cite_abs&fileFormate=BibTex&arnumber=<arnumber>" + bookNumber + "</arnumber>");
        } catch (MalformedURLException e) {
            throw new InternalFailureException(e);
        }
        final String content = getContentAsStringPostRequest(citeUrl);
        if (content == null) {
            return null;
        }
        final String bibtex = content.replace("<br>", "");
        // An empty body is not a usable record; report it as a failed download.
        return bibtex.trim().length() == 0 ? null : bibtex;
    }

    /**
     * Builds a BibTeX {@code @book} entry from the HTML of the page held by the context.
     *
     * @param scrapingContext context providing the page URL and the page content
     * @return the assembled BibTeX entry
     * @throws ScrapingException an {@link InternalFailureException} wrapping any problem that
     *         occurs during extraction, including the case that no author link is found
     */
    public String ieeeBookScrape(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final String url = scrapingContext.getUrl().toString();
            final String page = scrapingContext.getPageContent();

            final Tidy tidy = new Tidy();
            tidy.setQuiet(true);
            tidy.setMakeClean(true);
            tidy.setDropFontTags(true);
            tidy.setShowWarnings(false);
            // NOTE(review): getBytes() uses the platform default charset - confirm this matches
            // the input encoding JTidy is configured with.
            final Document document = tidy.parseDOM(new ByteArrayInputStream(page.getBytes()), null);

            final BookFields book = new BookFields();
            book.title = extractTitle(document);
            book.bookAbstract = extractAbstract(page);
            book.isbn = extractIsbn(page);
            readPublicationDetails(document, book);
            readAuthors(document, book);

            if (book.keyName == null) {
                // The citation key is derived from the first author; without one no entry is built.
                throw new IllegalStateException("no author link found on " + url);
            }
            final String key = book.keyName.replaceAll("[^0-9A-Za-z]", "") + ":" + book.year;
            return IEEE_BOOK + " { " + key + ", author = {" + book.authors + "}, title = {" + book.title + "}, year = {" + book.year + "}, url = {" + url + "}, pages = {" + book.pages + "}, edition = {" + book.edition + "}, publisher = {" + book.publisher + "}, isbn = {" + book.isbn + "}, abstract = {" + book.bookAbstract + "}, month = {" + book.month + "}}";
        } catch (Exception e) {
            throw new InternalFailureException(e);
        }
    }

    /** Returns the value of the named attribute, or null if the node is no element or lacks it. */
    private static String attributeValue(Node node, String name) {
        if (!(node instanceof Element)) {
            return null;
        }
        final Element element = (Element) node;
        if (element.getAttributeNode(name) == null) {
            return null;
        }
        return element.getAttributeNode(name).getValue();
    }

    /** Returns the node value of the first child, or null if the node has no children. */
    private static String firstChildText(Node node) {
        final Node child = node.getFirstChild();
        return child == null ? null : child.getNodeValue();
    }

    /** Returns the text of the last span carrying the title class, or "" if there is none. */
    private static String extractTitle(Document document) {
        String title = "";
        final NodeList spans = document.getElementsByTagName("span");
        for (int i = 0; i < spans.getLength(); i++) {
            final Node span = spans.item(i);
            if (TITLE_CLASS.equals(attributeValue(span, "class"))) {
                final String text = firstChildText(span);
                if (text != null) {
                    title = text;
                }
            }
        }
        return title;
    }

    /** Returns the tag-stripped text between the abstract and table-of-contents headings, or "". */
    private static String extractAbstract(String page) {
        final int marker = page.indexOf(ABSTRACT_MARKER);
        final int end = page.indexOf(TOC_MARKER);
        if (marker == -1 || end == -1) {
            return "";
        }
        final int start = marker + ABSTRACT_MARKER.length();
        if (end < start) {
            return "";
        }
        return page.substring(start, end).replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "").trim();
    }

    /** Returns the ISBN found in the "Hardcover" table section, or "" if it cannot be located. */
    private static String extractIsbn(String page) {
        final int start = page.indexOf(HARDCOVER_MARKER);
        if (start == -1) {
            return "";
        }
        final int end = page.indexOf(HARDCOVER_END_MARKER);
        if (end < start) {
            // also covers a missing end marker (-1)
            return "";
        }
        final String section = page.substring(start, end).replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "");
        final int label = section.indexOf(CONST_ISBN);
        if (label == -1) {
            return "";
        }
        final String value = section.substring(label + CONST_ISBN.length());
        final int terminator = value.indexOf("&nbsp;");
        return terminator == -1 ? "" : value.substring(0, terminator);
    }

    /**
     * Reads year, month, publisher, pages and edition from the first paragraph that carries
     * the details class and has child nodes. Fields that are not found keep their defaults.
     */
    private static void readPublicationDetails(Document document, BookFields book) {
        final NodeList paragraphs = document.getElementsByTagName("p");
        for (int i = 0; i < paragraphs.getLength(); i++) {
            final Node paragraph = paragraphs.item(i);
            if (!paragraph.hasAttributes() || !DETAILS_CLASS.equals(attributeValue(paragraph, "class")) || !paragraph.hasChildNodes()) {
                continue;
            }
            final NodeList children = paragraph.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                final String text = children.item(j).getNodeValue();
                if (text == null) {
                    continue;
                }
                final int datePos = text.indexOf(CONST_DATE);
                if (datePos != -1) {
                    final String date = text.substring(datePos + CONST_DATE.length());
                    // The year is taken from the last five characters, the month is what precedes it.
                    book.year = date.substring(Math.max(0, date.length() - 5)).trim();
                    book.month = date.substring(0, Math.max(0, date.length() - 4)).trim();
                    // The publisher is read from the node two positions after the date node.
                    final Node publisherNode = children.item(j + 2);
                    if (publisherNode != null && publisherNode.getNodeValue() != null) {
                        book.publisher = publisherNode.getNodeValue().trim();
                    }
                }
                final int pagesPos = text.indexOf(CONST_PAGES);
                if (pagesPos != -1) {
                    book.pages = text.substring(pagesPos + CONST_PAGES.length()).trim();
                }
                final int editionPos = text.indexOf(CONST_EDITION);
                if (editionPos != -1) {
                    book.edition = text.substring(editionPos + CONST_EDITION.length()).trim();
                }
            }
            // Only the first matching paragraph is evaluated; stopping here also guarantees
            // that the scan terminates.
            break;
        }
    }

    /**
     * Collects the author names from the author links, joined by " and ", and derives the
     * citation key name from the first author.
     */
    private static void readAuthors(Document document, BookFields book) {
        final NodeList anchors = document.getElementsByTagName("a");
        final StringBuilder authors = new StringBuilder();
        boolean firstAuthor = true;
        for (int i = FIRST_INSPECTED_ANCHOR; i < anchors.getLength(); i++) {
            final Node anchor = anchors.item(i);
            final String target = attributeValue(anchor, Constants.ATTR_REFERENCE);
            if (target == null || target.indexOf(AUTHOR_LINK_MARKER) == -1) {
                continue;
            }
            final String name = firstChildText(anchor);
            if (name == null) {
                continue;
            }
            if (firstAuthor) {
                firstAuthor = false;
                authors.append(name);
                book.keyName = keyNameOf(name);
            } else {
                authors.append(" and ").append(name);
            }
        }
        book.authors = authors.toString();
    }

    /**
     * Derives the name part of the citation key from an author name: the text before the first
     * comma if there is one, otherwise the text before the first space, otherwise the whole name.
     */
    private static String keyNameOf(String authorName) {
        final String name = authorName.trim();
        final int comma = name.indexOf(",");
        if (comma != -1) {
            return name.substring(0, comma);
        }
        final int space = name.indexOf(" ");
        if (space != -1) {
            return name.substring(0, space);
        }
        return name;
    }

    /**
     * Sends a POST request without a body to the given URL and returns the response as a string.
     *
     * @param url address to request
     * @return the response body decoded as UTF-8
     * @throws ScrapingException an {@link InternalFailureException} if the connection cannot be
     *         established or reading the response fails
     */
    public String getContentAsStringPostRequest(URL url) throws ScrapingException {
        HttpURLConnection connection = null;
        InputStreamReader reader = null;
        try {
            connection = (HttpURLConnection) url.openConnection();
            connection.setAllowUserInteraction(false);
            connection.setDoInput(true);
            connection.setDoOutput(false);
            connection.setUseCaches(false);
            connection.setRequestMethod(Constants.HEADER_POST);
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)");
            connection.connect();

            reader = new InputStreamReader(connection.getInputStream(), "utf-8");
            final StringWriter content = new StringWriter();
            final char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.write(buffer, 0, read);
            }
            return content.toString();
        } catch (IOException e) {
            // ConnectException is an IOException, so connection failures are handled here as well.
            log.fatal("Could not get content for URL " + url.toString() + " : " + e.getMessage());
            throw new InternalFailureException(e);
        } finally {
            // Release the stream and the connection on every path, not only on success.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // nothing sensible left to do when closing fails
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Returns the human-readable description of this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /** Returns the host/path pattern pair that selects the URLs this scraper handles. */
    @Override // org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }
}
