package org.bibsonomy.scraper.url.kde.acm;

import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.XmlUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/acm/ACMBasicScraper.class */
/**
 * URL scraper for publication pages of the ACM Digital Library (portal.acm.org).
 *
 * <p>The scraper looks for anchors labelled "BibTeX" on the publication page, follows the path
 * embedded in each anchor's {@code onclick} handler, collects the text of the {@code <pre>}
 * elements found on the linked pages and hands the result to the {@link ScrapingContext} after
 * adding the page URL and, when it can be located, the abstract.
 */
public class ACMBasicScraper extends AbstractUrlScraper {
    private static final Logger log = Logger.getLogger(ACMBasicScraper.class);
    private static final String ACM_HOST_NAME = "http://portal.acm.org/";
    private static final String BIBTEX_STRING_ON_ACM = "BibTeX";
    /** Malformed entry ending (trailing comma before the closing brace) that gets repaired. */
    private static final String BROKEN_END = "},\n }";
    private static final String info = "ACM Scraper: This scraper parses a publication page from the " + href("http://portal.acm.org/portal.cfm", "ACM Digital Library");
    private static final List<Tuple<Pattern, Pattern>> patterns = Collections.singletonList(new Tuple<Pattern, Pattern>(Pattern.compile(".*portal.acm.org"), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Scrapes the BibTeX entry for the page held by the given context.
     *
     * @param scrapingContext context providing the page content and URL; receives the result
     * @return {@code true} once a non-empty BibTeX string has been stored in the context
     * @throws ScrapingFailureException if no BibTeX text could be collected from the page
     * @throws InternalFailureException if any other error occurs while fetching or parsing
     */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String bibtex;
        try {
            bibtex = buildBibtex(scrapingContext);
        } catch (Exception e) {
            log.error("ACMBasicScraper: scraping failed", e);
            throw new InternalFailureException(e);
        }

        // Raised outside the try block so that it is not re-wrapped as an InternalFailureException.
        if ("".equals(bibtex)) {
            throw new ScrapingFailureException("getting bibtex failed");
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * Collects the BibTeX text from all linked BibTeX pages and enriches it with URL and abstract.
     *
     * @return the trimmed BibTeX string, or the empty string if nothing was collected
     * @throws Exception whatever the DOM helpers or URL construction raise; the caller wraps it
     */
    private String buildBibtex(ScrapingContext scrapingContext) throws Exception {
        final StringBuffer bibtex = new StringBuffer();
        final Document dom = XmlUtils.getDOM(scrapingContext.getPageContent());

        for (String path : extractSinglePath(dom)) {
            final NodeList preNodes = XmlUtils.getDOM(new URL(ACM_HOST_NAME + path)).getElementsByTagName("pre");
            for (int i = 0; i < preNodes.getLength(); i++) {
                final Node firstChild = preNodes.item(i).getFirstChild();
                // getNodeValue() is null for non-text nodes; skip those instead of appending "null".
                if (firstChild != null && firstChild.getNodeValue() != null) {
                    bibtex.append(' ').append(firstChild.getNodeValue());
                }
            }
        }

        // Nothing scraped: report that before spending effort on enrichment.
        if ("".equals(bibtex.toString().trim())) {
            return "";
        }

        // Only the tail of the buffer is inspected for the broken ending.
        final int brokenEndIndex = bibtex.indexOf(BROKEN_END, (bibtex.length() - BROKEN_END.length()) - 1);
        if (brokenEndIndex > 0) {
            bibtex.replace(brokenEndIndex, bibtex.length(), "}\n}");
        }

        BibTexUtils.addFieldIfNotContained(bibtex, "url", scrapingContext.getUrl().toString());
        final String abstractText = extractAbstract(dom);
        if (abstractText != null) {
            BibTexUtils.addFieldIfNotContained(bibtex, "abstract", abstractText);
        } else {
            log.info("ACMBasicScraper: Abstract not available");
        }
        return bibtex.toString().trim();
    }

    /**
     * Locates the abstract on the publication page.
     *
     * <p>Looks for {@code <a name="abstract">ABSTRACT</a>}, then takes the second following
     * sibling of the anchor's parent. If that node carries {@code class="abstract"}, the text of
     * its next sibling is used when that sibling is a {@code p} node, otherwise the text of the
     * node itself. When several anchors match, the last one wins.
     *
     * @return the abstract text, or {@code null} if the expected structure is not present
     */
    private String extractAbstract(Document document) {
        String result = null;
        final NodeList anchors = document.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            final Node anchor = anchors.item(i);
            final Node label = anchor.getFirstChild();
            if (label == null
                    || !hasAttributeValue(anchor, "name", "abstract")
                    || !"ABSTRACT".equals(label.getNodeValue())) {
                continue;
            }

            final Node container = secondSiblingAfterParent(anchor);
            if (container == null || !hasAttributeValue(container, "class", "abstract")) {
                continue;
            }

            final Node following = container.getNextSibling();
            final boolean paragraphFollows = following != null && "p".equals(following.getNodeName());
            result = XmlUtils.getText(paragraphFollows ? following : container);
        }
        return result;
    }

    /** Returns whether the node is an element whose attribute {@code name} equals {@code value}. */
    private static boolean hasAttributeValue(Node node, String name, String value) {
        if (!(node instanceof Element) || !node.hasAttributes()) {
            return false;
        }
        final Attr attribute = ((Element) node).getAttributeNode(name);
        return attribute != null && value.equals(attribute.getValue());
    }

    /** Returns the second sibling following the node's parent, or {@code null} if there is none. */
    private static Node secondSiblingAfterParent(Node node) {
        final Node parent = node.getParentNode();
        final Node first = (parent == null) ? null : parent.getNextSibling();
        return (first == null) ? null : first.getNextSibling();
    }

    /**
     * Extracts the text between the first pair of single quotes of an {@code onclick} value.
     *
     * @return the quoted text, or {@code null} if the value does not contain two single quotes
     */
    private String extractPathFromOnclickNode(String str) {
        final int open = str.indexOf('\'');
        if (open < 0) {
            return null;
        }
        final int close = str.indexOf('\'', open + 1);
        if (close < 0) {
            return null;
        }
        return str.substring(open + 1, close);
    }

    /**
     * Collects the paths referenced by all anchors whose first child has the text "BibTeX".
     *
     * <p>Anchors without an {@code onclick} attribute, or whose {@code onclick} value contains no
     * quoted path, are skipped.
     *
     * @return the extracted paths in document order; never {@code null}
     */
    private List<String> extractSinglePath(Document document) {
        final List<String> paths = new ArrayList<String>();
        final NodeList anchors = document.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            final Node anchor = anchors.item(i);
            final Node label = anchor.getFirstChild();
            if (label == null || !BIBTEX_STRING_ON_ACM.equals(label.getNodeValue())) {
                continue;
            }
            final Node onclick = anchor.getAttributes().getNamedItem("onclick");
            if (onclick == null || onclick.getNodeValue() == null) {
                continue;
            }
            final String path = extractPathFromOnclickNode(onclick.getNodeValue());
            if (path != null) {
                paths.add(path);
            }
        }
        return paths;
    }

    /** Returns the human-readable description of this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return info;
    }

    /** Returns the host/path pattern pairs this scraper is registered for. */
    @Override // org.bibsonomy.scraper.AbstractUrlScraper, org.bibsonomy.scraper.UrlScraper
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }
}
