package org.bibsonomy.scraper.url.kde.dlib;

import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.PageNotSupportedException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.43.jar:org/bibsonomy/scraper/url/kde/dlib/DLibScraper.class */
/**
 * URL scraper for D-Lib pages hosted under {@code dlib.org}.
 *
 * <p>For a URL ending in {@code meta.xml} the page content held by the scraping context is
 * used directly; for a URL ending in {@code html} the sibling {@code meta.xml} document is
 * downloaded. The metadata is then searched with regular expressions and assembled into a
 * single BibTeX {@code @article} entry.
 */
public class DLibScraper extends AbstractUrlScraper {
    private static final String DLIB_HOST = "dlib.org";
    private static final String HTML_PAGE = "html";
    private static final String META_DATA_PAGE = "meta.xml";

    // All expressions are compiled once; Pattern instances are immutable and reusable.
    private static final Pattern PATTERN_TITLE = Pattern.compile("<title>(.*)</title>");
    private static final Pattern PATTERN_CREATOR = Pattern.compile("<creator>(.*)</creator>");
    private static final Pattern PATTERN_DATE = Pattern.compile("<date date-type = \"publication\">(.*)</date>");
    private static final Pattern PATTERN_YEAR = Pattern.compile(".*([0-9]{4}).*");
    private static final Pattern PATTERN_TYPE = Pattern.compile("<type resource-type = \"work\">(.*)</type>");
    private static final Pattern PATTERN_IDENTIFIER_DOI = Pattern.compile("<identifier uri-type = \"DOI\">(.*)</identifier>");
    private static final Pattern PATTERN_IDENTIFIER_URL = Pattern.compile("<identifier uri-type = \"URL\">(.*)</identifier>");
    private static final Pattern PATTERN_JOURNAL = Pattern.compile("<serial-name>(.*)</serial-name>");
    private static final Pattern PATTERN_ISSN = Pattern.compile("<issn>(.*)</issn>");
    private static final Pattern PATTERN_VOLUME = Pattern.compile("<volume>(.*)</volume>");
    private static final Pattern PATTERN_ISSUE = Pattern.compile("<issue>(.*)</issue>");
    private static final Pattern PATTERN_BIBTEX_KEY = Pattern.compile("dlib/(.*)/(.*)/");

    private static final String SITE_URL = "http://www.dlib.org/";
    private static final String SITE_NAME = "D-Lib";
    private static final String INFO = "Scraper for metadata from " + href(SITE_URL, SITE_NAME) + ".";
    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*dlib.org$"), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * @return a short description of this scraper containing a link to the D-Lib site
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Scrapes the D-Lib metadata belonging to the URL of the given context.
     *
     * @param scrapingContext context providing the URL (and, for {@code meta.xml} URLs, the
     *        page content); on success it receives this scraper and the BibTeX result
     * @return {@code false} if the URL's host does not end with {@code dlib.org},
     *         {@code true} once a BibTeX result has been stored in the context
     * @throws PageNotSupportedException if the URL ends with neither {@code meta.xml} nor
     *         {@code html}, or no metadata content could be obtained
     * @throws InternalFailureException if building the metadata URL or downloading it fails
     *         with an {@link IOException}
     * @throws ScrapingException for any other failure reported while reading the page content
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        if (!scrapingContext.getUrl().getHost().endsWith(DLIB_HOST)) {
            return false;
        }
        final String url = scrapingContext.getUrl().toString();
        try {
            scrapingContext.setScraper(this);
            String metaData = null;
            if (url.endsWith(META_DATA_PAGE)) {
                metaData = scrapingContext.getPageContent();
            } else if (url.endsWith(HTML_PAGE)) {
                // ".../name.html" -> ".../name.meta.xml": swap the trailing "html" for "meta.xml"
                final String metaUrl = url.substring(0, url.length() - HTML_PAGE.length()) + META_DATA_PAGE;
                metaData = WebUtils.getContentAsString(new URL(metaUrl));
            }
            if (metaData == null) {
                throw new PageNotSupportedException("This dlib page is not supported.");
            }
            scrapingContext.setBibtexResult(StringEscapeUtils.unescapeHtml(buildBibtex(metaData, url)));
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Assembles a BibTeX {@code @article} entry from the metadata document.
     *
     * <p>The citation key is the concatenation of the two path segments captured from the
     * URL after {@code dlib/}. Each field is written only if its element is found in the
     * metadata.
     *
     * @param metaData content of the {@code meta.xml} document
     * @param url URL of the scraped page, used to derive the citation key
     * @return the BibTeX entry; never {@code null}
     */
    private static String buildBibtex(String metaData, String url) {
        final StringBuilder bibtex = new StringBuilder("@article{");
        for (String keyPart : extractElement(PATTERN_BIBTEX_KEY, url)) {
            bibtex.append(keyPart);
        }

        appendFirstMatch(bibtex, "title", PATTERN_TITLE, metaData);

        final List<String> creators = extractElement(PATTERN_CREATOR, metaData);
        if (!creators.isEmpty()) {
            bibtex.append(",\nauthor = {");
            // Write the delimiter only between names, so nothing has to be trimmed off
            // afterwards and no assumption about the delimiter's length is needed.
            boolean first = true;
            for (String creator : creators) {
                if (!first) {
                    bibtex.append(PersonNameUtils.PERSON_NAME_DELIMITER);
                }
                bibtex.append(creator);
                first = false;
            }
            bibtex.append("}");
        }

        final List<String> dates = extractElement(PATTERN_DATE, metaData);
        if (!dates.isEmpty()) {
            final String date = dates.get(0);
            final List<String> years = extractElement(PATTERN_YEAR, date);
            // A date without a four-digit run yields no year; skip both fields instead of
            // failing with an IndexOutOfBoundsException.
            if (!years.isEmpty()) {
                final String year = years.get(0);
                bibtex.append(",\nyear = {").append(year).append("}");
                // Whatever remains of the date once the year is removed is used as the month.
                bibtex.append(",\nmonth = {").append(date.replace(year, "")).append("}");
            }
        }

        appendFirstMatch(bibtex, "doi", PATTERN_IDENTIFIER_DOI, metaData);
        appendFirstMatch(bibtex, "url", PATTERN_IDENTIFIER_URL, metaData);
        appendFirstMatch(bibtex, "journal", PATTERN_JOURNAL, metaData);
        appendFirstMatch(bibtex, "issn", PATTERN_ISSN, metaData);
        appendFirstMatch(bibtex, "volume", PATTERN_VOLUME, metaData);
        appendFirstMatch(bibtex, "number", PATTERN_ISSUE, metaData);

        bibtex.append("\n}");
        return bibtex.toString();
    }

    /**
     * Appends {@code ,\nfield = {value}} to the entry, where value is the first capture of
     * the pattern in the metadata; appends nothing if the pattern does not match.
     *
     * @param bibtex entry under construction
     * @param field BibTeX field name
     * @param pattern expression whose first captured group supplies the value
     * @param metaData text to search
     */
    private static void appendFirstMatch(StringBuilder bibtex, String field, Pattern pattern, String metaData) {
        final List<String> values = extractElement(pattern, metaData);
        if (!values.isEmpty()) {
            bibtex.append(",\n").append(field).append(" = {").append(values.get(0)).append("}");
        }
    }

    /**
     * Collects the contents of all capturing groups of all matches of the pattern.
     *
     * @param pattern expression to apply
     * @param content text to search
     * @return captured groups in match order, then group order; empty if nothing matches
     */
    private static List<String> extractElement(Pattern pattern, String content) {
        final List<String> captures = new LinkedList<String>();
        final Matcher matcher = pattern.matcher(content);
        while (matcher.find()) {
            for (int group = 1; group <= matcher.groupCount(); group++) {
                captures.add(matcher.group(group));
            }
        }
        return captures;
    }

    /**
     * @return the single (host pattern, path pattern) pair this scraper registers for
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the home page URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
