package org.bibsonomy.scraper.InformationExtraction;

import com.hp.hpl.jena.util.FileManager;
import java.beans.XMLEncoder;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.naming.NamingException;
import net.didion.jwnl.dictionary.database.DatabaseManagerImpl;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.Scraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.ie.BibExtraction;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.18.jar:org/bibsonomy/scraper/InformationExtraction/IEScraper.class */
/**
 * Scraper that turns a piece of user-selected text into a BibTeX entry by
 * running it through {@link BibExtraction}.
 *
 * <p>On success the scraping context receives the generated {@code @misc}
 * BibTeX entry and an XML serialization (via {@link XMLEncoder}) of the raw
 * extraction map, including the selected text under the key
 * {@code ie_selectedText}.
 */
public class IEScraper implements Scraper {
    /** Matches the first run of four consecutive digits, which is taken as the year. */
    private static final Pattern YEAR_PATTERN = Pattern.compile("\\d{4}");

    /**
     * Extracts bibliographic fields from the context's selected text and stores
     * the resulting BibTeX and meta data in the context.
     *
     * @param scrapingContext context providing the selected text and, optionally, a URL
     * @return {@code true} if an entry was produced; {@code false} if there is no
     *         (non-blank) selected text or the extraction returned {@code null}
     * @throws ScrapingException wrapping any {@link IOException},
     *         {@link ClassNotFoundException} or {@link NamingException} raised
     *         during extraction or serialization
     */
    @Override // org.bibsonomy.scraper.Scraper
    public boolean scrape(ScrapingContext scrapingContext) throws ScrapingException {
        String selectedText = convertISO2UTF8(scrapingContext.getSelectedText());
        if (selectedText == null || selectedText.trim().equals("")) {
            return false;
        }
        try {
            HashMap<String, String> extraction = new BibExtraction().extraction(selectedText);
            if (extraction == null) {
                return false;
            }

            StringBuffer bibtex = getBibtex(extraction);
            if (scrapingContext.getUrl() != null) {
                BibTexUtils.addField(bibtex, DatabaseManagerImpl.URL, scrapingContext.getUrl().toString());
            }
            scrapingContext.setBibtexResult(bibtex.toString());

            // The selected text is added only after the BibTeX has been built,
            // so it ends up in the meta data but not as a BibTeX field.
            extraction.put("ie_selectedText", selectedText);
            ByteArrayOutputStream metaBytes = new ByteArrayOutputStream();
            XMLEncoder encoder = new XMLEncoder(metaBytes);
            try {
                encoder.writeObject(extraction);
            } finally {
                // close() flushes the encoder and writes the closing XML tag;
                // run it even when writeObject fails so the encoder is released.
                encoder.close();
            }
            scrapingContext.setMetaResult(metaBytes.toString("UTF-8"));
            scrapingContext.setScraper(this);
            return true;
        } catch (IOException e) {
            throw new ScrapingException(e);
        } catch (ClassNotFoundException e) {
            throw new ScrapingException(e);
        } catch (NamingException e) {
            throw new ScrapingException(e);
        }
    }

    /**
     * Builds a {@code @misc} BibTeX entry from the extracted fields.
     *
     * <p>Side effect: the {@code year} entry of the given map is set from the
     * first four-digit run found in its date field. If no such year can be
     * derived, an already present {@code year} value is left untouched instead
     * of being overwritten with {@code null}.
     *
     * @param hashMap extracted field name to value mapping; modified as described above
     * @return buffer holding the complete BibTeX entry
     */
    private StringBuffer getBibtex(HashMap<String, String> hashMap) {
        String yearFromDate = getYearFromDate(hashMap.get(BibTexUtils.ADDITIONAL_MISC_FIELD_DATE));
        if (yearFromDate != null || !hashMap.containsKey("year")) {
            hashMap.put("year", yearFromDate);
        }

        String bibtexKey = BibTexUtils.generateBibtexKey(
                hashMap.get("author"), hashMap.get("editor"), hashMap.get("year"), hashMap.get("title"));
        StringBuffer bibtex = new StringBuffer("@misc{" + bibtexKey + ",\n");

        for (Map.Entry<String, String> field : hashMap.entrySet()) {
            String rawValue = field.getValue();
            if (rawValue == null) {
                continue;
            }
            String fieldName = field.getKey();
            // Braces inside a value would unbalance the surrounding { } of the field.
            String value = rawValue.replace('{', '(').replace('}', ')');
            if ("author".equals(fieldName) || "editor".equals(fieldName)) {
                value = cleanPerson(value);
            }
            bibtex.append(fieldName).append(" = {").append(value).append("},\n");
        }

        // Replace the last comma (the trailing separator) with the closing brace.
        int lastComma = bibtex.lastIndexOf(",");
        bibtex.replace(lastComma, lastComma + 1, "\n}");
        return bibtex;
    }

    /**
     * Returns the first run of four digits found in the given date string.
     *
     * @param str date text, may be {@code null}
     * @return the four-digit match, or {@code null} if {@code str} is
     *         {@code null} or contains no such run
     */
    private String getYearFromDate(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = YEAR_PATTERN.matcher(str);
        return matcher.find() ? matcher.group() : null;
    }

    /** Returns a short human-readable description of this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return "IEScraper: Extraction of bibliographic references by information extraction. Author: Thomas Steuber";
    }

    /** Returns a singleton collection containing only this scraper. */
    @Override // org.bibsonomy.scraper.Scraper
    public Collection<Scraper> getScraper() {
        return Collections.singletonList(this);
    }

    /**
     * Normalizes the separator between person names.
     *
     * <p>If the text already contains {@link PersonNameUtils#PERSON_NAME_DELIMITER}
     * it is returned unchanged. Otherwise every {@link FileManager#PATH_DELIMITER}
     * is replaced by that delimiter; if none is present, every comma is replaced
     * instead. Text containing none of the three is returned as is.
     *
     * @param str author or editor text
     * @return the text with its name separators normalized
     */
    private String cleanPerson(String str) {
        if (str.contains(PersonNameUtils.PERSON_NAME_DELIMITER)) {
            return str;
        }
        if (str.contains(FileManager.PATH_DELIMITER)) {
            return str.replace(FileManager.PATH_DELIMITER, PersonNameUtils.PERSON_NAME_DELIMITER);
        }
        if (str.contains(",")) {
            return str.replace(",", PersonNameUtils.PERSON_NAME_DELIMITER);
        }
        return str;
    }

    /**
     * Reports whether this scraper can handle the given context.
     *
     * @return {@code true} whenever the context carries selected text
     */
    @Override // org.bibsonomy.scraper.Scraper
    public boolean supportsScrapingContext(ScrapingContext scrapingContext) {
        return scrapingContext.getSelectedText() != null;
    }

    /**
     * Creates a context without URL whose selected text is a sample reference.
     *
     * @return a fresh context for testing this scraper
     */
    public static ScrapingContext getTestContext() {
        ScrapingContext scrapingContext = new ScrapingContext(null);
        scrapingContext.setSelectedText("Michael May and Bettina Berendt and Antoine Cornuejols and Joao Gama and Fosca Giannotti and Andreas Hotho and Donato Malerba and Ernestina Menesalvas and Katharina Morik and Rasmus Pedersen and Lorenza Saitta and Yucel Saygin and Assaf Schuster and Koen Vanhoof. Research Challenges in Ubiquitous Knowledge Discovery. Next Generation of Data Mining (Chapman & Hall/Crc Data Mining and Knowledge Discovery Series), Chapman & Hall/CRC,2008.");
        return scrapingContext;
    }

    /** Always returns {@code null}. */
    public String getSupportedSiteName() {
        return null;
    }

    /** Always returns {@code null}. */
    public String getSupportedSiteURL() {
        return null;
    }

    /**
     * Re-interprets the characters of the given string as ISO-8859-1 bytes and
     * decodes those bytes as UTF-8.
     *
     * <p>NOTE(review): characters outside ISO-8859-1 cannot be encoded by the
     * first step and will be substituted; presumably callers only pass text
     * that was mis-decoded as ISO-8859-1 - confirm against the caller.
     *
     * @param str text to convert, may be {@code null}
     * @return the converted text; {@code null} if {@code str} is {@code null};
     *         the unchanged input if one of the charsets is unavailable
     */
    private String convertISO2UTF8(String str) {
        if (str == null) {
            return null;
        }
        try {
            return new String(str.getBytes("ISO-8859-1"), "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Both charsets are mandatory on every Java platform, so this is not
            // expected to happen. Fall back to the unconverted text rather than
            // silently dropping the user's selection.
            return str;
        }
    }
}
