package org.bibsonomy.scraper.id.kde.doi;

import java.io.IOException;
import java.net.URL;
import java.util.Collection;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.scraper.Scraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.converter.HTMLMetaDataDublinCoreToBibtexConverter;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.id.DOIUtils;

/* loaded from: input_file:org/bibsonomy/scraper/id/kde/doi/HTMLMetaDataDOIScraper.class */
/**
 * Scraper that tries to discover a DOI for the page referenced by a
 * {@link ScrapingContext} and stores it as the context's selected text.
 *
 * <p>The DOI is looked up in the page's HTML: first in a
 * {@code <meta name="citation_doi" content="...">} tag, then in the {@code "doi"}
 * entry of the data returned by the inherited {@code extractData} method. If
 * neither yields a value, the URL itself is handed to
 * {@link DOIUtils#getDoiFromURL}.
 *
 * <p>This class never reports a successful scrape: {@link #scrape} always
 * returns {@code false} and only mutates the context.
 * NOTE(review): presumably a later scraper in the chain picks up the DOI from
 * the selected text - confirm against the scraper chain configuration.
 */
public class HTMLMetaDataDOIScraper extends HTMLMetaDataDublinCoreToBibtexConverter implements Scraper {
    private static final String INFO = "The HTMLMetaDataDOIScraper gets a doi from the webpage, if no URL scraper matched the previously redirected page.";

    /** Matches a {@code citation_doi} meta tag; group 1 is the content attribute value. */
    private static final Pattern DOIPATTERN_HIGHWIRE_PRESS_TAGS = Pattern.compile("<meta\\s+name=\"citation_doi\"\\s+content=\"(.*?)\"");

    /**
     * Looks for a DOI in the page's meta data, falling back to the URL itself,
     * and stores a found DOI as the selected text of the context.
     *
     * @param scrapingContext the context whose URL is inspected and whose
     *                        selected text is set when a DOI is found
     * @return always {@code false}; this method only updates the context
     * @throws ScrapingException if downloading the page fails with an I/O error
     */
    @Override
    public boolean scrape(ScrapingContext scrapingContext) throws ScrapingException {
        final URL url = scrapingContext.getUrl();

        String doi = getDoiFromMetaData(url);
        if (!ValidationUtils.present(doi)) {
            // nothing usable in the page itself; try to read the DOI from the URL
            doi = DOIUtils.getDoiFromURL(url);
        }

        if (ValidationUtils.present(doi)) {
            scrapingContext.setSelectedText(doi);
        }
        // false in every case: no publication has been scraped here
        return false;
    }

    /**
     * Downloads the page at {@code url} and extracts a DOI from its meta data.
     *
     * @param url the page to inspect; may be {@code null}
     * @return the DOI from the {@code citation_doi} meta tag if that tag is
     *         found, otherwise the {@code "doi"} entry of {@code extractData}
     *         if present; {@code null} if the URL or the page content is not
     *         present or no DOI could be found
     * @throws ScrapingException wrapping any {@link IOException} raised while
     *                           fetching the page
     */
    protected String getDoiFromMetaData(URL url) throws ScrapingException {
        if (!ValidationUtils.present(url)) {
            return null;
        }

        final String pageContent;
        try {
            pageContent = WebUtils.getContentAsString(url);
        } catch (IOException e) {
            throw new ScrapingException(e);
        }

        // Guard against a missing page body: Pattern.matcher(null) would throw
        // a NullPointerException and prevent the caller's URL-based fallback.
        if (!ValidationUtils.present(pageContent)) {
            return null;
        }

        final Matcher citationDoiMatcher = DOIPATTERN_HIGHWIRE_PRESS_TAGS.matcher(pageContent);
        if (citationDoiMatcher.find()) {
            return citationDoiMatcher.group(1);
        }

        // no citation_doi tag; fall back to the inherited meta data extraction
        final String extractedDoi = extractData(pageContent).get("doi");
        return ValidationUtils.present(extractedDoi) ? extractedDoi : null;
    }

    /**
     * @return the fixed description text of this scraper
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return an immutable single-element collection containing this scraper
     */
    @Override
    public Collection<Scraper> getScraper() {
        return Collections.singletonList(this);
    }

    /**
     * Decides whether this scraper should run for the given context.
     *
     * @param scrapingContext the context to check
     * @return {@code true} only if the context has no DOI URL and its selected
     *         text is not a selection supported by {@link DOIUtils}
     */
    @Override
    public boolean supportsScrapingContext(ScrapingContext scrapingContext) {
        if (scrapingContext.getDoiURL() != null) {
            return false;
        }
        return !DOIUtils.isSupportedSelection(scrapingContext.getSelectedText());
    }

    /**
     * @return always {@code null}
     */
    public String getSupportedSiteName() {
        return null;
    }

    /**
     * @return always {@code null}
     */
    public String getSupportedSiteURL() {
        return null;
    }
}
