package org.bibsonomy.scraper.generic;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.regex.Pattern;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.Scraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.converter.HTMLMetaDataHighwirePressToBibtexConverter;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.ValidationUtils;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:org/bibsonomy/scraper/generic/HighwirePressScraper.class */
/**
 * Generic scraper that downloads the page behind the URL of a
 * {@link ScrapingContext} and converts the Highwire Press HTML meta tags
 * found in it (for example {@code citation_title} and
 * {@code citation_author}) into BibTeX by means of
 * {@link HTMLMetaDataHighwirePressToBibtexConverter}.
 */
public class HighwirePressScraper implements Scraper {
    /** Name reported by {@link #getSupportedSiteName()}. */
    private static final String SITE_NAME = "HighwirePressScraper";

    /** Converter that turns the downloaded HTML into a BibTeX string. */
    private static final HTMLMetaDataHighwirePressToBibtexConverter HIGHWIRE_PRESS_CONVERTER = new HTMLMetaDataHighwirePressToBibtexConverter();

    /** URL reported by {@link #getSupportedSiteURL()} and linked from {@link #INFO}. */
    private static final String SITE_URL = "https://scholar.google.com/intl/en/scholar/inclusion.html#indexing";

    /** Human-readable description returned by {@link #getInfo()}. */
    private static final String INFO = "The HighwirePressScraper resolves bibtex out of HTML Metatags, which are defined in the Highwire Press tags Metaformat, example given " + AbstractUrlScraper.href(SITE_URL, "here") + "\n Because the values of HighwirePress-Metadata are not standardized, the scraper may not always be successful.";

    /** Matches a {@code <meta>} tag carrying {@code name="citation_title"} and a {@code content} attribute. */
    private static final Pattern HIGHWIRE_PRESS_PATTERN_TITLE = Pattern.compile("(?im)<\\s*meta(?=[^>]*name=\"citation_title\")[^>]*content=\"([^\"]*)\"[^>]*>");

    /** Matches a {@code <meta>} tag carrying {@code name="citation_author"} and a {@code content} attribute. */
    private static final Pattern HIGHWIRE_PRESS_PATTERN_AUTHOR = Pattern.compile("(?im)<\\s*meta(?=[^>]*name=\"citation_author\")[^>]*content=\"([^\"]*)\"[^>]*>");

    /**
     * Downloads the page of the context's URL, converts it to BibTeX and, on
     * success, stores the result (with a {@code url} field added if it is not
     * already contained) together with this scraper in the context.
     *
     * @param scrapingContext the context providing the URL and receiving the result
     * @return {@code true} if BibTeX was produced and stored in the context;
     *         {@code false} if the context has no URL, or if the page content
     *         or the converted BibTeX is not present according to
     *         {@link ValidationUtils#present}
     * @throws ScrapingException if downloading the page fails with an {@link IOException}
     */
    @Override // org.bibsonomy.scraper.Scraper
    public boolean scrape(ScrapingContext scrapingContext) throws ScrapingException {
        // A context without a URL cannot be handled by this scraper; report
        // "not scraped" instead of failing with a NullPointerException.
        if (scrapingContext.getUrl() == null) {
            return false;
        }
        final String url = scrapingContext.getUrl().toString();
        try {
            final String page = WebUtils.getContentAsString(url);
            if (!ValidationUtils.present(page)) {
                return false;
            }
            final String bibtex = HIGHWIRE_PRESS_CONVERTER.toBibtex(page);
            if (!ValidationUtils.present(bibtex)) {
                return false;
            }
            scrapingContext.setScraper(this);
            scrapingContext.setBibtexResult(BibTexUtils.addFieldIfNotContained(bibtex, "url", url));
            return true;
        } catch (IOException e) {
            throw new ScrapingException(e);
        }
    }

    /**
     * Returns the description of this scraper.
     *
     * @return the info text, including a link to {@link #SITE_URL}
     */
    @Override // org.bibsonomy.scraper.Scraper
    public String getInfo() {
        return INFO;
    }

    /**
     * Returns the scrapers represented by this instance.
     *
     * @return an immutable singleton collection containing only this scraper
     */
    @Override // org.bibsonomy.scraper.Scraper
    public Collection<Scraper> getScraper() {
        return Collections.singleton(this);
    }

    /**
     * Checks whether the page behind the context's URL contains both a
     * {@code citation_title} and a {@code citation_author} meta tag.
     *
     * @param scrapingContext the context providing the URL to inspect
     * @return {@code true} if both meta tags are found; {@code false} if the
     *         context has no URL, the page could not be retrieved, or either
     *         tag is missing
     */
    @Override // org.bibsonomy.scraper.Scraper
    public boolean supportsScrapingContext(ScrapingContext scrapingContext) {
        try {
            if (scrapingContext.getUrl() == null) {
                return false;
            }
            final String page = WebUtils.getContentAsString(scrapingContext.getUrl().toString());
            if (page == null) {
                return false;
            }
            return HIGHWIRE_PRESS_PATTERN_TITLE.matcher(page).find()
                    && HIGHWIRE_PRESS_PATTERN_AUTHOR.matcher(page).find();
        } catch (Exception ignored) {
            // Best-effort probe: any failure while fetching or inspecting the
            // page simply means this scraper does not support the context.
            return false;
        }
    }

    /**
     * Returns the name of the supported site.
     *
     * @return the constant {@link #SITE_NAME}
     */
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * Returns the URL describing the supported meta tag format.
     *
     * @return the constant {@link #SITE_URL}
     */
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
