package org.bibsonomy.scraper.url.kde.ats;

import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.BibTexUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.CitedbyScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.generic.GenericRISURLScraper;
import org.bibsonomy.util.WebUtils;
import org.bibsonomy.util.XmlUtils;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/ats/ATSScraper.class */
/**
 * Scraper for publication pages of the American Thoracic Society Journals
 * (atsjournals.org).
 *
 * <p>The citation is obtained from the site's {@code downloadCitation} action
 * using the DOI found in the page URL. Afterwards the abstract is looked up in
 * the publication page's HTML and passed to
 * {@link BibTexUtils#addFieldIfNotContained}. The class can additionally
 * extract the "cited by" section of a page.
 */
public class ATSScraper extends GenericRISURLScraper implements CitedbyScraper {
    private static final Log log = LogFactory.getLog(ATSScraper.class);

    private static final String SITE_NAME = "American Thoracic Society Journals";
    private static final String SITE_URL = "http://www.atsjournals.org/";
    private static final String INFO = "This scraper parses a publication page from the " + href(SITE_URL, SITE_NAME);

    /** Prefix of the citation download URL; the DOI is appended to it. */
    private static final String BIBTEX_URL = "http://www.atsjournals.org/action/downloadCitation?doi=";

    /**
     * Host/path pattern pair announcing which URLs this scraper handles. The
     * dot in the host name is escaped so that it only matches a literal dot,
     * and the pair is created with explicit type arguments instead of a raw type.
     */
    private static final List<Pair<Pattern, Pattern>> URL_PATTERNS = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*atsjournals\\.org"), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Matches a DOI inside a URL: the "10." prefix, a numeric registrant code,
     * a (possibly percent-encoded) slash and the suffix. The suffix stops at
     * '?', '#', '&' or whitespace so that query strings and fragments of the
     * page URL do not leak into the DOI.
     */
    private static final Pattern ID_PATTERN = Pattern.compile("10\\.\\d{4,9}(?:/|%2[Ff])[^?#&\\s]+");

    /** The whole match of {@link #ID_PATTERN} is the DOI. */
    private static final int ID_GROUP = 0;

    private static final Pattern ABSTRACT_PATTERN = Pattern.compile("<div class=\"abstractSection abstractInFull\"><p.*?>(.*?)</p></div>");
    private static final Pattern CITEDBY = Pattern.compile("<div class=\"citedByEntry\">(.*)</div></div>");

    /**
     * Extracts the DOI from the given URL string.
     *
     * @param str the URL of the publication page as a string
     * @return the first DOI found in {@code str}, or {@code null} if there is none
     */
    private static String extractId(String str) {
        final Matcher matcher = ID_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(ID_GROUP);
        }
        return null;
    }

    /** @return the human-readable name of the supported site */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /** @return the base URL of the supported site */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    /** @return a short description of this scraper containing a link to the site */
    @Override
    public String getInfo() {
        return INFO;
    }

    /** @return the single host/path pattern pair for atsjournals.org */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return URL_PATTERNS;
    }

    /**
     * Downloads the publication page (sending the cookies obtained for the
     * URL) and extracts the abstract paragraph from it.
     *
     * @param url the URL of the publication page
     * @return the text of the abstract, or {@code null} if the page contains no
     *         matching abstract section or if any error occurred (the error is logged)
     */
    private static String abstractParser(URL url) {
        try {
            final String page = WebUtils.getContentAsString(url.toString(), WebUtils.getCookies(url));
            final Matcher matcher = ABSTRACT_PATTERN.matcher(page);
            if (matcher.find()) {
                // the matched fragment is HTML; run it through XmlUtils to obtain its text
                return XmlUtils.getText(XmlUtils.getDOM(matcher.group(1)));
            }
            return null;
        } catch (Exception e) {
            // best effort: a missing abstract must not make the whole scrape fail
            log.error("error while getting abstract " + url, e);
            return null;
        }
    }

    /**
     * Passes the abstract scraped from the publication page to
     * {@link BibTexUtils#addFieldIfNotContained} as the "abstract" field.
     *
     * @param scrapingContext the context holding the URL of the publication page
     * @param str the scraping result to post-process
     * @return the result of {@code addFieldIfNotContained}
     */
    @Override
    protected String postProcessScrapingResult(ScrapingContext scrapingContext, String str) {
        return BibTexUtils.addFieldIfNotContained(str, "abstract", abstractParser(scrapingContext.getUrl()));
    }

    /**
     * Builds the citation download URL for the given publication page.
     *
     * @param url the URL of the publication page
     * @param str not used by this implementation
     * @return the download URL, or {@code null} if no DOI could be found in {@code url}
     */
    @Override
    protected String getDownloadURL(URL url, String str) {
        final String id = extractId(url.toString());
        if (id == null) {
            // Do not build a bogus "...?doi=null" request.
            // NOTE(review): assumes the base class treats null as "no download URL" - confirm.
            return null;
        }
        return BIBTEX_URL + id;
    }

    /**
     * Downloads the publication page and stores the content of its
     * "citedByEntry" section in the scraping context.
     *
     * @param scrapingContext the context holding the URL; receives the cited-by content
     * @return {@code true} if a cited-by section was found and stored,
     *         {@code false} if none was found or an error occurred (the error is logged)
     * @throws ScrapingException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public boolean scrapeCitedby(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final String page = WebUtils.getContentAsString(scrapingContext.getUrl().toString());
            final Matcher matcher = CITEDBY.matcher(page);
            if (!matcher.find()) {
                return false;
            }
            scrapingContext.setCitedBy(matcher.group(1));
            return true;
        } catch (Exception e) {
            log.error("error while getting cited by " + scrapingContext.getUrl().toString(), e);
            return false;
        }
    }
}
