package org.bibsonomy.scraper.url.kde.annualreviews;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.Tuple;
import org.bibsonomy.scraper.UrlScraper;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.PageNotSupportedException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;

/* loaded from: input_file:org/bibsonomy/scraper/url/kde/annualreviews/AnnualreviewsScraper.class */
/**
 * Scraper for journal pages hosted on arjournals.annualreviews.org.
 *
 * <p>The DOI of the article is taken either from a {@code /doi/abs/<doi>} path or
 * from a {@code doi=<doi>} query parameter. A cookie is first requested from the
 * site's start page and then sent along with the request to the site's BibTeX
 * citation download URL; the response body becomes the scraping result.
 */
public class AnnualreviewsScraper extends UrlScraper {
    private static final String HOST = "arjournals.annualreviews.org";
    private static final String BASE_URL = "http://" + HOST;
    private static final String DOWNLOAD_PATH_AND_QUERY = "/action/downloadCitation?format=bibtex&include=cit&doi=";
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)";
    /** Charset used when the response declares none; one byte maps to one char. */
    private static final String FALLBACK_CHARSET = "ISO-8859-1";
    private static final String INFO = "Annual Reviews Scraper: Supports journals from " + href("http://arjournals.annualreviews.org/", "Annual Reviews");

    private static final Pattern DOI_PATH_PATTERN = Pattern.compile("/doi/abs/(.*)");
    private static final Pattern DOI_QUERY_PATTERN = Pattern.compile("doi=([^&]*)");
    /** Extracts the charset name from a Content-Type header value. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset\\s*=\\s*\"?([A-Za-z0-9][\\w.:+-]*)", Pattern.CASE_INSENSITIVE);

    // NOTE(review): the dots in the host pattern are unescaped and therefore match any
    // character. Left unchanged because it is not visible here how the pattern is consumed.
    private static final List<Tuple<Pattern, Pattern>> PATTERNS = Collections.singletonList(
            new Tuple<Pattern, Pattern>(Pattern.compile(".*arjournals.annualreviews.org"), UrlScraper.EMPTY_PATTERN));

    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * Extracts the DOI from the URL of the context, downloads the matching BibTeX
     * entry and stores it in the context.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return {@code true} once a BibTeX result has been stored
     * @throws PageNotSupportedException if no DOI can be found in the URL
     * @throws ScrapingFailureException if the download yields no content
     * @throws ScrapingException if the download fails with an I/O error
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String doi = extractDoi(scrapingContext.getUrl().getPath(), scrapingContext.getUrl().getQuery());
        if (doi == null) {
            throw new PageNotSupportedException("This page arjournals.annualreviews.org is not supported.");
        }

        final String bibtex = download(doi);
        // A blank body carries no BibTeX entry, so it must not be reported as a success.
        if (bibtex == null || bibtex.trim().length() == 0) {
            throw new ScrapingFailureException("Bibtex download failed. Can't scrape any bibtex.");
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * Looks for a DOI in the path first and in the query string second.
     *
     * @param path path component of the URL, may be {@code null}
     * @param query query component of the URL, {@code null} when the URL has none
     * @return the first non-empty DOI found, or {@code null} if there is none
     */
    private static String extractDoi(CharSequence path, CharSequence query) {
        final String fromPath = firstGroup(DOI_PATH_PATTERN, path);
        if (fromPath != null) {
            return fromPath;
        }
        return firstGroup(DOI_QUERY_PATTERN, query);
    }

    /**
     * Returns group 1 of the first match of {@code pattern} in {@code input}.
     *
     * @return the captured text, or {@code null} if {@code input} is {@code null},
     *         nothing matches, or the captured text is empty
     */
    private static String firstGroup(Pattern pattern, CharSequence input) {
        if (input == null) {
            return null;
        }
        final Matcher matcher = pattern.matcher(input);
        if (!matcher.find()) {
            return null;
        }
        final String value = matcher.group(1);
        return value.length() == 0 ? null : value;
    }

    /**
     * Downloads the BibTeX citation for the given DOI.
     *
     * @param doi DOI as it appeared in the scraped URL; appended to the download URL unchanged
     * @return the body of the download response
     * @throws ScrapingException wrapping any I/O failure, including a malformed URL
     */
    private String download(String doi) throws ScrapingException {
        try {
            final URL citationUrl = new URL(BASE_URL + DOWNLOAD_PATH_AND_QUERY + doi);
            return getPageContent((HttpURLConnection) citationUrl.openConnection(), getCookie());
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            throw new InternalFailureException(e);
        }
    }

    /**
     * Requests the start page and returns the cookie it sets.
     *
     * @return the first {@code Set-Cookie} header value up to (excluding) the first
     *         {@code ';'}, or {@code null} if the response sets no cookie
     * @throws IOException if the connection fails
     */
    private String getCookie() throws IOException {
        final HttpURLConnection connection = (HttpURLConnection) new URL(BASE_URL).openConnection();
        try {
            configure(connection);
            connection.connect();
            String cookie = connection.getHeaderField("Set-Cookie");
            if (cookie != null) {
                final int attributesStart = cookie.indexOf(';');
                if (attributesStart >= 0) {
                    cookie = cookie.substring(0, attributesStart);
                }
            }
            return cookie;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Sends the request and returns the response body as text.
     *
     * @param httpURLConnection not yet connected connection to read from
     * @param str cookie to send, or {@code null} to send none
     * @return the response body, decoded with the charset named in the Content-Type
     *         header, or with ISO-8859-1 if none is named or it is unsupported
     * @throws IOException if connecting or reading fails
     */
    private String getPageContent(HttpURLConnection httpURLConnection, String str) throws IOException {
        configure(httpURLConnection);
        if (str != null) {
            httpURLConnection.setRequestProperty("Cookie", str);
        }
        try {
            httpURLConnection.connect();
            final InputStream in = httpURLConnection.getInputStream();
            try {
                final ByteArrayOutputStream body = new ByteArrayOutputStream();
                final byte[] chunk = new byte[4096];
                int length;
                while ((length = in.read(chunk)) >= 0) {
                    body.write(chunk, 0, length);
                }
                return decode(body, httpURLConnection.getContentType());
            } finally {
                in.close();
            }
        } finally {
            httpURLConnection.disconnect();
        }
    }

    /**
     * Applies the request settings shared by the cookie and the download request.
     * Redirects are not followed. The JVM-wide
     * {@code HttpURLConnection.setFollowRedirects} switch is deliberately not touched:
     * it has no effect on a connection whose instance flag is set explicitly.
     */
    private static void configure(HttpURLConnection connection) {
        connection.setAllowUserInteraction(true);
        connection.setDoInput(true);
        connection.setDoOutput(false);
        connection.setUseCaches(false);
        connection.setInstanceFollowRedirects(false);
        connection.setRequestProperty("User-Agent", USER_AGENT);
    }

    /**
     * Decodes the collected bytes using the charset declared in {@code contentType}.
     *
     * @param body raw response bytes
     * @param contentType Content-Type header value, may be {@code null}
     * @return the decoded text; ISO-8859-1 is used when no usable charset is declared
     * @throws IOException declared by the decoding call; not expected for the fallback charset
     */
    private static String decode(ByteArrayOutputStream body, String contentType) throws IOException {
        if (contentType != null) {
            final Matcher matcher = CHARSET_PATTERN.matcher(contentType);
            if (matcher.find()) {
                try {
                    return body.toString(matcher.group(1));
                } catch (UnsupportedEncodingException ignored) {
                    // Unknown charset announced by the server: fall back below.
                }
            }
        }
        return body.toString(FALLBACK_CHARSET);
    }

    @Override
    public List<Tuple<Pattern, Pattern>> getUrlPatterns() {
        return PATTERNS;
    }
}
