package org.bibsonomy.scraper.url.kde.osa;

import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ReferencesScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.UrlUtils;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.BeanFactory;
import org.springframework.web.servlet.support.WebContentGenerator;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.8.14.jar:org/bibsonomy/scraper/url/kde/osa/OSAScraper.class */
/**
 * URL scraper for publications hosted by the Optical Society of America.
 *
 * <p>The article id is read from the {@code <input name="articles" ...>} element of the
 * publication page and posted to the site's citation download endpoint, whose response is
 * stored as the BibTeX result. The references section of a page can be extracted as well.
 */
public class OSAScraper extends AbstractUrlScraper implements ReferencesScraper {
    private static final String OSA_HOST = "osapublishing.org";
    private static final String HTTP = "https://www.";
    private static final String OSA_BIBTEX_DOWNLOAD_PATH = "/custom_tags/IB_Download_Citations.cfm";
    private static final Log log = LogFactory.getLog(OSAScraper.class);
    private static final String SITE_URL = "https://www.osapublishing.org/";
    private static final String SITE_NAME = "Optical Society of America";
    private static final String info = "This Scraper parses a publication from the " + href(SITE_URL, SITE_NAME) + ".";

    /** Matches a complete {@code <input ...>} tag. */
    private static final Pattern inputPattern = Pattern.compile("<input\\b[^>]*>");
    /** Matches a double-quoted {@code value="..."} attribute. */
    private static final Pattern valuePattern = Pattern.compile("value=\"[^\"]*\"");
    /** Number of characters in the {@code value="} prefix matched by {@link #valuePattern}. */
    private static final int VALUE_PREFIX_LENGTH = "value=\"".length();

    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*" + OSA_HOST), AbstractUrlScraper.EMPTY_PATTERN));
    static final Pattern references_pattern = Pattern.compile("(?s)<h3>References</h3>\\s+<div .*>\\s+<ol>(.*)</ol>");

    /**
     * @return a short description of this scraper
     */
    @Override
    public String getInfo() {
        return info;
    }

    /**
     * Downloads the BibTeX export for the publication page held by the scraping context.
     *
     * @param scrapingContext context providing the page URL and content; receives the BibTeX result
     * @return {@code true} once the BibTeX result has been stored in the context
     * @throws ScrapingException if the page carries no article id, the cookies cannot be
     *                           fetched, or the citation download fails
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final String articleId = extractArticleId(scrapingContext.getPageContent());
        if (articleId == null) {
            // Without the id the export request cannot be built; fail explicitly instead of
            // posting a null value.
            throw new ScrapingFailureException("no article id found on page " + scrapingContext.getUrl());
        }

        // Cookie retrieval is kept separate from the download so that the "no cookie" message
        // is only reported when fetching the cookies is what actually failed.
        final String cookies;
        try {
            cookies = WebUtils.getCookies(scrapingContext.getUrl());
        } catch (IOException e) {
            log.error("could not fetch cookies for " + scrapingContext.getUrl(), e);
            throw new InternalFailureException("An unexpected IO error has occurred. No Cookie has been generated.");
        }

        final String bibtex;
        try {
            bibtex = getContent(new URL(HTTP + OSA_HOST + OSA_BIBTEX_DOWNLOAD_PATH), cookies, articleId, "export_bibtex");
        } catch (IOException e) {
            // Also covers MalformedURLException, which is a subclass of IOException.
            throw new InternalFailureException(e);
        }

        if (bibtex == null) {
            throw new ScrapingFailureException("getting bibtex failed");
        }
        scrapingContext.setBibtexResult(bibtex);
        return true;
    }

    /**
     * Finds the value of the first {@code <input>} tag that contains {@code name="articles"}
     * and has a double-quoted {@code value} attribute.
     *
     * @param pageContent HTML of the publication page, may be {@code null}
     * @return the attribute value without the surrounding quotes, or {@code null} if none is found
     */
    private static String extractArticleId(CharSequence pageContent) {
        if (pageContent == null) {
            return null;
        }
        final Matcher inputMatcher = inputPattern.matcher(pageContent);
        while (inputMatcher.find()) {
            final String inputTag = inputMatcher.group();
            if (!inputTag.contains("name=\"articles\"")) {
                continue;
            }
            final Matcher valueMatcher = valuePattern.matcher(inputTag);
            if (valueMatcher.find()) {
                final String attribute = valueMatcher.group();
                // Strip the leading value=" and the trailing quote.
                return attribute.substring(VALUE_PREFIX_LENGTH, attribute.length() - 1);
            }
        }
        return null;
    }

    /**
     * Posts the citation export form and returns the response body.
     *
     * @param queryUrl  URL of the citation download endpoint
     * @param cookie    cookie string to send with the request; skipped when {@code null} or empty
     * @param articleId value for the {@code articles} form field
     * @param action    value for the {@code ArticleAction} form field
     * @return the response body
     * @throws IOException if the request cannot be sent or the response cannot be read
     */
    private static String getContent(URL queryUrl, String cookie, String articleId, String action) throws IOException {
        final HttpURLConnection connection = WebUtils.createConnnection(queryUrl);
        try {
            connection.setAllowUserInteraction(false);
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            // "Cookie" is the request header for cookies; "Set-Cookie" is only meaningful in responses.
            if (cookie != null && !cookie.isEmpty()) {
                connection.setRequestProperty("Cookie", cookie);
            }

            final String body = "articles=" + UrlUtils.safeURIEncode(articleId)
                    + "&ArticleAction=" + UrlUtils.safeURIEncode(action);
            connection.setRequestProperty("Content-Length", String.valueOf(body.length()));

            try (DataOutputStream out = new DataOutputStream(connection.getOutputStream())) {
                out.writeBytes(body);
                out.flush();
            }

            final StringWriter response = new StringWriter();
            try (BufferedInputStream in = new BufferedInputStream(connection.getInputStream())) {
                // NOTE(review): every byte becomes one char, so multi-byte encodings in the
                // response are not decoded - confirm which charset the endpoint uses.
                int current;
                while ((current = in.read()) >= 0) {
                    response.write(current);
                }
            }
            return response.toString();
        } finally {
            // Release the connection on the failure paths as well.
            connection.disconnect();
        }
    }

    /**
     * @return the URL patterns (host pattern, path pattern) this scraper is registered for
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * @return the name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }

    /**
     * Extracts the content of the ordered list below the "References" heading of the page at
     * the context's URL and stores it in the context.
     *
     * @param scrapingContext context providing the page URL; receives the references
     * @return {@code true} if a references section was found, {@code false} if none was found
     *         or an error occurred (errors are logged, not propagated)
     * @throws ScrapingException declared by the interface; not thrown by this implementation's own code
     */
    @Override
    public boolean scrapeReferences(ScrapingContext scrapingContext) throws ScrapingException {
        try {
            final Matcher referencesMatcher = references_pattern.matcher(WebUtils.getContentAsString(scrapingContext.getUrl()));
            if (referencesMatcher.find()) {
                scrapingContext.setReferences(referencesMatcher.group(1));
                return true;
            }
            return false;
        } catch (Exception e) {
            // Deliberately best-effort: missing references must not fail the whole scrape.
            log.error("error while scraping references for " + scrapingContext.getUrl(), e);
            return false;
        }
    }
}
