package org.bibsonomy.scraper.url.kde.webofknowledge;

import com.hp.hpl.jena.sparql.sse.Tags;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.bibsonomy.common.Pair;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.scraper.exceptions.ScrapingFailureException;
import org.bibsonomy.util.WebUtils;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-3.6.2.jar:org/bibsonomy/scraper/url/kde/webofknowledge/WebOfKnowledgeScraper.class */
/**
 * Scraper for full-record pages ({@code /full_record.do}) on
 * {@code apps.webofknowledge.com}.
 *
 * <p>The scraper works in three HTTP steps:
 * <ol>
 *   <li>fetch the record page itself and read the hidden {@code selectedIds}
 *       and {@code recordID} form values from its HTML,</li>
 *   <li>POST those values together with the {@code SID} and {@code qid} taken
 *       from the page URL to the outbound service, and read the {@code qid}
 *       form value from the response,</li>
 *   <li>POST that value to the "save data to file" endpoint, whose response is
 *       used as the BibTeX result.</li>
 * </ol>
 */
public class WebOfKnowledgeScraper extends AbstractUrlScraper {
    private static final String BASE_URL_1 = "http://apps.webofknowledge.com/OutboundService.do?action=go";
    private static final String BASE_URL_2 = "http://ets.webofknowledge.com/ETS/saveDataToFile.do";
    /** URL requested up front only to obtain the cookies sent with all later requests. */
    private static final String COOKIE_URL = "http://webofknowledge.com/?DestApp=UA";
    private static final String SITE_URL = "http://apps.webofknowledge.com/";
    private static final String SITE_NAME = "Web of Knowledge";
    private static final String INFO = "Scrapes publications from " + href(SITE_URL, SITE_NAME) + ".";

    /** Field list sent (already form-encoded) as both {@code filters} and {@code fields_selection}. */
    private static final String EXPORT_FIELDS = "USAGEIND+AUTHORSIDENTIFIERS+ACCESSION_NUM+FUNDING+SUBJECT_CATEGORY+JCR_CATEGORY+LANG+IDS+PAGEC+SABBR+CITREFC+ISSN+PUBINFO+KEYWORDS+CITTIMES+ADDRS+CONFERENCE_SPONSORS+DOCTYPE+ABSTRACT+CONFERENCE_INFO+SOURCE+TITLE+AUTHORS++";

    /** Host pattern / path pattern pair this scraper is registered for. */
    private static final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(
            new Pair<Pattern, Pattern>(Pattern.compile(".*apps.webofknowledge.com"), Pattern.compile("/full_record.do.*")));

    /** {@code SID} parameter of the page URL. */
    private static final Pattern sidPattern = Pattern.compile("SID=([^\\&]*)");
    /** Numeric {@code value} of the tag whose name and id are {@code selectedIds}. */
    private static final Pattern selectedIdsPattern = Pattern.compile("<(?=[\\w\\s=\\\"]*name=\\\"selectedIds\\\")(?=[\\w\\s=\\\"]*id=\\\"selectedIds\\\")[\\w\\s=\\\"]*value=\\\"(\\d+)\\\"[\\w\\s=\\\"/]*>");
    /** Numeric {@code qid} parameter of the page URL. */
    private static final Pattern qidPattern = Pattern.compile("qid=(\\d+)");
    /** {@code value} (form {@code LETTERS:digits}) of the tag named {@code recordID}. */
    private static final Pattern recordIDPattern = Pattern.compile("<(?=[\\w\\s=\\\":]*name=\\\"recordID\\\")[\\w\\s=\\\"]*value=\\\"([A-Z]+:\\d+)\\\"[\\w\\s=\\\"=/]*>");
    /** Numeric {@code value} of the tag whose name and id are {@code qid} in the outbound-service response. */
    private static final Pattern downloadQidPattern = Pattern.compile("<(?=[\\w\\s=\\\"']*name=\\\"qid\\\")(?=[\\w\\s=\\\"']*id=\\\"qid\\\")[\\w\\s=\\\"']*value=[\\\"'](\\d+)[\\\"'][\\w\\s=\\\"'/]*>");

    /**
     * @return the single host/path pattern pair handled by this scraper
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return patterns;
    }

    /**
     * Downloads the BibTeX export for the record page referenced by the context URL.
     *
     * @param scrapingContext supplies the page URL and receives the BibTeX result
     * @return {@code true} if a BibTeX result was stored in the context,
     *         {@code false} if the final download produced no content
     * @throws ScrapingException a {@link ScrapingFailureException} if a required
     *         value is missing from the URL or from a response, an
     *         {@link InternalFailureException} wrapping any {@link IOException}
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);
        try {
            final URL url = scrapingContext.getUrl();

            // URL.getQuery() is null when the URL carries no query component;
            // report that like a missing SID instead of failing with a NullPointerException.
            final String query = url.getQuery();
            if (query == null) {
                throw new ScrapingFailureException("article ID not found in URL");
            }

            // Validate the URL before any network traffic is generated.
            final String sid = extractRequired(sidPattern, query, "article ID not found in URL");
            final String qid = extractRequired(qidPattern, query, "record ID not found in URL");

            final String cookies = WebUtils.getCookies(new URL(COOKIE_URL));

            final String page = WebUtils.getContentAsString(url, cookies);
            final String selectedIds = extractRequired(selectedIdsPattern, page,
                    "selected publications not found (selectedIds is missing)");
            final String recordId = extractRequired(recordIDPattern, page,
                    "record ID not found in page (recordID is missing)");

            final String outboundResponse = WebUtils.getPostContentAsString(cookies, new URL(BASE_URL_1),
                    createPostParamString(recordId, sid, qid, selectedIds));
            final String downloadQid = extractRequired(downloadQidPattern, outboundResponse, "Bibtex not found");

            final String rawBibtex = WebUtils.getPostContentAsString(cookies, new URL(BASE_URL_2),
                    getDownloadPostString(downloadQid, sid));

            // Collapse the doubled braces of the export into single braces.
            // replaceEach returns null for null input, which is treated as "nothing scraped".
            final String bibtex = StringUtils.replaceEach(rawBibtex,
                    new String[]{"{{", "}}"}, new String[]{"{", "}"});
            if (bibtex == null) {
                return false;
            }
            scrapingContext.setBibtexResult(bibtex);
            return true;
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in {@code input}.
     *
     * @param pattern        pattern with at least one capturing group
     * @param input          text to search
     * @param failureMessage message of the exception thrown when nothing matches
     * @throws ScrapingException a {@link ScrapingFailureException} if the pattern does not match
     */
    private static String extractRequired(Pattern pattern, String input, String failureMessage) throws ScrapingException {
        final Matcher matcher = pattern.matcher(input);
        if (!matcher.find()) {
            throw new ScrapingFailureException(failureMessage);
        }
        return matcher.group(1);
    }

    /**
     * Builds the form body for the outbound-service request.
     *
     * @param recordId    {@code recordID} value from the page; its colon is percent-encoded here
     * @param sid         {@code SID} value from the page URL
     * @param qid         {@code qid} value from the page URL
     * @param selectedIds {@code selectedIds} value from the page
     */
    private String createPostParamString(String recordId, String sid, String qid, String selectedIds) {
        return "viewType=fullRecord&product=UA&mark_id=UA&colName=WOS&search_mode=GeneralSearch&locale=go"
                + "&recordID=" + recordId.replace(":", "%3A")
                + "&sortBy=PY.D%3BLD.D%3BSO.A%3BVL.D%3BPG.A%3BAU.A&mode=outputService"
                + "&qid=" + qid
                + "&SID=" + sid
                + "&format=saveToFile"
                + "&filters=" + EXPORT_FIELDS
                + "&selectedIds=" + selectedIds
                + "&mark_to=&mark_from=&count_new_items_marked=0&value%28record_select_type%29=selrecords&marked_list_candidates=10"
                + "&LinksAreAllowedRightClick=CitedRefList.do&LinksAreAllowedRightClick=CitingArticles.do"
                + "&LinksAreAllowedRightClick=OneClickSearch.do&LinksAreAllowedRightClick=full_record.do"
                + "&bib_fields_option=ABSTRACT++"
                + "&fields_selection=" + EXPORT_FIELDS
                + "&save_options=bibtex";
    }

    /**
     * Builds the form body for the "save data to file" request.
     *
     * @param downloadQid {@code qid} value read from the outbound-service response
     * @param sid         {@code SID} value from the page URL
     */
    private String getDownloadPostString(String downloadQid, String sid) {
        // NOTE(review): sortBy and filters are sent with raw ';' and ' ' characters,
        // unlike the encoded form used for the outbound request - kept unchanged.
        return "locale=go&fileOpt=bibtex&colName=WOS&startYear=&endYear=&action=saveDataToFile"
                + "&qid=" + downloadQid
                + "&parentQid=1&sortBy=PY.D;LD.D;SO.A;VL.D;PG.A;AU.A"
                + "&filters=USAGEIND AUTHORSIDENTIFIERS ACCESSION_NUM FUNDING SUBJECT_CATEGORY JCR_CATEGORY LANG IDS PAGEC SABBR CITREFC ISSN PUBINFO KEYWORDS CITTIMES ADDRS CONFERENCE_SPONSORS DOCTYPE ABSTRACT CONFERENCE_INFO SOURCE TITLE AUTHORS"
                + "&numRecsToRetrieve=500"
                + "&SID=" + sid
                + "&product=UA&numRecords=1&subType=&recNum=1&mark_to=1";
    }

    /**
     * @return a short HTML description naming and linking the supported site
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return SITE_NAME;
    }

    /**
     * @return the base URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return SITE_URL;
    }
}
