package org.bibsonomy.scraper.url.kde.librarything;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.bibsonomy.common.Pair;
import org.bibsonomy.model.util.PersonNameUtils;
import org.bibsonomy.scraper.AbstractUrlScraper;
import org.bibsonomy.scraper.ScrapingContext;
import org.bibsonomy.scraper.exceptions.InternalFailureException;
import org.bibsonomy.scraper.exceptions.ScrapingException;
import org.bibsonomy.util.WebUtils;

/* loaded from: input_file:WEB-INF/lib/bibsonomy-scraper-2.0.36.jar:org/bibsonomy/scraper/url/kde/librarything/LibrarythingScraper.class */
/**
 * URL scraper that extracts a BibTeX {@code @book} entry from librarything
 * pages by matching a fixed set of regular expressions against the page HTML.
 *
 * <p>All per-request data is kept in local variables, so a single instance can
 * be reused for several scrapes without one result leaking into the next.
 */
public class LibrarythingScraper extends AbstractUrlScraper {
    private static final String URL_LIBRARYTHING_PAGE = "http://www.librarything.com";
    private static final String SITE_URL = "http://www.librarything.com/";
    private static final String URL_LIBRARYTHING_PAGE_HOST = "librarything.com";
    private static final String SITE_NAME = "librarything";
    private static final String INFO = "Extracts publication from " + href("http://www.librarything.com/work-info", SITE_NAME) + ". If a http://www.librarything.com/work page is selected, then the scraper trys to download the according work-info page.";

    /** Host fragment that precedes the top-level domain in every librarything URL. */
    private static final String HOST_MARKER = "librarything.";

    /** Currently unused; kept for reference. */
    private static final String LIBRARYTHING_PATTERN_BIBLIOGRAPHIC_INFOS = "<td class=\"bookeditfield\" id=\"bookedit_publication\">([^<]*)</td>";

    // Compiled once: Pattern instances are immutable and safe to share.
    private static final Pattern PATTERN_LINK = Pattern.compile("<a\\b[^>]*>([^<]*)</a>");
    private static final Pattern LIBRARYTHING_PATTERN_OTHER_AUTHORS = Pattern.compile("<td class=\"bookeditfield\" id=\"bookedit_otherauthors\">([^<]*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_TITLE = Pattern.compile("<td class=\"bookeditfield\" id=\"bookedit_title\"><b>([^<]*)</b></td>");
    private static final Pattern LIBRARYTHING_PATTERN_WORK_TITLE = Pattern.compile("<span class=\"bookeditfield\" id=\"bookedit_title\"><b>([^<]*)</b></span>");
    private static final Pattern LIBRARYTHING_PATTERN_AUTHOR_LINK = Pattern.compile("<td class=\"bookeditfield\" id=\"bookedit_authorunflip\">(.*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_AUTHOR = Pattern.compile("<h2>by <a href=\"/author/[^>]*>([^<]*)</a></h2>");
    private static final Pattern LIBRARYTHING_PATTERN_DATE = Pattern.compile("<td class=\"bookeditfield\" id=\"bookedit_date\">([^<]*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_ISBN = Pattern.compile("<td class=\"bookeditfield\" id=\"bookedit_ISBN\">([^<]*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_WORK_AUTHOR_LINK = Pattern.compile("<td class=\"left\">Author</td><td class=\"bookNonEditField\">(.*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_WORK_ISBN_10 = Pattern.compile("<td class=\"left\">ISBN-10</td><td class=\"bookNonEditField\">([^<]*)</td>");
    private static final Pattern LIBRARYTHING_PATTERN_WORK_ISBN_13 = Pattern.compile("<td class=\"left\">ISBN-13</td><td class=\"bookNonEditField\">([^<]*)</td>");

    final List<Pair<Pattern, Pattern>> patterns = Collections.singletonList(new Pair<Pattern, Pattern>(Pattern.compile(".*librarything\\..*"), AbstractUrlScraper.EMPTY_PATTERN));

    /**
     * Downloads the page behind the context URL (rewritten to the {@code .com}
     * host if necessary), extracts author, title, year and ISBN via regular
     * expressions and stores the resulting BibTeX entry in the context.
     *
     * @param scrapingContext context providing the URL and receiving the result
     * @return always {@code true} when the page could be downloaded
     * @throws ScrapingException an {@link InternalFailureException} wrapping the
     *         cause if the rewritten URL is malformed or the download fails
     */
    @Override
    protected boolean scrapeInternal(ScrapingContext scrapingContext) throws ScrapingException {
        scrapingContext.setScraper(this);

        final URL url;
        final String content;
        try {
            // MalformedURLException is an IOException, so one handler covers both steps.
            url = toDotComUrl(scrapingContext.getUrl());
            content = WebUtils.getContentAsString(url);
        } catch (IOException e) {
            throw new InternalFailureException(e);
        }

        final String author = extractAuthors(content);
        final String title = extractTitle(content);
        final String year = firstGroup(LIBRARYTHING_PATTERN_DATE, content);
        final String misc = extractIsbnFields(content);
        // The citation key is rebuilt for every scrape instead of being accumulated.
        final String key = (year == null) ? SITE_NAME : SITE_NAME + year;

        final StringBuilder bibtex = new StringBuilder();
        bibtex.append("@book{").append(key).append(",\n");
        if (author != null) {
            bibtex.append("\tauthor = {").append(author).append("},\n");
        }
        if (title != null) {
            bibtex.append("\ttitle = {").append(title).append("},\n");
        }
        if (year != null) {
            bibtex.append("\tyear = {").append(year).append("},\n");
        }
        if (misc != null) {
            bibtex.append("\t").append(misc).append(",\n");
        }
        bibtex.append("\turl = {").append(url).append("},\n");

        final String unescaped = StringEscapeUtils.unescapeHtml(bibtex.toString());
        // Drop the trailing ",\n" of the last field before closing the entry.
        scrapingContext.setBibtexResult(unescaped.substring(0, unescaped.length() - 2) + "\n}\n");
        return true;
    }

    /**
     * Returns the given URL unchanged if its host contains
     * {@code librarything.com}; otherwise replaces the top-level domain that
     * follows {@code "librarything."} with {@code com}, keeping everything from
     * the first {@code '/'} after the host marker.
     *
     * @param original the URL taken from the scraping context
     * @return a URL pointing at the {@code .com} host, or {@code original} if no
     *         rewrite is needed or possible
     * @throws MalformedURLException if the rewritten string is not a valid URL
     */
    private static URL toDotComUrl(URL original) throws MalformedURLException {
        if (original.getHost().contains(URL_LIBRARYTHING_PAGE_HOST)) {
            return original;
        }
        final String text = original.toString();
        final int marker = text.indexOf(HOST_MARKER);
        if (marker < 0) {
            // Nothing to rewrite; avoid computing substrings from a -1 index.
            return original;
        }
        final int tldStart = marker + HOST_MARKER.length();
        final int pathStart = text.indexOf('/', tldStart);
        // A URL without any path after the host would otherwise cause substring(-1).
        final String path = (pathStart < 0) ? "" : text.substring(pathStart);
        return new URL(text.substring(0, tldStart) + "com" + path);
    }

    /**
     * Determines the author string: first the {@code <h2>by ...</h2>} heading,
     * then the work-info author cell, then the edit-form author cell; the
     * "other authors" cell is appended with the person name delimiter.
     *
     * @param content page HTML
     * @return the author string, or {@code null} if nothing matched
     */
    private static String extractAuthors(String content) {
        String author = firstGroup(LIBRARYTHING_PATTERN_AUTHOR, content);
        if (author == null) {
            author = linkTextOrCell(LIBRARYTHING_PATTERN_WORK_AUTHOR_LINK, content);
        }
        if (author == null) {
            author = linkTextOrCell(LIBRARYTHING_PATTERN_AUTHOR_LINK, content);
        }
        final String others = firstGroup(LIBRARYTHING_PATTERN_OTHER_AUTHORS, content);
        if (others != null && others.length() > 0) {
            author = (author == null) ? others : author + PersonNameUtils.PERSON_NAME_DELIMITER + others;
        }
        return author;
    }

    /**
     * Extracts the title from the edit-form cell, falling back to the work
     * page {@code <span>} variant.
     *
     * @param content page HTML
     * @return the title, or {@code null} if neither pattern matched
     */
    private static String extractTitle(String content) {
        final String title = firstGroup(LIBRARYTHING_PATTERN_TITLE, content);
        return (title != null) ? title : firstGroup(LIBRARYTHING_PATTERN_WORK_TITLE, content);
    }

    /**
     * Builds the {@code isbn={...}} field text. The edit-form ISBN cell wins;
     * otherwise the ISBN-10 and ISBN-13 cells are emitted, separated by
     * {@code ", "} when both are present.
     *
     * @param content page HTML
     * @return the field text without trailing comma, or {@code null} if no ISBN was found
     */
    private static String extractIsbnFields(String content) {
        final String isbn = firstGroup(LIBRARYTHING_PATTERN_ISBN, content);
        if (isbn != null) {
            return "isbn={" + isbn + "}";
        }
        String fields = null;
        final String isbn10 = firstGroup(LIBRARYTHING_PATTERN_WORK_ISBN_10, content);
        if (isbn10 != null) {
            fields = "isbn={" + isbn10 + "}";
        }
        final String isbn13 = firstGroup(LIBRARYTHING_PATTERN_WORK_ISBN_13, content);
        if (isbn13 != null) {
            final String field = "isbn={" + isbn13 + "}";
            fields = (fields == null) ? field : fields + ", " + field;
        }
        return fields;
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in
     * {@code content}, or {@code null} if there is no match.
     */
    private static String firstGroup(Pattern pattern, String content) {
        final Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Finds the first match of {@code cellPattern} and returns the text of the
     * first anchor inside it. If the matched cell contains no anchor, the whole
     * matched text is returned (this fallback is kept as-is from the previous
     * implementation). Returns {@code null} if the cell itself is not found.
     */
    private static String linkTextOrCell(Pattern cellPattern, String content) {
        final Matcher cell = cellPattern.matcher(content);
        if (!cell.find()) {
            return null;
        }
        final String matched = cell.group();
        final Matcher link = PATTERN_LINK.matcher(matched);
        return link.find() ? link.group(1) : matched;
    }

    /**
     * @return a short description of what this scraper extracts
     */
    @Override
    public String getInfo() {
        return INFO;
    }

    /**
     * @return the host/path pattern pairs this scraper is registered for
     */
    @Override
    public List<Pair<Pattern, Pattern>> getUrlPatterns() {
        return this.patterns;
    }

    /**
     * @return the display name of the supported site
     */
    @Override
    public String getSupportedSiteName() {
        return "Librarything";
    }

    /**
     * @return the base URL of the supported site
     */
    @Override
    public String getSupportedSiteURL() {
        return URL_LIBRARYTHING_PAGE;
    }
}
