package org.bibsonomy.search.index.utils.extractor;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.commons.io.FilenameUtils;

/* loaded from: input_file:org/bibsonomy/search/index/utils/extractor/PDFExtractor.class */
/**
 * {@link ContentExtractor} that pulls the plain text out of PDF files using iText.
 */
public class PDFExtractor implements ContentExtractor {

    /**
     * Checks whether the given file name denotes a PDF file.
     *
     * @param str the file name (or path) to inspect
     * @return {@code true} if the extension of {@code str} equals "pdf", ignoring case
     */
    @Override
    public boolean supports(String str) {
        return "pdf".equalsIgnoreCase(FilenameUtils.getExtension(str));
    }

    /**
     * Extracts the text of every page of the given PDF file.
     * <p>
     * The text of each page is appended in page order, followed by a single
     * space; the concatenated result is trimmed before it is returned.
     *
     * @param file the PDF file to read
     * @return the trimmed, concatenated text of all pages
     * @throws IOException if the file cannot be opened or the PDF cannot be read or parsed
     */
    @Override
    public String extractContent(File file) throws IOException {
        // try-with-resources guarantees the file handle is released even when
        // the PDF is corrupt and parsing fails half-way through.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            final PdfReader pdfReader = new PdfReader(fileInputStream);
            try {
                final PdfReaderContentParser contentParser = new PdfReaderContentParser(pdfReader);
                final StringBuilder text = new StringBuilder();
                final int pageCount = pdfReader.getNumberOfPages();
                // PDF page numbers are 1-based.
                for (int page = 1; page <= pageCount; page++) {
                    // A fresh strategy per page so text from earlier pages is not carried over.
                    final SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                    contentParser.processContent(page, strategy);
                    text.append(strategy.getResultantText());
                    text.append(" ");
                }
                return text.toString().trim();
            } finally {
                // Closed in a finally block (rather than try-with-resources) so this
                // does not depend on PdfReader implementing AutoCloseable.
                pdfReader.close();
            }
        }
    }
}
