/*
 * Decompiled with CFR 0.152.
 */
package com.streamscape.lib.utils.parser;

import com.streamscape.ds.session.Session;
import com.streamscape.lib.utils.parser.DefaultParser;
import com.streamscape.lib.utils.parser.FileParser;
import com.streamscape.lib.utils.parser.FileParserSettings;
import com.streamscape.lib.utils.parser.ImageParser;
import com.streamscape.lib.utils.parser.PDFBoxParser;
import com.streamscape.lib.utils.parser.POIParser;
import com.streamscape.lib.utils.parser.StDocument;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;

/**
 * Static helpers that turn a raw document stream into an {@link StDocument} or into plain text.
 *
 * <p>The content type of a stream is detected with Tika and used to pick one of the
 * {@link FileParser} implementations ({@link ImageParser}, {@link PDFBoxParser},
 * {@link POIParser} or {@link DefaultParser}).
 */
public class ParseProcessor {
    /** Matches a token made up of digits only; such tokens are left out of the semantic path. */
    private static final Pattern DIGITS_ONLY = Pattern.compile("\\d+");
    /** Characters that separate tokens inside a file's base name. */
    private static final String NAME_DELIMITERS = "._- ";
    /** Content types routed to {@link PDFBoxParser}. */
    private static final Set<String> PDFBOX_TYPES = new HashSet<String>();
    /** Content types routed to {@link POIParser}. */
    private static final Set<String> POI_TYPES = new HashSet<String>();
    /** Shared Tika facade, created on first use by {@link #getTika()}. */
    private static Tika tika;

    static {
        PDFBOX_TYPES.add(FileParser.PDF_TYPE);
        POI_TYPES.add(FileParser.DOCX_TYPE);
        POI_TYPES.add(FileParser.PPT_TYPE);
        POI_TYPES.add(FileParser.PPTX_TYPE);
    }

    /**
     * Detects the content type of {@code is}, parses it with the matching {@link FileParser}
     * and fills in file name / date metadata from {@code config} when the parser left them unset.
     *
     * @param session           passed through to the selected parser
     * @param is                document content; wrapped in a {@link TikaInputStream}
     * @param fileName          not read by this method (NOTE(review): the file name is taken from
     *                          {@code config} under {@link FileParserSettings#FILE_NAME} instead —
     *                          confirm whether this parameter should act as a fallback)
     * @param preserveStructure forwarded to {@link FileParserSettings#setPreserveStructure}
     * @param forceGrammar      forwarded to {@link FileParserSettings#setForceGrammar}
     * @param config            extra parser options; may carry {@link FileParserSettings#FILE_NAME}
     *                          and {@link FileParserSettings#FILE_DATE} entries. The metadata step
     *                          is skipped when this is {@code null}.
     * @return the document produced by the selected parser
     * @throws Exception if detection or parsing fails
     */
    public static StDocument parseToDoc(Session session, InputStream is, String fileName, boolean preserveStructure, boolean forceGrammar, Map<String, Object> config) throws Exception {
        // NOTE(review): the wrapping TikaInputStream is never closed here, so closing stays the
        // caller's job via `is`. Closing it in this method would also close `is` — confirm the
        // intended ownership before changing that.
        TikaInputStream tikaStream = TikaInputStream.get((InputStream)is);
        String type = ParseProcessor.getTika().detect((InputStream)tikaStream);
        tikaStream.reset();

        FileParserSettings settings = new FileParserSettings();
        settings.setPreserveStructure(preserveStructure);
        settings.addOptions(config);
        // Applied after addOptions, exactly as before, so the explicit argument is set last.
        settings.setForceGrammar(forceGrammar);

        FileParser parser = ParseProcessor.selectParser(session, tikaStream, type, settings);
        StDocument doc = parser.parse();
        if (config != null) {
            ParseProcessor.applyFileName(doc, config);
            ParseProcessor.applyFileDate(doc, config);
        }
        return doc;
    }

    /** Picks the parser implementation for the detected content type. */
    private static FileParser selectParser(Session session, InputStream stream, String type, FileParserSettings settings) throws Exception {
        if (type.startsWith("image")) {
            return new ImageParser(session, stream, type, settings);
        }
        if (PDFBOX_TYPES.contains(type)) {
            return new PDFBoxParser(session, stream, type, settings);
        }
        if (POI_TYPES.contains(type)) {
            return new POIParser(session, stream, type, settings);
        }
        return new DefaultParser(session, stream, type, settings);
    }

    /**
     * Copies the configured file name onto the document (only when the document has none) and
     * derives the semantic path and extension from it.
     */
    private static void applyFileName(StDocument doc, Map<String, Object> config) {
        if (!config.containsKey(FileParserSettings.FILE_NAME) || doc.getFileName() != null) {
            return;
        }
        String fName = (String)config.get(FileParserSettings.FILE_NAME);
        doc.setFileName(fName);
        if (fName != null) {
            doc.setSemanticPath(ParseProcessor.buildSemanticPath(fName));
        }
        doc.setDocExtension(FilenameUtils.getExtension(fName));
    }

    /**
     * Splits the base name of {@code fName} on {@code . _ -} and space, drops digit-only tokens
     * and joins the rest with single spaces, e.g. {@code "report_2020-final.pdf"} gives
     * {@code "report final"}.
     */
    private static String buildSemanticPath(String fName) {
        List<String> words = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(FilenameUtils.getBaseName(fName), NAME_DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (!DIGITS_ONLY.matcher(token).matches()) {
                words.add(token);
            }
        }
        return String.join((CharSequence)" ", words);
    }

    /**
     * Uses the configured file date as both creation and modification date when the document
     * has no creation date yet.
     */
    private static void applyFileDate(StDocument doc, Map<String, Object> config) {
        if (!config.containsKey(FileParserSettings.FILE_DATE) || doc.getCreatedOn() != null) {
            return;
        }
        Date dt = (Date)config.get(FileParserSettings.FILE_DATE);
        doc.setCreatedOn(dt);
        doc.setModifiedOn(dt);
    }

    /**
     * Extracts the text content of a stream via Tika.
     *
     * @param is document content
     * @return the text Tika extracts from the stream
     * @throws IOException   if the stream cannot be read
     * @throws TikaException if Tika fails to parse the content
     */
    public static String parseToString(InputStream is) throws IOException, TikaException {
        // Reuse the shared facade (as getText does) rather than building a new Tika per call.
        return ParseProcessor.getTika().parseToString(is);
    }

    /**
     * Returns the shared {@link Tika} instance, creating it on first use.
     * Synchronized so that concurrent first calls cannot race on the lazy initialisation.
     */
    private static synchronized Tika getTika() {
        if (tika == null) {
            tika = new Tika();
        }
        return tika;
    }

    /**
     * Extracts the text content of a stream using the shared Tika instance.
     *
     * @param is document content
     * @return the text Tika extracts from the stream
     * @throws TikaException if Tika fails to parse the content
     * @throws IOException   if the stream cannot be read
     */
    public static String getText(InputStream is) throws TikaException, IOException {
        return ParseProcessor.getTika().parseToString(is);
    }

    /**
     * Manual smoke test: runs Tika type detection on a file and discards the result.
     *
     * @param args optional; {@code args[0]} is the file to probe, otherwise the original
     *             hard-coded sample path is used
     * @throws Exception if the file cannot be opened or detection fails
     */
    public static void main(String[] args) throws Exception {
        String fileName = args != null && args.length > 0 ? args[0] : "c:\\bin\\nodes\\ner\\2.ppt";
        // Both streams are closed even if wrapping or detection throws.
        try (InputStream raw = new FileInputStream(new File(fileName));
             TikaInputStream tikaStream = TikaInputStream.get(raw)) {
            ParseProcessor.getTika().detect((InputStream)tikaStream);
        }
    }
}

