/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.neuralsearch.processor;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.neuralsearch.processor.chunker.Chunker;
import org.opensearch.neuralsearch.processor.chunker.ChunkerFactory;
import org.opensearch.neuralsearch.processor.chunker.ChunkerParameterParser;
import org.opensearch.neuralsearch.processor.util.ChunkUtils;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
import org.opensearch.neuralsearch.util.ProcessorDocumentUtils;

public final class TextChunkingProcessor
extends AbstractProcessor {
    public static final String TYPE = "text_chunking";
    public static final String FIELD_MAP_FIELD = "field_map";
    public static final String ALGORITHM_FIELD = "algorithm";
    private static final String DEFAULT_ALGORITHM = "fixed_token_length";
    public static final String IGNORE_MISSING = "ignore_missing";
    public static final boolean DEFAULT_IGNORE_MISSING = false;
    private static final Map<String, Runnable> chunkingAlgorithmIncrementers = Map.of("delimiter", () -> EventStatsManager.increment(EventStatName.TEXT_CHUNKING_DELIMITER_EXECUTIONS), "fixed_token_length", () -> EventStatsManager.increment(EventStatName.TEXT_CHUNKING_FIXED_TOKEN_LENGTH_EXECUTIONS), "fixed_char_length", () -> EventStatsManager.increment(EventStatName.TEXT_CHUNKING_FIXED_CHAR_LENGTH_EXECUTIONS));
    private int maxChunkLimit;
    private Chunker chunker;
    private final Map<String, Object> fieldMap;
    private final boolean ignoreMissing;
    private final ClusterService clusterService;
    private final AnalysisRegistry analysisRegistry;
    private final Environment environment;

    public TextChunkingProcessor(String tag, String description, Map<String, Object> fieldMap, Map<String, Object> algorithmMap, boolean ignoreMissing, Environment environment, ClusterService clusterService, AnalysisRegistry analysisRegistry) {
        super(tag, description);
        this.fieldMap = fieldMap;
        this.ignoreMissing = ignoreMissing;
        this.environment = environment;
        this.clusterService = clusterService;
        this.analysisRegistry = analysisRegistry;
        this.parseAlgorithmMap(algorithmMap);
    }

    public String getType() {
        return TYPE;
    }

    private boolean shouldProcessChunk(Object chunkObject) {
        return !this.ignoreMissing || Objects.nonNull(chunkObject);
    }

    private void parseAlgorithmMap(Map<String, Object> algorithmMap) {
        Object algorithmValue;
        String algorithmKey;
        if (algorithmMap.size() > 1) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Unable to create %s processor as [%s] contains multiple algorithms", TYPE, ALGORITHM_FIELD));
        }
        if (algorithmMap.isEmpty()) {
            algorithmKey = DEFAULT_ALGORITHM;
            algorithmValue = new HashMap();
        } else {
            Map.Entry<String, Object> algorithmEntry = algorithmMap.entrySet().iterator().next();
            algorithmKey = algorithmEntry.getKey();
            algorithmValue = algorithmEntry.getValue();
            if (!(algorithmValue instanceof Map)) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "Unable to create %s processor as parameters for [%s] algorithm must be an object", TYPE, algorithmKey));
            }
        }
        if (!ChunkerFactory.CHUNKER_ALGORITHMS.contains(algorithmKey)) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s", algorithmKey, ChunkerFactory.CHUNKER_ALGORITHMS));
        }
        Map chunkerParameters = (Map)algorithmValue;
        this.maxChunkLimit = ChunkerParameterParser.parseIntegerWithDefault(chunkerParameters, Chunker.MAX_CHUNK_LIMIT_FIELD, Chunker.DEFAULT_MAX_CHUNK_LIMIT);
        if (this.maxChunkLimit <= 0 && this.maxChunkLimit != Chunker.DISABLED_MAX_CHUNK_LIMIT) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Parameter [%s] must be positive or %s to disable this parameter", Chunker.MAX_CHUNK_LIMIT_FIELD, Chunker.DISABLED_MAX_CHUNK_LIMIT));
        }
        chunkerParameters.put("analysis_registry", this.analysisRegistry);
        this.chunker = ChunkerFactory.create(algorithmKey, chunkerParameters);
    }

    private boolean isListOfString(Object value) {
        if (!(value instanceof List)) {
            return false;
        }
        for (Object element : (List)value) {
            if (element instanceof String) continue;
            return false;
        }
        return true;
    }

    private int getMaxTokenCount(Map<String, Object> sourceAndMetadataMap) {
        int defaultMaxTokenCount = (Integer)IndexSettings.MAX_TOKEN_COUNT_SETTING.get(this.environment.settings());
        String indexName = sourceAndMetadataMap.get("_index").toString();
        IndexMetadata indexMetadata = this.clusterService.state().metadata().index(indexName);
        if (Objects.isNull(indexMetadata)) {
            return defaultMaxTokenCount;
        }
        return (Integer)IndexSettings.MAX_TOKEN_COUNT_SETTING.get(indexMetadata.getSettings());
    }

    public IngestDocument execute(IngestDocument ingestDocument) {
        Map sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
        String indexName = sourceAndMetadataMap.get("_index").toString();
        ProcessorDocumentUtils.validateMapTypeValue(FIELD_MAP_FIELD, sourceAndMetadataMap, this.fieldMap, indexName, this.clusterService, this.environment, true);
        HashMap<String, Object> runtimeParameters = new HashMap<String, Object>();
        int maxTokenCount = this.getMaxTokenCount(sourceAndMetadataMap);
        int chunkStringCount = this.getChunkStringCountFromMap(sourceAndMetadataMap, this.fieldMap);
        runtimeParameters.put("max_token_count", maxTokenCount);
        runtimeParameters.put(Chunker.MAX_CHUNK_LIMIT_FIELD, this.maxChunkLimit);
        runtimeParameters.put(Chunker.CHUNK_STRING_COUNT_FIELD, chunkStringCount);
        this.chunkMapType(sourceAndMetadataMap, this.fieldMap, runtimeParameters);
        this.recordChunkingExecutionStats(this.chunker.getAlgorithmName());
        return ingestDocument;
    }

    private int getChunkStringCountFromMap(Map<String, Object> sourceAndMetadataMap, Map<String, Object> fieldMap) {
        int chunkStringCount = 0;
        for (Map.Entry<String, Object> fieldMapEntry : fieldMap.entrySet()) {
            String originalKey = fieldMapEntry.getKey();
            Object targetKey = fieldMapEntry.getValue();
            if (targetKey instanceof Map) {
                Object sourceObject = sourceAndMetadataMap.get(originalKey);
                if (sourceObject instanceof List) {
                    List sourceObjectList = (List)sourceObject;
                    for (Object source : sourceObjectList) {
                        if (!(source instanceof Map)) continue;
                        chunkStringCount += this.getChunkStringCountFromMap((Map)source, (Map)targetKey);
                    }
                    continue;
                }
                if (!(sourceObject instanceof Map)) continue;
                chunkStringCount += this.getChunkStringCountFromMap((Map)sourceObject, (Map)targetKey);
                continue;
            }
            Object chunkObject = sourceAndMetadataMap.get(originalKey);
            chunkStringCount += this.getChunkStringCountFromLeafType(chunkObject);
        }
        return chunkStringCount;
    }

    private int getChunkStringCountFromLeafType(Object value) {
        if (value instanceof String) {
            return StringUtils.isEmpty((CharSequence)((String)value)) ? 0 : 1;
        }
        if (this.isListOfString(value)) {
            return (int)((List)value).stream().filter(s -> !StringUtils.isEmpty((CharSequence)s)).count();
        }
        return 0;
    }

    private void chunkMapType(Map<String, Object> sourceAndMetadataMap, Map<String, Object> fieldMap, Map<String, Object> runtimeParameters) {
        for (Map.Entry<String, Object> fieldMapEntry : fieldMap.entrySet()) {
            String originalKey = fieldMapEntry.getKey();
            Object targetKey = fieldMapEntry.getValue();
            if (targetKey instanceof Map) {
                Object sourceObject = sourceAndMetadataMap.get(originalKey);
                if (sourceObject instanceof List) {
                    List sourceObjectList = (List)sourceObject;
                    for (Object source : sourceObjectList) {
                        if (!(source instanceof Map)) continue;
                        this.chunkMapType((Map)source, (Map)targetKey, runtimeParameters);
                    }
                    continue;
                }
                if (!(sourceObject instanceof Map)) continue;
                this.chunkMapType((Map)sourceObject, (Map)targetKey, runtimeParameters);
                continue;
            }
            Object chunkObject = sourceAndMetadataMap.get(originalKey);
            if (!this.shouldProcessChunk(chunkObject)) continue;
            List<String> chunkedResult = this.chunkLeafType(chunkObject, runtimeParameters);
            sourceAndMetadataMap.put(String.valueOf(targetKey), chunkedResult);
        }
    }

    private List<String> chunkLeafType(Object value, Map<String, Object> runTimeParameters) {
        List<String> result = new ArrayList<String>();
        if (value == null) {
            return result;
        }
        if (value instanceof String) {
            if (StringUtils.isBlank((CharSequence)String.valueOf(value))) {
                return result;
            }
            result = ChunkUtils.chunkString(this.chunker, value.toString(), runTimeParameters);
        } else if (this.isListOfString(value)) {
            result = ChunkUtils.chunkList(this.chunker, (List)value, runTimeParameters);
        }
        return result;
    }

    private void recordChunkingExecutionStats(String algorithmName) {
        EventStatsManager.increment(EventStatName.TEXT_CHUNKING_PROCESSOR_EXECUTIONS);
        Optional.ofNullable(chunkingAlgorithmIncrementers.get(algorithmName)).ifPresent(Runnable::run);
    }
}

