package edu.iu.nwb.preprocessing.text.normalization;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Dictionary;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.cishell.framework.algorithm.Algorithm;
import org.cishell.framework.algorithm.AlgorithmExecutionException;
import org.cishell.framework.data.BasicData;
import org.cishell.framework.data.Data;
import org.cishell.utilities.StringUtilities;
import org.osgi.service.log.LogService;
import prefuse.data.Table;
import prefuse.data.column.Column;

/* loaded from: input_file:edu/iu/nwb/preprocessing/text/normalization/StandardNormalizer.class */
/**
 * CIShell algorithm that produces a copy of a prefuse {@link Table} in which the
 * text of selected String columns has been normalized.
 *
 * <p>Normalization runs each cell through a Lucene {@link SnowballAnalyzer}
 * configured for English and joins the terms the analyzer emits with a
 * caller-supplied separator. Columns that are not selected are copied verbatim.
 */
public class StandardNormalizer implements Algorithm {
    /** Language name handed to the Snowball analyzer. */
    private static final String ANALYZER_LANGUAGE = "English";

    /** Field name passed to {@code Analyzer.tokenStream}; this class uses a single analyzer for every column. */
    private static final String TOKEN_STREAM_FIELD_NAME = "unused";

    private final Data inputData;
    private final Table inputTable;
    private final String separator;
    private final Set<String> columnsToNormalize;
    private final LogService logger;
    private final Analyzer analyzer;
    private final boolean usingDefaultStopWords;

    /**
     * Creates the algorithm instance.
     *
     * @param inputData             the input data item; recorded as the parent of the output
     * @param inputTable            the table whose columns are copied/normalized
     * @param logger                log service used for warnings
     * @param separator             string placed between the normalized terms of a cell
     * @param columnsToNormalize    names of the columns to normalize
     * @param stopWords             stop words for the analyzer, or {@code null} to build the
     *                              analyzer without an explicit stop word list (a warning is logged)
     * @param usingDefaultStopWords when {@code true}, {@link #execute()} warns that the
     *                              user-specified stop word file could not be read
     */
    public StandardNormalizer(
            Data inputData,
            Table inputTable,
            LogService logger,
            String separator,
            Set<String> columnsToNormalize,
            String[] stopWords,
            boolean usingDefaultStopWords) {
        this.inputData = inputData;
        this.inputTable = inputTable;
        this.logger = logger;
        this.separator = separator;
        this.columnsToNormalize = columnsToNormalize;
        this.usingDefaultStopWords = usingDefaultStopWords;
        if (stopWords != null) {
            this.analyzer = new SnowballAnalyzer(ANALYZER_LANGUAGE, stopWords);
        } else {
            logger.log(LogService.LOG_WARNING, "No stop words.");
            this.analyzer = new SnowballAnalyzer(ANALYZER_LANGUAGE);
        }
    }

    /**
     * Builds a new table with the same schema and row count as the input table,
     * fills it with copied/normalized column values and wraps it as output data.
     *
     * @return a single-element array holding the normalized table
     * @throws AlgorithmExecutionException if tokenizing a cell fails
     */
    public Data[] execute() throws AlgorithmExecutionException {
        Table outputTable = this.inputTable.getSchema().instantiate();
        outputTable.addRows(this.inputTable.getRowCount());

        copyAndNormalize(this.inputTable, outputTable, this.columnsToNormalize, this.separator);

        if (this.usingDefaultStopWords) {
            this.logger.log(
                    LogService.LOG_WARNING,
                    "The stop word file you specified could not be read. The built-in default was used.");
        }
        return prepareOutputData(outputTable, this.columnsToNormalize);
    }

    /**
     * Fills every column of {@code target} from the column at the same index in {@code source}.
     *
     * <p>Selected String columns are normalized. Every other column is copied unchanged;
     * this includes selected columns that are not of type String, for which a warning is
     * logged. (Previously such columns were left unfilled in the output table.)
     *
     * @param source      table to read from
     * @param target      table to write to; expected to share {@code source}'s schema and row count
     * @param columnNames names of the columns to normalize
     * @param termSeparator separator placed between normalized terms
     * @throws AlgorithmExecutionException if tokenizing a cell fails
     */
    private void copyAndNormalize(Table source, Table target, Set<String> columnNames, String termSeparator)
            throws AlgorithmExecutionException {
        for (int columnIndex = 0; columnIndex < source.getColumnCount(); columnIndex++) {
            Column sourceColumn = source.getColumn(columnIndex);
            Column targetColumn = target.getColumn(columnIndex);
            boolean selected = columnNames.contains(source.getColumnName(columnIndex));

            if (selected && sourceColumn.getColumnType().equals(String.class)) {
                normalizeColumn(sourceColumn, targetColumn, termSeparator);
                continue;
            }
            if (selected) {
                this.logger.log(LogService.LOG_WARNING, "Selected columns must be Strings");
            }
            // Not normalizable (or not selected): keep the original values rather than dropping them.
            copyColumn(sourceColumn, targetColumn);
        }
    }

    /**
     * Writes the normalized form of each non-null cell of {@code source} into the same row of
     * {@code target}. Rows whose source value is {@code null} are not written.
     *
     * @throws AlgorithmExecutionException if tokenizing a cell fails
     */
    private void normalizeColumn(Column source, Column target, String termSeparator)
            throws AlgorithmExecutionException {
        for (int row = 0; row < source.getRowCount(); row++) {
            String cellText = source.getString(row);
            if (cellText != null) {
                target.setString(normalize(cellText, termSeparator), row);
            }
        }
    }

    /** Copies every row value of {@code source} into the same row of {@code target}. */
    private void copyColumn(Column source, Column target) {
        for (int row = 0; row < source.getRowCount(); row++) {
            target.set(source.get(row), row);
        }
    }

    /**
     * Tokenizes {@code text} with the configured analyzer and joins the resulting terms.
     *
     * @param text          the cell text to normalize; must not be {@code null}
     * @param termSeparator separator placed between the emitted terms
     * @return the analyzer's terms joined by {@code termSeparator}
     * @throws AlgorithmExecutionException if the token stream reports an I/O error
     */
    private String normalize(String text, String termSeparator) throws AlgorithmExecutionException {
        TokenStream tokenStream =
                this.analyzer.tokenStream(TOKEN_STREAM_FIELD_NAME, new StringReader(text));
        ArrayList<String> terms = new ArrayList<String>();
        try {
            try {
                // next() returns null once the stream is exhausted.
                for (Token token = tokenStream.next(); token != null; token = tokenStream.next()) {
                    terms.add(token.termText());
                }
            } finally {
                // Explicit close instead of try-with-resources: the token API used here
                // (next()/termText()) belongs to older Lucene releases, so Closeable support
                // on TokenStream is not assumed.
                tokenStream.close();
            }
        } catch (IOException e) {
            throw new AlgorithmExecutionException("Unable to normalize text.", e);
        }
        return StringUtilities.implodeItems(terms, termSeparator);
    }

    /**
     * Wraps the output table as CIShell data and sets its label, parent and type metadata.
     *
     * @param outputTable       the normalized table
     * @param normalizedColumns the selected column names, listed in the output label
     * @return a single-element array holding the wrapped table
     */
    private Data[] prepareOutputData(Table outputTable, Set<String> normalizedColumns) {
        Data outputData = new BasicData(outputTable, Table.class.getName());
        // Kept as the raw type: the declared type of getMetadata() is not visible from this file.
        Dictionary metadata = outputData.getMetadata();
        String joinedColumnNames = StringUtilities.implodeItems(normalizedColumns, ", ");
        metadata.put("Label", String.format("with normalized %s", joinedColumnNames));
        metadata.put("Parent", this.inputData);
        metadata.put("Type", "Matrix");
        return new Data[] {outputData};
    }
}
