package edu.iu.nwb.preprocessing.duplicatenodedetector;

import edu.iu.nwb.preprocessing.duplicatenodedetector.util.GraphSearchAlgorithms;
import edu.iu.nwb.preprocessing.duplicatenodedetector.util.ListMap;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import org.cishell.framework.algorithm.Algorithm;
import org.cishell.framework.algorithm.AlgorithmExecutionException;
import org.cishell.framework.data.BasicData;
import org.cishell.framework.data.Data;
import org.cishell.utilities.TableUtilities;
import prefuse.data.Graph;
import prefuse.data.Node;
import prefuse.data.Schema;
import prefuse.data.Table;
import prefuse.util.collections.IntIterator;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.Jaro;

/* loaded from: input_file:edu/iu/nwb/preprocessing/duplicatenodedetector/DuplicateNodeDetectorAlgorithm.class */
public class DuplicateNodeDetectorAlgorithm implements Algorithm {
    /** Format string attached to the two text-log outputs built in {@code formatAsData}. */
    private static final String TEXT_TYPE = "file:text/plain";
    /** Name of the float column added to the merge graph's edge table. */
    public static final String SIMILARITY_COLUMN_NAME = "similarity";
    /** Name of the int column added to the output table; rows sharing a value belong to one merge group. */
    public static final String UNIQUE_INDEX_COLUMN_NAME = "uniqueIndex";
    /** Name of the String column added to the output table (created with "*" as its third addColumn argument). */
    public static final String COMBINE_VALUES_COLUMN_NAME = "combineValues";
    /** Value written into the combineValues column for rows that are merged into another row. */
    public static final String NOT_THE_PRIMARY_NODE = "";
    /** String metric used to score how alike two attribute values are. */
    private final AbstractStringMetric similarityChecker = new Jaro();
    /** First element of the input array handed to the constructor; its payload is cast to a prefuse Graph in execute(). */
    private Data inputData;
    /** Name of the node attribute whose values are compared. */
    private String compareAttributeName;
    /** Pairs scoring at least this similarity get an edge in the merge graph. */
    private float mergeOnSimilarity;
    /** Pairs scoring below the merge threshold but at least this value are listed in the note log. */
    private float makeNoteOnSimilarity;
    /** Prefix length used to bucket rows; only rows in the same bucket are compared. */
    private int numPrefixLetters;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/iu/nwb/preprocessing/duplicatenodedetector/DuplicateNodeDetectorAlgorithm$SimilarityReport.class */
    /**
     * A pair of attribute values together with their similarity score.
     *
     * Instances order themselves by ascending {@code similarity}; the names play no part in the
     * ordering.
     */
    public static class SimilarityReport implements Comparable {
        protected String leftName;
        protected String rightName;
        protected double similarity;

        /**
         * @param str  value stored as {@code leftName}
         * @param str2 value stored as {@code rightName}
         * @param d    value stored as {@code similarity}
         */
        public SimilarityReport(String str, String str2, double d) {
            this.leftName = str;
            this.rightName = str2;
            this.similarity = d;
        }

        /**
         * Compares by similarity score.
         *
         * @param obj the report to compare against
         * @return 1 if this score is greater, -1 if it is smaller, otherwise 0
         * @throws ClassCastException if {@code obj} is not a {@code SimilarityReport}
         *         (this includes {@code null})
         */
        @Override // java.lang.Comparable
        public int compareTo(Object obj) {
            if (obj instanceof SimilarityReport) {
                double otherSimilarity = ((SimilarityReport) obj).similarity;
                if (this.similarity > otherSimilarity) {
                    return 1;
                }
                if (this.similarity < otherSimilarity) {
                    return -1;
                }
                return 0;
            }
            throw new ClassCastException("A SimilarityReport can only be compared to other SimilarityReports.");
        }
    }

    public DuplicateNodeDetectorAlgorithm(Data[] dataArr, Dictionary dictionary) {
        this.inputData = dataArr[0];
        this.compareAttributeName = (String) dictionary.get("compareAttribute");
        this.mergeOnSimilarity = ((Float) dictionary.get("mergeOnSimilarity")).floatValue();
        this.makeNoteOnSimilarity = ((Float) dictionary.get("makeNoteOnSimilarity")).floatValue();
        this.numPrefixLetters = ((Integer) dictionary.get("numPrefixLetters")).intValue();
    }

    /**
     * Runs the duplicate detection on the input graph.
     *
     * The input graph's nodes are copied into a table, a merge graph is built over that table,
     * and merge information is written into a copy of the table. The result is whatever
     * {@code formatAsData} produces from the merge table and the two logs.
     *
     * @return the output data array
     * @throws AlgorithmExecutionException if a log file cannot be written
     */
    public Data[] execute() throws AlgorithmExecutionException {
        Graph inputGraph = (Graph) this.inputData.getData();
        Table nodeTable = constructAlteredNodeTable(inputGraph);

        StringBuffer noteLog = new StringBuffer();
        Graph mergeGraph = makeMergeGraph(nodeTable, noteLog);

        StringBuffer mergeLog = new StringBuffer();
        Table mergeTable = createTableWithMergeInfo(nodeTable, mergeGraph, mergeLog);

        return formatAsData(mergeTable, noteLog, mergeLog);
    }

    /**
     * Builds a new table holding the graph's node data plus the two merge bookkeeping columns.
     *
     * @param inputGraph graph whose nodes are copied
     * @return the populated table
     */
    private Table constructAlteredNodeTable(Graph inputGraph) {
        Schema nodeSchema = inputGraph.getNodeTable().getSchema();
        Table emptyTable = createTableSchema(nodeSchema, new Table());
        return populateTable(emptyTable, inputGraph);
    }

    /**
     * Builds the merge graph for the given node table and writes the report of pairs that are
     * similar but not similar enough to merge.
     *
     * Rows are bucketed by the prefix of their compare-attribute value and only rows in the same
     * bucket are compared. Every distinct pair is scored case-insensitively. A pair scoring at
     * least {@code mergeOnSimilarity} is joined by an edge; a pair scoring below that but at least
     * {@code makeNoteOnSimilarity} is written to {@code noteLog}, highest similarity first.
     *
     * @param table   node table whose rows become the graph's nodes
     * @param noteLog buffer that receives the similar-but-not-merged report
     * @return the graph whose edges connect rows that should be merged
     */
    private Graph makeMergeGraph(Table table, StringBuffer noteLog) {
        Graph mergeGraph = makeEmptyMergeGraph(table);
        ListMap rowsByPrefix = sortNodesByAttributePrefix(table, this.compareAttributeName, this.numPrefixLetters);
        List<SimilarityReport> noteworthyPairs = new ArrayList<SimilarityReport>();

        for (Object bucket : rowsByPrefix.values()) {
            List rows = (List) bucket;
            for (int i = 0; i < rows.size(); i++) {
                Integer left = (Integer) rows.get(i);
                // Start at i + 1 so a row is never compared with itself: a self-comparison can
                // only produce a self-loop edge or a meaningless "X is similar to X" note.
                for (int j = i + 1; j < rows.size(); j++) {
                    Integer right = (Integer) rows.get(j);
                    float similarity = compareNodesCaseInsensitiveBy(this.compareAttributeName, left, right, table);
                    if (similarity >= this.mergeOnSimilarity) {
                        mergeGraph.addEdge(left.intValue(), right.intValue());
                    } else if (similarity >= this.makeNoteOnSimilarity) {
                        String leftName = table.getString(left.intValue(), this.compareAttributeName);
                        String rightName = table.getString(right.intValue(), this.compareAttributeName);
                        noteworthyPairs.add(new SimilarityReport(leftName, rightName, similarity));
                    }
                }
            }
        }

        // Ascending sort followed by reverse yields descending similarity (ties end up in
        // reverse discovery order, as before).
        Collections.sort(noteworthyPairs);
        Collections.reverse(noteworthyPairs);
        for (SimilarityReport report : noteworthyPairs) {
            noteLog.append(report.similarity + " similar:\r\n");
            noteLog.append("  \"" + report.leftName + "\"\r\n");
            noteLog.append("  \"" + report.rightName + "\"\r\n");
        }
        return mergeGraph;
    }

    /**
     * Copies the node table and marks, in the copy, which rows are to be merged into which.
     *
     * Each cluster of the merge graph with more than one member is numbered (starting at 1) and
     * described in {@code mergeLog}; single-member clusters are ignored.
     *
     * @param table      node table to copy; not modified here
     * @param mergeGraph graph whose clusters define the merge groups
     * @param mergeLog   buffer that receives the merge report
     * @return the annotated copy of {@code table}
     */
    private Table createTableWithMergeInfo(Table table, Graph mergeGraph, StringBuffer mergeLog) {
        Table mergeTable = TableUtilities.copyTable(table);
        List<Collection> clusters = extractWeakComponentClusters(mergeGraph);

        mergeLog.append("Merge report\n");
        mergeLog.append("Similarly named entities will be merged into the one with the longest name.\n\n");

        int mergeNumber = 0;
        for (Collection cluster : clusters) {
            if (cluster.size() <= 1) {
                continue;
            }
            mergeNumber++;
            mergeLog.append(setMergeInfoForCluster(mergeTable, cluster, mergeNumber));
        }

        mergeLog.append("End of merge report.\n");
        return mergeTable;
    }

    /**
     * Marks every member of a cluster, except the primary one, as merged into the primary and
     * returns the report section describing that merge.
     *
     * The primary member is the one with the longest compare-attribute value. Its unique index is
     * taken to be its row number plus one.
     *
     * @param table       table to annotate
     * @param cluster     row numbers (as Integers) forming one merge group
     * @param mergeNumber number shown in the section header
     * @return the report text for this cluster
     */
    private StringBuffer setMergeInfoForCluster(Table table, Collection cluster, int mergeNumber) {
        StringBuffer report = new StringBuffer();
        report.append("======== Merge ").append(mergeNumber).append(" ========\n");

        Integer primary = selectNodeWithLongestAttributeValue(table, cluster, this.compareAttributeName);
        String primaryName = table.getString(primary.intValue(), this.compareAttributeName);
        int primaryUniqueIndex = primary.intValue() + 1;
        report.append(primaryName).append(" will have the following merged in:\n");

        for (Object member : cluster) {
            Integer node = (Integer) member;
            if (primary.equals(node)) {
                continue;
            }
            String mergedName = setMergeInfoForNode(table, node, primaryUniqueIndex);
            report.append(mergedName).append("\n");
        }

        report.append("\n");
        return report;
    }

    /**
     * Marks one row as merged into a primary row.
     *
     * Sets the row's uniqueIndex to the primary's index and its combineValues cell to
     * {@code NOT_THE_PRIMARY_NODE}.
     *
     * @param table              table to annotate
     * @param node               row number of the node being merged away
     * @param primaryUniqueIndex unique index of the row it is merged into
     * @return the row's compare-attribute value
     */
    private String setMergeInfoForNode(Table table, Integer node, int primaryUniqueIndex) {
        int row = node.intValue();
        table.setInt(row, UNIQUE_INDEX_COLUMN_NAME, primaryUniqueIndex);
        table.setString(row, COMBINE_VALUES_COLUMN_NAME, NOT_THE_PRIMARY_NODE);
        return table.getString(row, this.compareAttributeName);
    }

    /**
     * Picks the row whose value in the given column is longest; the first such row wins ties.
     *
     * @param table         table holding the rows
     * @param nodes         candidate row numbers (as Integers); must not be empty
     * @param attributeName column whose string length is compared
     * @return the chosen row number, or {@code null} if the column cannot be read as a string
     * @throws IllegalArgumentException if {@code nodes} is empty
     */
    private Integer selectNodeWithLongestAttributeValue(Table table, Collection nodes, String attributeName) {
        if (nodes.isEmpty()) {
            throw new IllegalArgumentException("Must give at least one node.");
        }

        Integer longestNode = null;
        int longestLength = Integer.MIN_VALUE;
        for (Object element : nodes) {
            Integer candidate = (Integer) element;
            if (!table.canGetString(attributeName)) {
                continue;
            }
            String value = table.getString(candidate.intValue(), attributeName);
            if (value.length() > longestLength) {
                longestLength = value.length();
                longestNode = candidate;
            }
        }
        return longestNode;
    }

    /**
     * Wraps the merge table and the two text logs as output data.
     *
     * The note log is written to its file before the merge log, but the returned array lists the
     * merge log first.
     *
     * @param table    the annotated merge table
     * @param noteLog  report of similar pairs that will not be merged
     * @param mergeLog report of the merges that will be performed
     * @return {@code {merge table, merge log, note log}}
     * @throws AlgorithmExecutionException if either log file cannot be written
     */
    private Data[] formatAsData(Table table, StringBuffer noteLog, StringBuffer mergeLog) throws AlgorithmExecutionException {
        Data tableData = new BasicData(table, Table.class.getName());
        Dictionary tableMetadata = tableData.getMetadata();
        // Boolean.TRUE replaces the deprecated Boolean(boolean) constructor.
        tableMetadata.put("Modified", Boolean.TRUE);
        tableMetadata.put("Parent", this.inputData);
        tableMetadata.put("Type", "Matrix");
        tableMetadata.put("Label", "Merge Table: based on " + this.compareAttributeName);

        Data noteLogData = createTextLogData(noteLog.toString(), "nodeLog", "Text Log: Noteworthy nodes that will NOT be merged");
        Data mergeLogData = createTextLogData(mergeLog.toString(), "mergeLog", "Text Log: Nodes that will be merged");

        return new Data[]{tableData, mergeLogData, noteLogData};
    }

    /** Writes {@code contents} to a temp file and wraps it as a text Data item parented to the input. */
    private Data createTextLogData(String contents, String filePrefix, String label) throws AlgorithmExecutionException {
        Data logData = new BasicData(stringToFile(contents, filePrefix), TEXT_TYPE);
        Dictionary metadata = logData.getMetadata();
        metadata.put("Parent", this.inputData);
        metadata.put("Type", "Text");
        metadata.put("Label", label);
        return logData;
    }

    /**
     * Scores how similar two rows are in the given column, ignoring case.
     *
     * @param attributeName column to compare
     * @param leftRow       row number of the first node
     * @param rightRow      row number of the second node
     * @param table         table holding both rows
     * @return the similarity reported by {@code similarityChecker} for the lower-cased values
     */
    private float compareNodesCaseInsensitiveBy(String attributeName, Integer leftRow, Integer rightRow, Table table) {
        // NOTE(review): toLowerCase() uses the default locale; confirm that is acceptable
        // for the data this runs on.
        String leftValue = table.getString(leftRow.intValue(), attributeName).toLowerCase();
        String rightValue = table.getString(rightRow.intValue(), attributeName).toLowerCase();
        return this.similarityChecker.getSimilarity(leftValue, rightValue);
    }

    /**
     * Creates a graph over the given node table with an empty edge table.
     *
     * The edge table gets int source and target key columns and a float similarity column; no
     * code in this class writes to the similarity column.
     *
     * @param nodeTable table whose rows are the graph's nodes
     * @return the new graph, built with {@code false} as the third constructor argument
     *         (presumably the "directed" flag; confirm against the prefuse API)
     */
    private Graph makeEmptyMergeGraph(Table nodeTable) {
        Table edgeTable = new Table();
        edgeTable.addColumn(Graph.DEFAULT_SOURCE_KEY, Integer.TYPE);
        edgeTable.addColumn(Graph.DEFAULT_TARGET_KEY, Integer.TYPE);
        edgeTable.addColumn(SIMILARITY_COLUMN_NAME, Float.TYPE);

        Graph emptyGraph = new Graph(nodeTable, edgeTable, false);
        return emptyGraph;
    }

    /**
     * Adds to {@code target} every column of {@code source}, in order, followed by the int
     * uniqueIndex column and the String combineValues column (added with "*" as its third
     * addColumn argument).
     *
     * @param source schema to copy column names and types from
     * @param target table that receives the columns
     * @return {@code target}
     */
    private Table createTableSchema(Schema source, Table target) {
        for (int column = 0; column < source.getColumnCount(); column++) {
            String columnName = source.getColumnName(column);
            Class columnType = source.getColumnType(column);
            target.addColumn(columnName, columnType);
        }

        target.addColumn(UNIQUE_INDEX_COLUMN_NAME, Integer.TYPE);
        target.addColumn(COMBINE_VALUES_COLUMN_NAME, String.class, "*");
        return target;
    }

    /**
     * Appends one row per graph node to {@code table}, copying the node's values column by
     * column and setting the row's uniqueIndex to the table's row count after the append.
     *
     * @param table table to fill; its leading columns must line up with the node columns
     * @param graph graph whose nodes are copied
     * @return {@code table}
     */
    private Table populateTable(Table table, Graph graph) {
        Iterator nodes = graph.nodes();
        while (nodes.hasNext()) {
            Node node = (Node) nodes.next();
            table.addRow();

            // Look the row count up once per node rather than once per cell.
            int rowCount = table.getRowCount();
            int row = rowCount - 1;
            for (int column = 0; column < node.getColumnCount(); column++) {
                table.set(row, column, node.get(column));
            }

            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            table.set(row, UNIQUE_INDEX_COLUMN_NAME, Integer.valueOf(rowCount));
        }
        return table;
    }

    /**
     * Groups the graph's nodes into clusters.
     *
     * Nodes are visited in iteration order. For each node not yet assigned to a cluster, the set
     * returned by {@code GraphSearchAlgorithms.undirectedDepthFirstSearch} from that node becomes
     * a new cluster.
     *
     * @param graph graph to partition
     * @return list of clusters, each a set of node row numbers (presumably Integers; confirm
     *         against the search helper)
     */
    public List extractWeakComponentClusters(Graph graph) {
        List clusters = new ArrayList();
        HashSet visited = new HashSet();

        Iterator nodes = graph.nodes();
        while (nodes.hasNext()) {
            Node node = (Node) nodes.next();
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            Integer row = Integer.valueOf(node.getRow());
            if (visited.contains(row)) {
                continue;
            }
            LinkedHashSet cluster = GraphSearchAlgorithms.undirectedDepthFirstSearch(graph, row);
            visited.addAll(cluster);
            clusters.add(cluster);
        }
        return clusters;
    }

    /**
     * Buckets the table's rows by the prefix of their value in the given column.
     *
     * Rows whose value is {@code null} are left out entirely.
     *
     * @param table         table whose rows are bucketed
     * @param attributeName column the prefix is taken from
     * @param prefixLength  number of leading characters that form the bucket key
     * @return map from prefix key to the row numbers (as Integers) put under that key
     */
    private ListMap sortNodesByAttributePrefix(Table table, String attributeName, int prefixLength) {
        ListMap rowsByPrefix = new ListMap();
        IntIterator rows = table.rows();
        while (rows.hasNext()) {
            int row = rows.nextInt();
            String value = table.getTuple(row).getString(attributeName);
            if (value == null) {
                continue;
            }
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            rowsByPrefix.put(extractPrefixKey(value, prefixLength), Integer.valueOf(row));
        }
        return rowsByPrefix;
    }

    /**
     * Returns the bucket key for a value.
     *
     * @param value        the attribute value
     * @param prefixLength requested prefix length
     * @return the whole value if it is shorter than {@code prefixLength}; the empty-string
     *         constant {@code NOT_THE_PRIMARY_NODE} if {@code prefixLength} is below 1;
     *         otherwise the first {@code prefixLength} characters
     */
    private String extractPrefixKey(String value, int prefixLength) {
        if (prefixLength > value.length()) {
            return value;
        }
        if (prefixLength < 1) {
            return NOT_THE_PRIMARY_NODE;
        }
        return value.substring(0, prefixLength);
    }

    /**
     * Writes a string to a newly created temporary ".txt" file.
     *
     * @param contents   text to write
     * @param filePrefix prefix for the temp file's name
     * @return the file that was written
     * @throws AlgorithmExecutionException if the file cannot be created or written
     */
    private File stringToFile(String contents, String filePrefix) throws AlgorithmExecutionException {
        try {
            // The suffix is used verbatim, so the dot must be included to get a real extension.
            File tempFile = File.createTempFile(filePrefix, ".txt");
            FileWriter writer = new FileWriter(tempFile);
            try {
                writer.write(contents);
            } finally {
                // Release the file handle even when the write fails.
                writer.close();
            }
            return tempFile;
        } catch (IOException e) {
            throw new AlgorithmExecutionException("Could not create file from string", e);
        }
    }
}
