package edu.iu.nwb.analysis.isidupremover;

import edu.iu.nwb.analysis.isidupremover.tuplecomparison.ISIPubComparer;
import edu.iu.nwb.analysis.isidupremover.tuplecomparison.MainPubComparer;
import edu.iu.nwb.shared.isiutil.ISITag;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Dictionary;
import java.util.HashSet;
import java.util.Set;
import org.cishell.framework.CIShellContext;
import org.cishell.framework.algorithm.AlgorithmExecutionException;
import org.cishell.framework.data.Data;
import org.osgi.service.log.LogService;
import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.data.util.TableIterator;
import prefuse.util.collections.IntIterator;

/* loaded from: input_file:edu/iu/nwb/analysis/isidupremover/ISIDupRemover.class */
/**
 * Removes publication records that share the same ISI unique ID from a
 * prefuse {@link Table}.
 *
 * <p>Rows are visited in unique-ID order; whenever two consecutive rows carry
 * the same ID, one of them is chosen for removal (see
 * {@link #determineWhichToRemove}). A textual account of every decision is
 * accumulated and written to a temporary log file.
 */
public class ISIDupRemover {
    Data[] data;
    Dictionary parameters;
    CIShellContext context;
    private static final String LOG_FILE_NAME = "isiduplicateremoverlog";
    /** Line terminator used throughout the running log. */
    private static final String LINE_BREAK = "\r\n";
    private LogService log;
    private ISIPubComparer mainPubComparer = new MainPubComparer();

    /**
     * Splits {@code table} into the records that are kept and the records
     * that were removed as duplicates.
     *
     * @param table      the publication table to de-duplicate
     * @param logService receives progress, warning and summary messages
     * @param z          when {@code true}, the full running log of removal
     *                   decisions is also sent to {@code logService}
     * @return a pair built as {@code (kept rows, removed rows)}; if the table
     *         has no readable unique-ID column the pair is
     *         {@code (table, table)}
     * @throws AlgorithmExecutionException if the running log cannot be
     *                                     written to a temporary file
     */
    public TablePair removeDuplicatePublications(Table table, LogService logService, boolean z) throws AlgorithmExecutionException {
        // The logger must be stored BEFORE the sanity check: both the check
        // and the failure branch below log through this.log.
        this.log = logService;
        if (!tableSanityCheckPasses(table)) {
            this.log.log(LogService.LOG_WARNING, "Unable to remove duplicates from table. Returning original table.");
            return new TablePair(table, table);
        }

        StringBuffer runningLog = new StringBuffer();
        // Raw types are kept deliberately: the file shows no generics usage.
        Set rowsToRemove = new HashSet();
        int rowsWithoutId = 0;
        Integer keptRow = null;
        Object keptId = null;

        // Sorting by unique ID places duplicates next to each other, so each
        // row only has to be compared with the current survivor for its ID.
        IntIterator sortedRows = table.rowsSortedBy(ISITag.UNIQUE_ID.columnName, true);
        while (sortedRows.hasNext()) {
            Integer row = (Integer) sortedRows.next();
            String id = table.getString(row.intValue(), ISITag.UNIQUE_ID.columnName);
            if (id == null) {
                // Cannot be matched against anything; counted and kept.
                rowsWithoutId++;
            } else if (!id.equals(keptId)) {
                // First row seen for this ID becomes the current survivor.
                keptRow = row;
                keptId = id;
            } else if (determineWhichToRemove(table, row, keptRow, runningLog).equals(row)) {
                rowsToRemove.add(row);
            } else {
                // The previous survivor lost; the current row takes over.
                rowsToRemove.add(keptRow);
                keptRow = row;
                keptId = id;
            }
        }

        if (z) {
            logService.log(LogService.LOG_INFO, runningLog.toString());
        }

        File logFile;
        try {
            logFile = writeLogFile(runningLog.toString());
        } catch (IOException e) {
            throw new AlgorithmExecutionException("Unable to write removed duplicates log.", e);
        }

        Table keptTable = new Table();
        keptTable.addColumns(table.getSchema());
        Table removedTable = new Table();
        removedTable.addColumns(table.getSchema());
        TableIterator rows = table.iterator();
        while (rows.hasNext()) {
            Integer row = (Integer) rows.next();
            Table destination = rowsToRemove.contains(row) ? removedTable : keptTable;
            destination.addTuple(table.getTuple(row.intValue()));
        }

        logService.log(LogService.LOG_INFO, "The original " + table.getRowCount() + " records have been processed to remove duplicate unique ISI IDs leaving " + keptTable.getRowCount() + " records.");
        if (rowsWithoutId > 0) {
            logService.log(LogService.LOG_WARNING, rowsWithoutId + " records did not have unique IDs (specified with the UT tag in ISI format), so we were unable to determine whether there were duplicates of these records. The absence of a unique ID is most likely a flaw in the original data.");
        }
        logService.log(LogService.LOG_INFO, "");
        logService.log(LogService.LOG_INFO, "Wrote log to " + logFile.getAbsolutePath());
        return new TablePair(keptTable, removedTable);
    }

    /**
     * Writes {@code contents} to a freshly created temporary file.
     *
     * @param contents the text to store
     * @return the temporary file that was written
     * @throws IOException if the file cannot be created, written or closed
     */
    private File writeLogFile(String contents) throws IOException {
        File logFile = File.createTempFile(LOG_FILE_NAME, ".txt");
        BufferedWriter writer = new BufferedWriter(new FileWriter(logFile));
        try {
            writer.write(contents);
        } finally {
            // Release the file handle even when the write fails.
            writer.close();
        }
        return logFile;
    }

    /**
     * Decides which of two rows sharing a unique ID should be removed and
     * appends an explanation of the decision to {@code stringBuffer}.
     *
     * <p>A row without a title loses to a row that has one. Otherwise the
     * decision is delegated to the publication comparer: a positive result
     * removes the second row, a negative or zero result removes the first.
     *
     * @param table        the table both rows belong to
     * @param num          row index of the first record
     * @param num2         row index of the second record
     * @param stringBuffer running log that receives the explanation
     * @return {@code num} or {@code num2}, whichever should be removed
     */
    private Integer determineWhichToRemove(Table table, Integer num, Integer num2, StringBuffer stringBuffer) {
        Tuple first = table.getTuple(num.intValue());
        Tuple second = table.getTuple(num2.intValue());
        String id = first.getString(ISITag.UNIQUE_ID.columnName);
        String firstTitle = first.getString(ISITag.TITLE.columnName);
        String secondTitle = second.getString(ISITag.TITLE.columnName);

        stringBuffer.append("Found a pair of publication records with ID '" + id + "'" + LINE_BREAK);

        if (firstTitle == null && secondTitle == null) {
            // No titles to report; the comparer still decides below.
            // NOTE(review): assumes the comparer tolerates tuples without a
            // title - confirm against MainPubComparer.
            stringBuffer.append("Neither have a title specified (Very unusual)." + LINE_BREAK);
        } else if (firstTitle == null) {
            stringBuffer.append("The first does not have a title." + LINE_BREAK);
            stringBuffer.append("Removing first" + LINE_BREAK);
            appendEntrySeparator(stringBuffer);
            return num;
        } else if (secondTitle == null) {
            stringBuffer.append("The second does not have a title." + LINE_BREAK);
            stringBuffer.append("Removing second" + LINE_BREAK);
            appendEntrySeparator(stringBuffer);
            return num2;
        } else if (firstTitle.equals(secondTitle)) {
            stringBuffer.append("Both titled '" + firstTitle + "'" + LINE_BREAK);
        } else {
            stringBuffer.append("The first titled '" + firstTitle + "'" + LINE_BREAK);
            stringBuffer.append("The second titled '" + secondTitle + "'" + LINE_BREAK);
        }

        int comparison = this.mainPubComparer.compare(first, second, stringBuffer);
        Integer rowToRemove;
        if (comparison > 0) {
            stringBuffer.append("Removing second" + LINE_BREAK);
            rowToRemove = num2;
        } else if (comparison < 0) {
            stringBuffer.append("Removing first" + LINE_BREAK);
            rowToRemove = num;
        } else {
            stringBuffer.append("Arbitrarily removing first" + LINE_BREAK);
            rowToRemove = num;
        }
        appendEntrySeparator(stringBuffer);
        return rowToRemove;
    }

    /** Appends the blank-line / dashed-rule divider that ends a log entry. */
    private void appendEntrySeparator(StringBuffer stringBuffer) {
        stringBuffer.append(LINE_BREAK);
        stringBuffer.append("--------------------" + LINE_BREAK);
        stringBuffer.append(LINE_BREAK);
    }

    /**
     * Checks that the unique-ID column can be read as a string, logging a
     * warning through {@code this.log} when it cannot.
     *
     * @param table the table to inspect
     * @return {@code true} if the unique-ID column is readable as a string
     */
    private boolean tableSanityCheckPasses(Table table) {
        if (table.canGetString(ISITag.UNIQUE_ID.columnName)) {
            return true;
        }
        this.log.log(LogService.LOG_WARNING, "ISI Table does not have a unique ID column (abbreviated UT). It is possible that no records (a.k.a papers) in the original ISI file specified a unique ID. Therefore, we are unable to determine which papers are duplicates.");
        return false;
    }
}
