Skip to content

Commit

Permalink
split summary.csv into summary and summary_extended (#890)
Browse files Browse the repository at this point in the history
* split summary.cssv into summary and summary_extended

* rename files for sequential batch

* clean up

* slices should clarify CSV result type

* improved error message on extended/non-extended divergence

* fix tests

* rename new test

* Issue 856: refactoring of how filenames are chosen for Detailed Summary (#894)

* refactor how filenames are chosen

* move files to new recommended name

* rename ResultFile to ResultTypeAndSlice, and CVR_CDF to CDF_CVR

* address additional PR comments

* rename files: CVR_CDF to CDF_CVR

* refator to make explicit the difference between a results file (what we previously referred to as the summary files) and an output file (the results files plus rctab_cvr and cdf_cvr)

* fix comment

* refactor how filenames are chosen

* move files to new recommended name

* rename ResultFile to ResultTypeAndSlice, and CVR_CDF to CDF_CVR

* address additional PR comments

* rename files: CVR_CDF to CDF_CVR

* refator to make explicit the difference between a results file (what we previously referred to as the summary files) and an output file (the results files plus rctab_cvr and cdf_cvr)

* fix comment

* PR comments addressed and tests fixed

* add warning message for sanitized slice filename collision

* fix logging message location

---------

Co-authored-by: yezr <[email protected]>

---------

Co-authored-by: yezr <[email protected]>
  • Loading branch information
artoonie and yezr authored Nov 6, 2024
1 parent a174799 commit 6623238
Show file tree
Hide file tree
Showing 193 changed files with 329 additions and 175 deletions.
5 changes: 5 additions & 0 deletions src/main/java/network/brightspots/rcv/AuditableFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

final class AuditableFile extends File {
public AuditableFile(String pathname) {
super(pathname);
}

public AuditableFile(Path pathname) {
super(pathname.toAbsolutePath().toString());
}

public void finalizeAndHash() {
String hash = Utils.bytesToHex(FileUtils.getHashBytes(this, "SHA-512"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ private void watchParseCvrServiceProgress(Service<LoadedCvrData> service) {
// and calculate the width of the filename column
perSourceCvrCountTable.getItems().clear();
int maxFilenameLength = 0;
for (ResultsWriter.CvrSourceData sourceData : data.getCvrSourcesData()) {
for (OutputWriter.CvrSourceData sourceData : data.getCvrSourcesData()) {
String countString = String.format("%,d", sourceData.getNumCvrs());
String fileString = new File(sourceData.source.getFilePath()).getName();
perSourceCvrCountTable.getItems().add(new Pair<>(fileString, countString));
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ private void endCvr() {
handleEmptyCells(config.getMaxRankingsAllowedWhenNotSetToMaximum() + 1);
}
String computedCastVoteRecordId =
String.format("%s-%d", ResultsWriter.sanitizeStringForOutput(excelFileName), cvrIndex);
String.format("%s-%d", OutputWriter.sanitizeStringForOutput(excelFileName), cvrIndex);

// add precinct ID if needed
if (precinctColumnIndex != null) {
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/network/brightspots/rcv/Tabulator.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import javafx.util.Pair;
import network.brightspots.rcv.CastVoteRecord.VoteOutcomeType;
import network.brightspots.rcv.ContestConfig.TabulateBySlice;
import network.brightspots.rcv.ResultsWriter.RoundSnapshotDataMissingException;
import network.brightspots.rcv.OutputWriter.RoundSnapshotDataMissingException;

final class Tabulator {

Expand Down Expand Up @@ -794,8 +794,8 @@ private List<String> doRegularElimination(
// to generate the results spreadsheets
// param: timestamp string to use when creating output filenames
void generateSummaryFiles(String timestamp) throws IOException {
ResultsWriter writer =
new ResultsWriter()
OutputWriter writer =
new OutputWriter()
.setNumRounds(currentRound)
.setCandidatesToRoundEliminated(candidateToRoundEliminated)
.setWinnerToRound(winnerToRound)
Expand All @@ -805,8 +805,8 @@ void generateSummaryFiles(String timestamp) throws IOException {
.setRoundToResidualSurplus(roundToResidualSurplus);

List<String> candidateOrder = roundTallies.get(1).getSortedCandidatesByTally();
writer.generateOverallSummaryFiles(roundTallies, tallyTransfers, candidateOrder);
writer.generateBySliceSummaryFiles(roundTalliesBySlices, tallyTransfersBySlice, candidateOrder);
writer.generateContestResultFiles(roundTallies, tallyTransfers, candidateOrder);
writer.generateBySliceResultsFiles(roundTalliesBySlices, tallyTransfersBySlice, candidateOrder);

if (config.isGenerateCdfJsonEnabled()) {
try {
Expand Down
22 changes: 11 additions & 11 deletions src/main/java/network/brightspots/rcv/TabulatorSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import network.brightspots.rcv.ContestConfig.Provider;
import network.brightspots.rcv.ContestConfig.UnrecognizedProviderException;
import network.brightspots.rcv.FileUtils.UnableToCreateDirectoryException;
import network.brightspots.rcv.ResultsWriter.RoundSnapshotDataMissingException;
import network.brightspots.rcv.OutputWriter.RoundSnapshotDataMissingException;
import network.brightspots.rcv.Tabulator.TabulationAbortedException;

@SuppressWarnings("RedundantSuppression")
Expand Down Expand Up @@ -115,8 +115,8 @@ boolean convertToCdf(BiConsumer<Double, Double> progressUpdate) {
} else {
Tabulator.SliceIdSet sliceIds =
new Tabulator(castVoteRecords.getCvrs(), config).getEnabledSliceIds();
ResultsWriter writer =
new ResultsWriter()
OutputWriter writer =
new OutputWriter()
.setNumRounds(0)
.setSliceIds(sliceIds)
.setContestConfig(config)
Expand Down Expand Up @@ -338,7 +338,7 @@ private LoadedCvrData parseCastVoteRecords(
boolean encounteredSourceProblem = false;

// Per-source data for writing generic CSV
List<ResultsWriter.CvrSourceData> cvrSourceData = new ArrayList<>();
List<OutputWriter.CvrSourceData> cvrSourceData = new ArrayList<>();

// At each iteration of the following loop, we add records from another source file.
for (int sourceIndex = 0; sourceIndex < config.rawConfig.cvrFileSources.size(); ++sourceIndex) {
Expand All @@ -353,7 +353,7 @@ private LoadedCvrData parseCastVoteRecords(

// Update the per-source data for the results writer
cvrSourceData.add(
new ResultsWriter.CvrSourceData(
new OutputWriter.CvrSourceData(
source, reader, sourceIndex, startIndex, castVoteRecords.size() - 1));

// Check for unrecognized candidates
Expand Down Expand Up @@ -416,8 +416,8 @@ private LoadedCvrData parseCastVoteRecords(
// Output the RCTab-CSV CVR
if (shouldOutputRcTabCvr) {
try {
ResultsWriter writer =
new ResultsWriter().setContestConfig(config).setTimestampString(timestampString);
OutputWriter writer =
new OutputWriter().setContestConfig(config).setTimestampString(timestampString);
this.rctabCvrFilePath =
writer.writeRcTabCvrCsv(
castVoteRecords,
Expand Down Expand Up @@ -461,11 +461,11 @@ public static class LoadedCvrData {

private List<CastVoteRecord> cvrs;
private final int numCvrs;
private final List<ResultsWriter.CvrSourceData> cvrSourcesData;
private final List<OutputWriter.CvrSourceData> cvrSourcesData;
private boolean isDiscarded;
private final boolean doesMatchAllMetadata;

LoadedCvrData(List<CastVoteRecord> cvrs, List<ResultsWriter.CvrSourceData> cvrSourcesData) {
LoadedCvrData(List<CastVoteRecord> cvrs, List<OutputWriter.CvrSourceData> cvrSourcesData) {
this.cvrs = cvrs;
this.successfullyReadAll = cvrs != null;
this.numCvrs = cvrs != null ? cvrs.size() : 0;
Expand Down Expand Up @@ -504,7 +504,7 @@ public int numCvrs() {
return numCvrs;
}

List<ResultsWriter.CvrSourceData> getCvrSourcesData() {
List<OutputWriter.CvrSourceData> getCvrSourcesData() {
return cvrSourcesData;
}

Expand All @@ -522,7 +522,7 @@ List<CastVoteRecord> getCvrs() {

public void printSummary() {
Logger.info("Cast Vote Record summary:");
for (ResultsWriter.CvrSourceData sourceData : cvrSourcesData) {
for (OutputWriter.CvrSourceData sourceData : cvrSourcesData) {
Logger.info("Source %d: %s", sourceData.sourceIndex + 1, sourceData.source.getFilePath());
Logger.info(" uses provider: %s", sourceData.source.getProvider());
Logger.info(" read %d cast vote records", sourceData.getNumCvrs());
Expand Down
132 changes: 102 additions & 30 deletions src/test/java/network/brightspots/rcv/TabulatorTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@

package network.brightspots.rcv;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

Expand All @@ -31,12 +33,14 @@
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import network.brightspots.rcv.OutputWriter.OutputFileIdentifiers;
import network.brightspots.rcv.OutputWriter.OutputType;
import network.brightspots.rcv.Tabulator.TabulationAbortedException;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
Expand Down Expand Up @@ -147,8 +151,8 @@ private static boolean compareLists(

private static boolean fileCompareLineByLine(String path1, String path2) {
boolean result = true;
try (BufferedReader br1 = new BufferedReader(new FileReader(path1, StandardCharsets.UTF_8));
BufferedReader br2 = new BufferedReader(new FileReader(path2, StandardCharsets.UTF_8))) {
try (BufferedReader br1 = new BufferedReader(new FileReader(path1, UTF_8));
BufferedReader br2 = new BufferedReader(new FileReader(path2, UTF_8))) {
int currentLine = 1;
int errorCount = 0;

Expand Down Expand Up @@ -186,11 +190,16 @@ private static boolean fileCompareLineByLine(String path1, String path2) {
return result;
}

// given stem and suffix returns path to file in test asset folder
private static Path getTestDirectory(String stem) {
return Paths.get(System.getProperty("user.dir"), TEST_ASSET_FOLDER, stem);
}

// given stem and suffix returns path to file in test asset folder
private static String getTestFilePath(String stem, String suffix) {
return Paths.get(System.getProperty("user.dir"), TEST_ASSET_FOLDER, stem, stem + suffix)
.toAbsolutePath()
.toString();
Path directory = getTestDirectory(stem);
String filename = stem + suffix;
return Paths.get(directory.toString(), filename).toAbsolutePath().toString();
}

private static void runTabulationTest(String testStem) {
Expand Down Expand Up @@ -223,7 +232,7 @@ private static void runTabulationTest(String stem, String expectedException,

if (config.isMultiSeatSequentialWinnerTakesAllEnabled()) {
for (int i = 1; i <= config.getNumberOfWinners(); i++) {
compareFiles(config, stem, timestampString, Integer.toString(i));
compareFiles(config, stem, timestampString, i);
}
} else {
compareFiles(config, stem, timestampString, null);
Expand All @@ -232,12 +241,14 @@ private static void runTabulationTest(String stem, String expectedException,
int numSlicedFilesChecked = 0;
for (ContestConfig.TabulateBySlice slice : config.enabledSlices()) {
for (String sliceName : session.loadSliceNamesFromCvrs(slice, config)) {
String outputType = ResultsWriter.sanitizeStringForOutput(
String.format("%s_%s_summary", sliceName, slice.toLowerString()));
if (compareFiles(config, stem, outputType, ".json", timestampString, null, true)) {
OutputFileIdentifiers outputFileIdentifiersJson = new OutputFileIdentifiers(
OutputType.DETAILED_JSON, slice, sliceName);
OutputFileIdentifiers outputFileIdentifiersCsv = new OutputFileIdentifiers(
OutputType.DETAILED_CSV, slice, sliceName);
if (compareFiles(config, stem, outputFileIdentifiersJson, timestampString, null, true)) {
numSlicedFilesChecked++;
}
if (compareFiles(config, stem, outputType, ".csv", timestampString, null, true)) {
if (compareFiles(config, stem, outputFileIdentifiersCsv, timestampString, null, true)) {
numSlicedFilesChecked++;
}
}
Expand All @@ -256,7 +267,7 @@ private static void runConvertToCdfTest(String stem) {

String timestampString = session.getTimestampString();
ContestConfig config = ContestConfig.loadContestConfig(configPath);
compareFiles(config, stem, "cvr_cdf", ".json", timestampString, null, false);
compareFiles(config, stem, OutputType.CDF_CVR, timestampString, null, false);

cleanOutputFolder(session);
}
Expand Down Expand Up @@ -319,37 +330,50 @@ private static void cleanOutputFolder(TabulatorSession session) {
}

private static void compareFiles(
ContestConfig config, String stem, String timestampString, String sequentialId) {
compareFiles(config, stem, "summary", ".json", timestampString, sequentialId, false);
compareFiles(config, stem, "summary", ".csv", timestampString, sequentialId, false);
ContestConfig config, String stem, String timestampString, Integer sequentialId) {
compareFiles(config, stem, OutputType.DETAILED_JSON, timestampString, sequentialId, false);
compareFiles(config, stem, OutputType.DETAILED_CSV, timestampString, sequentialId, false);
compareExtendedSummaryToSummary(config, timestampString, sequentialId);
if (config.isGenerateCdfJsonEnabled()) {
compareFiles(config, stem, "cvr_cdf", ".json", timestampString, sequentialId, false);
compareFiles(config, stem, OutputType.CDF_CVR, timestampString, sequentialId, false);
}
}

/**
* Helper comparison for non-slice files.
*/
private static boolean compareFiles(
ContestConfig config,
String stem,
OutputType outputType,
String timestampString,
Integer sequentialId,
boolean onlyCheckIfExpectedFileExists) {
OutputFileIdentifiers actualOutputFileIdentifiers = new OutputFileIdentifiers(outputType);
return compareFiles(
config,
stem,
actualOutputFileIdentifiers,
timestampString,
sequentialId,
onlyCheckIfExpectedFileExists);
}

/**
* Returns whether the files were compared at all.
* If they were compared and not equal, the test will fail.
*/
private static boolean compareFiles(
ContestConfig config,
String stem,
String outputType,
String extension,
OutputFileIdentifiers actualOutputFileIdentifiers,
String timestampString,
String sequentialId,
Integer sequentialId,
boolean onlyCheckIfExpectedFileExists) {
String actualOutputPath =
ResultsWriter.getOutputFilePath(
config.getOutputDirectory(), outputType, timestampString, sequentialId)
+ extension;
String expectedPath =
getTestFilePath(
stem,
ResultsWriter.sequentialSuffixForOutputPath(sequentialId)
+ "_expected_"
+ outputType
+ extension);
String actualOutputPath = actualOutputFileIdentifiers.getPath(
config.getOutputDirectory(), timestampString, sequentialId).toAbsolutePath().toString();
String expectedPath = actualOutputFileIdentifiers.getPath(getTestDirectory(stem).toString(),
stem, "expected", sequentialId).toString();

Logger.info("Comparing files:\nGenerated: %s\nReference: %s", actualOutputPath, expectedPath);
boolean didCompare = true;
Expand All @@ -365,6 +389,54 @@ private static boolean compareFiles(
return didCompare;
}

/**
* Rather than storing both the extended summary and non-extended summary files in git, we can
* directly check that the non-extended file is precisely what we expect: everything in the
* extended file except for the inactive ballot breakdown.
*/
private static void compareExtendedSummaryToSummary(
ContestConfig config, String timestampString, Integer sequentialId) {
String dir = config.getOutputDirectory();
String summaryPath = new OutputFileIdentifiers(OutputType.SUMMARY_CSV).getPath(
dir, timestampString, sequentialId).toAbsolutePath().toString();
String detailedPath = new OutputFileIdentifiers(OutputType.DETAILED_CSV).getPath(
dir, timestampString, sequentialId).toAbsolutePath().toString();

try (BufferedReader brSummary = new BufferedReader(new FileReader(summaryPath, UTF_8));
BufferedReader brDetailed = new BufferedReader(new FileReader(detailedPath, UTF_8))) {
while (true) {
String lineDetailed = brDetailed.readLine();
// If the extended file has reached its end, then the non-extended file must have too
if (lineDetailed == null) {
assertNull(brSummary.readLine(), "Extended file is missing a line");
return;
}

// If the extended file should be excluded, continue without moving the file pointer
// in the non-extended file. For now, there's only one type of row excluded, and they
// happen to all start with "Inactive Ballots by"
if (lineDetailed.startsWith("Inactive Ballots by")) {
continue;
}

// This line should be equal in both files. Ensure the line exists and they're equal
// in both files.
String lineSummary = brSummary.readLine();
assertNotNull(lineSummary, "Summary file is missing a line");
if (!lineSummary.equals(lineDetailed)) {
fail("Line differes in extended vs non-extended CSV: \n%s\n%s".formatted(
lineSummary, lineDetailed));
}
}
} catch (FileNotFoundException exception) {
Logger.severe("File not found!\n%s", exception);
fail();
} catch (IOException exception) {
Logger.severe("Error reading file!\n%s", exception);
fail();
}
}

@BeforeAll
static void setup() {
Logger.setup();
Expand Down

0 comments on commit 6623238

Please sign in to comment.