Compare Csv file


#1

Hi, I would like to compare two CSV Files and write the differences to a third CSV file. I have tried using tokenize but it is deprecated.


#2

same, do you have an idea?


#3

Diff programming tends to involve significant amount of efforts. It may look easy at first, but soon turns out bloody.

I would rather recommend you to use diff-dedicated tools. For example WinMerge:

You can execute any exe from Groovy script (= Katalon Test Case script) using ProcessBuilder.
See the following article.

WinMerge has Command Line Interface

https://manual.winmerge.org/en/Version_control.html

So it would be a fun to launch WinMerge from Katalon Studio.


#4

hi,

simple demo where used PDFUtil lib and apache pdfBox libs

download pdfUtil.jar from
http://www.testautomationguru.com/introducing-pdfutil-to-compare-pdf-files-extract-resources/
and add it to the project Drivers folder
restart KS

This is first demo pdf page file1.pdf
human
cat
dog
snake
worm
eagle
rabbit

This is second demo pdf page file2.pdf
human
cat
dog
squirrel
worm
eagle
rabbit

and the third file file3.pdf have differences
pages have differences file1 have words [first, snake] which are not in file2, file2 instead have [second, squirrel]

TESTCASE
import com.testautomationguru.utility.PDFUtil;
PDFUtil pdfUtil = new PDFUtil();
// returns the pdf content from page number 1
String file_1 = pdfUtil.getText(“path where your pdf lies/file1.pdf”,1);
String file_2 = pdfUtil.getText(“path where your pdf lies/file2.pdf”,1);

def diff1 = CustomKeywords.‘readPdfFile.verifyPdfContent.findNotMatching’(file_1, file_2)
def diff2 = CustomKeywords.‘readPdfFile.verifyPdfContent.findNotMatching’(file_2, file_1)

String message = "pages have differences file1 have words "+diff1+ " which are not in file2, file2 instead have "+diff2

println "pages have differences file1 have words "+diff1+ " which are not in file2, file2 instead have "+diff2

KEYWORDS
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.PDPage
import org.apache.pdfbox.pdmodel.PDPageContentStream
import org.apache.pdfbox.pdmodel.font.PDFont
import org.apache.pdfbox.pdmodel.font.PDType1Font
import org.apache.pdfbox.text.PDFTextStripper
@Keyword
public List findNotMatching(String sourceStr, String anotherStr){
StringTokenizer at = new StringTokenizer(sourceStr, " ");
StringTokenizer bt = null;
int i = 0, token_count = 0;
String token = null;
boolean flag = false;
List missingWords = new ArrayList();
while (at.hasMoreTokens()) {
token = at.nextToken();
bt = new StringTokenizer(anotherStr, " ");
token_count = bt.countTokens();
while (i < token_count) {
String s = bt.nextToken();
if (token.equals(s)) {
flag = true;
break;
} else {
flag = false;
}
i++;
}
i = 0;
if (flag == false)
missingWords.add(token);
}
return missingWords;
}

public void writeDfferences(String message, String filename){
	
	PDDocument doc = new PDDocument();
	try {
		PDPage page = new PDPage();
		doc.addPage(page);
		
		PDFont font = PDType1Font.TIMES_ROMAN;

		PDPageContentStream contents = new PDPageContentStream(doc, page);
		contents.beginText();
		contents.setFont(font, 12);
		contents.newLineAtOffset(50, 700);
		contents.showText(message);
		contents.endText();
		contents.close();
		
		doc.save(filename);
	}
	finally {
		doc.close();
	}
}

CustomKeywords.‘readPdfFile.verifyPdfContent.writeDfferences’(message, “path where your pdf lies/file3.pdf”)


#5

hello,

sorry, read again this subject, and question was CSV not pdf, will try to do for csv same :slight_smile:


#6

hi,

ok, i got it something, works but not so pretty :slight_smile:

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;



String originalPath = System.getProperty("user.dir")+"\\ExcelFiles\\original.csv";
String comparePath = System.getProperty("user.dir")+"\\ExcelFiles\\compare.csv";
String differencesPath = System.getProperty("user.dir")+"\\ExcelFiles\\differences.csv";

//createCSVFile();
int[] rowsCols = getRowsColsNo(originalPath);
int[] rowsCols2 = getRowsColsNo(comparePath);
System.out.println(rowsCols[0]); //count of rows
System.out.println(rowsCols[1]); //count of columns

File file = new File(differencesPath);
if(file.delete())
{
	System.out.println("File deleted successfully");
}
else
{
	System.out.println("Failed to delete the file");
}
createCSVFile(differencesPath);//create new file

String mess = "original.csv does not match with compare.csv ";
//start from row 0 (header)
writeToDiffFile(differencesPath, null, mess, 0, 0);//write header wrote
String[] strDiff = new String[rowsCols[0]];
for (int i = 0; i < rowsCols[0]; i++) {
	String line = Files.readAllLines(Paths.get(originalPath)).get(i);
	String line2 = Files.readAllLines(Paths.get(comparePath)).get(i);
	String diff = getDiff(line, line2);
	strDiff[0] = diff;
	strDiff[1] = "in row "+i;
	writeToDiffFile(differencesPath, strDiff, null, 1, i);
}


public static int[] getRowsColsNo(String fileName) {
	Scanner scanIn = null;
	int rows = 0;
	int cols = 0;
	String InputLine = "";
	try {
		scanIn = new Scanner(new BufferedReader(
				new FileReader(fileName)));
		scanIn.useDelimiter(",");
		while (scanIn.hasNextLine()) {
			InputLine = scanIn.nextLine();
			String[] InArray = InputLine.split(",");
			rows++;
			cols = InArray.length;
		}

	} catch (Exception e) {
		System.out.println(e);
	}
	//scanIn.close();
	return [rows, cols] ;
}

public static void createCSVFile(String fileName) throws IOException {
	//String[] HEADERS = { "author", "title","year"};
	CSVPrinter printer = null;
	FileWriter out = new FileWriter(fileName);
	try {
		printer = new CSVPrinter(out, CSVFormat.DEFAULT);
		//        .withHeader(HEADERS));
		printer.printRecord();
	} catch (IOException e) {
		e.printStackTrace();
	}
	printer.flush();
	out.close();
}


public static void writeToDiffFile(String fileName, String[] values, String val, int counter, int rowNum) throws IOException {
	CSVPrinter csvPrinter;
	//FileWriter out = new FileWriter(fileName);
	Writer writer = Files.newBufferedWriter(Paths.get(fileName), StandardOpenOption.APPEND);
	//BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName), StandardOpenOption.APPEND);
	if(counter == 1) {
		csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT);
		 //       .withHeader("author", "title", "year"));
		csvPrinter.printRecord(Arrays.asList(values));
	}
	else{
		csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT);
		csvPrinter.printRecord(val);
	}
	csvPrinter.flush();
	writer.close();
}

public static String getDiff(String str1, String str2){
	String[] strList1 = str1.split(",");
	String[] strList2 = str2.split(",");

	List<String> list1 = Arrays.asList(strList1);
	List<String> list2 = Arrays.asList(strList2);

	// Prepare a union
	List<String> union = new ArrayList<>(list1);
	union.addAll(list2);

	// Prepare an intersection
	List<String> intersection = new ArrayList<>(list1);
	intersection.retainAll(list2);

	// Subtract the intersection from the union
	union.removeAll(intersection);
	List<String> arr = new ArrayList<>();
	for (String s : union) {
		arr.add(s);
	}
	return String.join(", ", arr);
}

original.csv
“author”,“title”,“year”
“Dan Simmons”,“Hyperion”,2019
“Douglas Adams”,“The Hitchhiker’s Guide to the Galaxy”,2020
“Mat Pat”,“Java Book”,2018
“Tim Given”,“Maths”,2015

compare.csv
“author”,“title”,“year”
“Dan Simmons”,“Hyperion”,
,“The Hitchhiker’s Guide to the Galaxy”,2020
“Mat Pat”,“Java Book”,100
“Tim Given”,

differences.csv

“original.csv does not match with compare.csv "
“”,in row 0,
2019,in row 1,
“”“Douglas Adams””, “,in row 2,
“2018, 100”,in row 3,
“”“Maths””, 2015",in row 4,