/*	
 *	DECRYPTER
 *	By Jeremy Lennert
 *	November 14, 2000
 *	v. 1.01b (untested)
 *
 *	8bc.cpp		{8-bit constant}
 *
 *	Uses frequencies generated by Frequency Plotter
 *	to attempt to decrypt an English plaintext
 *	encrypted by XOR-ing with an 8-bit constant key.
 *
 *	Prints first three likely candidates and their keys
 *	to an output file.
 *
 *	Input ciphertext: ciphertext.txt
 *	Output plaintext: plaintext.txt
 */


// COMPILER DIRECTIVES
#include <fstream.h>

// CONSTANTS
#define TEXTLENGTH 5001
#define ACCEPTABLE_ERROR 0.5

// EXTERNAL VARIABLES
float letters[256];				// Holds letter frequencies
float digraph[256][256];		// Holds digram frequencies
float trigraph[256][256][256];	// Holds trigram frequencies
int length;		// Length of text

float char_count[256];				// Frequencies of candidate plaintexts
float digraph_count[256][256];		// Declared globally so that they
float trigraph_count[256][256][256];// are stored in heap

// FUNCTION PROTOTYPES
void get_letters();		// Imports letter frequencies from file
void get_digraph();		// Imports digraph from file
void get_trigraph();	// Imports trigraph from file

float check_letters(unsigned char* text);	// Checks decrypted text against 
											// letter frequency table
float check_digraph(unsigned char* text);	// Checks against digraph table
float check_trigraph(unsigned char* text);	// Checks against trigraph table

void crypt(unsigned char key, unsigned char* tocrypt, unsigned char* crypted);
	// Encrypts or decrypts (XOR) text in tocrypt with key, outputs to crypted
float check_key(unsigned char key, unsigned char* ciphertext, unsigned char* plaintext);
	// Checks key to see if it decrypts ciphertext to a reasonable plaintext
	// by calling crypt, check_letters, check_digraph, and check_trigraph


// FUNCTION DEFINITIONS

int main()
{
	ifstream cipher;
	ofstream plain;
	unsigned char ciphertext[TEXTLENGTH];	// Holds ciphertext in memory.  If
								// ciphertext is longer than TEXTLENGTH
								// characters, it is truncated to fit in array
	int freq[256];	// Number of times each char appears in ciphertext, used to
					// make a guess at keys
	unsigned char sortfreq[256];	// Stores list of chars in order of most
									// common occurrence
	unsigned char testkey;		// Key being tried
	unsigned char plaintext[TEXTLENGTH];	// Text that ciphertext decrypts to
											// when testkey is used to decrypt
	unsigned char goodkey[3];	// Three best tried keys
	float margin[3];		// Difference with goodkeys between expected and
							// actual letter, digram, and trigram frequencies
	int a, b, c, d;	// Loop variables

	cipher.open("ciphertext.txt");

	/*
	 *	Import ciphertext to the ciphertext[] variable
	 *	
	 *	There are more efficient methods for importing the ciphertext than
	 *	this loop, but this allows us to count the length of the text
	 */
	cout << "Reading ciphertext . . .\n";
	length = 0;
	ciphertext[0] = '\0';
	while ( !cipher.eof() && (length < TEXTLENGTH) )
	{
		cipher >> ciphertext[length];
		ciphertext[++length] = '\0';
	}

	/*
	 *	Initialize the freq[] array to 0's
	 *	Initialize the sortfreq[] array to -1's
	 *
	 *	Count frequencies of letters in ciphertext[]
	 */
	cout << "Analysing ciphertext . . .\n";
	for (a = 0; a < 256; a++)
	{
		freq[a] = 0;
		sortfreq[a] = -1;
	}
	for (a = 0; a < length; a++)
	{
		freq[ciphertext[a]]++;
	}

	/*
	 *	Sort letters by frequency of occurrence and store in sortfreq[]
	 *
	 *	Sorts by checking relative frequencies of two letters and slides
	 *	less frequent letter downwards on the list
	 */
	for (a = 0; a < 256; a++)
	{
		c = b = a;
		for (d = 0; d < 256; d++)
		{
			if ( (sortfreq[d] == -1) || (freq[b] > freq[sortfreq[d]]) )
			{
				c = sortfreq[d];
				sortfreq[d] = b;
				b = c;
			}
		}
	}

	/*
	 *	Import letter, digram, and trigram frequencies
	 */
	cout << "Getting frequencies . . .\n";
	get_letters();
	get_digraph();
	get_trigraph();

	/*
	 *	Start guessing and checking keys
	 */
	cout << "Cryptanalysing cipher . . .\n";
	goodkey[0] = goodkey[1] = goodkey[2] = 0;
	for (a = 0; a < 256; a++)
	{
		testkey = (sortfreq[a] ^ ' ');	// Check key that decrypts sortfreq[a]
										// to a space
		margin[0] = check_key(testkey, ciphertext, plaintext);	// Check key
		if (margin[0] <= ACCEPTABLE_ERROR)	// If it's a good key, save it
		{
			goodkey[0] = testkey;
			if (goodkey[2])			// Quit once three acceptable keys are found
			{
				break;
			}
			else if (goodkey[1])	// Otherwise shift keys in goodkey[]
			{
				goodkey[2] = goodkey[1];
				margin[2] = margin[1];
				goodkey[1] = goodkey[0];
				margin[1] = margin[0];
				goodkey[0] = 0;
			}
			else
			{
				goodkey[1] = goodkey[0];
				margin[1] = margin[0];
				goodkey[0] = 0;
			}
		}
	}

	/*
	 *	Output the three good decryptions (if that many were found) to
	 *	a text file (plaintext.txt) -- that is, if any were found
	 */
	if (goodkey[0] || goodkey[1] || goodkey[2])
	{
		cout << "Outputing plaintexts to file . . .\n";
		plain.open("plaintext.txt");
		for (a = 0; a < 3; a++)
		{
			if (goodkey[a])
			{
				plain << "Key: " << goodkey[a] << endl;
				plain << "Entropy: " << margin[a] << endl << endl;
				crypt(goodkey[a], ciphertext, plaintext);
				plain << plaintext << endl << endl << endl;
			}
		}
		cout << "Done.\n";
	}
	else
	{
		cout << "No reasonable plaintexts found!\n";
	}

	cin.ignore();	// Don't close application until user presses a key
	
	return 0;
}

void get_letters()	// Will be rewritten to accomodate a sparse array later
{
	int x;		// Loop variable
	ifstream table;
	table.open("chars.txt");
	
	for (x = 0; x < 256; x++)	// Until end of file
	{
		if (table.eof()) break;
		table >> letters[x];
	}
}

void get_digraph()	// Will be rewritten to accomodate a sparse array later
{
	int x, y;	// Loop variables
	ifstream table;
	table.open("digraphs.txt");

	for (x = 0; x < 256; x++)
	{
		for (y = 0; y < 256; y++)
		{
			if (table.eof()) break;
			table >> digraph[x][y];
		}
	}
}

void get_trigraph()	// Will be rewritten to accomodate a sparse array later
{
	int x, y, z;	// Loop variables
	ifstream table;
	table.open("trigraphs.txt");

	for (x = 0; x < 256; x++)
	{
		for (y = 0; y < 256; y++)
		{
			for (z = 0; z < 256; z++)
			{
				if (table.eof()) break;
				table >> trigraph[x][y][z];
			}
		}
	}
}

float check_letters(unsigned char* text)
{
	int x;	// Loop variable
	float error = 0.0;

	for (x = 0; text[x] != '\0'; x++)
	{
		char_count[text[x]]++;	// Find letter frequencies in candidate plaintext
	}

	for (x = 0; x < 256; x++)
	{
		error += (char_count[x] - length * letters[x]) *
			(char_count[x] - length * letters[x]);		// Calculate squared error
	}

	error /= length * length;	// Calculate mean squared error

	return error;
}

float check_digraph(unsigned char* text)
{
	int x, y;	// Loop variables
	float error = 0.0;

	for (x = 0; text[x+1] != '\0'; x++)
	{
		digraph_count[text[x]][text[x+1]]++;	// Find digram frequencies
												// in candidate plaintext
	}

	for (x = 0; x < 256; x++)
	{
		for (y = 0; y < 256; y++)
		{
			error += (digraph_count[x][y] - length * digraph[x][y]) *
				(digraph_count[x][y] - length * digraph[x][y]);	// Squared error
		}
	}

	error /= length * length;	// Calculate mean squared error

	return error;
}

float check_trigraph(unsigned char* text)
{
	int x, y, z;	// Loop variables
	float error = 0.0;

	for (x = 0; text[x+2] != '\0'; x++)
	{
		trigraph_count[text[x]][text[x+1]][text[x+2]]++;// Find trigram frequencies
														// in candidate plaintext
	}

	for (x = 0; x < 256; x++)
	{
		for (y = 0; y < 256; y++)
		{
			for (z = 0; z < 256; z++)
			{
				error += (trigraph_count[x][y][z] - length * trigraph[x][y][z]) *
					(trigraph_count[x][y][z] - length * trigraph[x][y][z]);
							// Calculate squared error
			}
		}
	}

	error /= length * length;	// Calculate mean squared error

	return error;
}

void crypt(unsigned char key, unsigned char* tocrypt, unsigned char* crypted)
{
	int x;	// Loop variable
	for (x = 0; tocrypt[x] != '\0'; x++)
	{
		crypted[x] = tocrypt[x] ^ key;
	}
}

float check_key(unsigned char key, unsigned char* ciphertext, unsigned char* plaintext)
{
	float error[3];
	float total_error;

	crypt(key, ciphertext, plaintext);

	error[0] = check_letters(plaintext);
	error[1] = check_digraph(plaintext);
	error[2] = check_trigraph(plaintext);

	total_error = (error[0] / (float)length) +
		(error[1] / (float)length * (float)length) +
		(error[2] / (float)length * (float)length * (float)length);

	return total_error;
}