/*
 * PIMPPA - md5sum creation / checking
 *
 *
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/mman.h>
#include <limits.h>
#include <errno.h>

#include "md5.h"
#include "pimppa.h"

int Verbose=0;

#define SCAN_SAFE	1
#define SCAN_WIPE	2

int lookup(char *hexmd5sum);
int printsums(char *pattern);
int scan(int mode);

#define ACTION_CREATE	0
#define ACTION_SCAN		1
#define ACTION_PRINT	2
#define ACTION_LOOKUP	3
#define ACTION_WIPE		4
#define AMOUNT_ACTIONS	5

/*
 * Print the command line synopsis to stderr.
 *
 * name: program name shown on the "Usage:" line (callers pass argv[0]).
 *
 * The list covers every option accepted by the getopt string in main().
 */
void usage(char *name)
{
	fprintf(stderr, "Usage: %s <options>\n\n"
			"-b <id>   Create md5sums for files with backup id <id>\n"
			"-c        Create md5sums for online files\n"
			"-h        Show this help\n"
			"-l <sum>  Look up files matching a sum\n"
			"-p <pat>  Print sums of files matching SQL filename pattern\n"
			"-s        Scan whole database for collisions\n"
			"-v        Verbose\n"
			"-V        Print version\n"
			"-w        Wipe DB from md5-colliding files\n", name);
}

/*
 * Calculate the md5sum of files on disk and store it in p_files.
 *
 * backup_id: when > 0, process the files whose file_backup equals
 *            backup_id (ordered by area_name, file_name); otherwise
 *            process every file that does not have FILE_OFFLINE set
 *            in file_flags.
 *
 * Two connections are used: one to read the file list, one to write
 * the sums back.
 *
 * Returns the number of rows processed (failed files included), 0 if
 * a database connection could not be opened, or -1 if the file list
 * could not be fetched.
 */
int create_sums(int backup_id)
{
	MYSQL *src_db, *dst_db;
	MYSQL_RES *sql_res;
	MYSQL_ROW sql_row;
	unsigned long total_files=0,errors=0;
	long selector;
	FILE *fp;

/********* Connect databases ****************/

	src_db=p_connect();
	if(!src_db)
		return(0);

	dst_db=p_connect();
	if(!dst_db)
	{
		mysql_close(src_db);
		return(0);
	}

/******** create sums *******/

	fprintf(stderr, "Creating md5sums for files...\n");

	/* The query format consumes this value with %ld, so hand it over
	 * as a long rather than as an int. */
	selector=(backup_id > 0 ? (long)backup_id : (long)FILE_OFFLINE);

	p_query(src_db, "SELECT area_path, file_name, file_id "
			"FROM p_files, p_areas "
			"WHERE %s%ld) "
			"  AND area_id=file_area%s",
		(backup_id > 0 ? "(file_backup=" : "NOT (file_flags & "),
		selector,
		(backup_id > 0 ? " ORDER BY area_name,file_name" : ""));
	if(mysql_error(src_db)[0])
	{
		mysql_close(src_db);
		mysql_close(dst_db);
		return(-1);
	}

	sql_res=mysql_store_result(src_db);
	if(!sql_res)
	{
		/* No result set to iterate over; nothing can be processed. */
		mysql_close(src_db);
		mysql_close(dst_db);
		return(-1);
	}

	/* The result is fully buffered by mysql_store_result(), so the row
	 * stays valid for the whole iteration and "continue" is safe. */
	while((sql_row=mysql_fetch_row(sql_res)))
	{
		char fullpath[PATH_MAX];
		char MD5Signature[16];
		char escaped[16*2+1];
		long file_id;
		int len;

		total_files++;

		if(!sql_row[0] || !sql_row[1] || !sql_row[2])
		{
			fprintf(stderr, "Skipping row with NULL path, name or id\n");
			errors++;
			continue;
		}

		len=snprintf(fullpath, sizeof(fullpath), "%s%s",
				sql_row[0], sql_row[1]);
		if(len<0 || (size_t)len>=sizeof(fullpath))
		{
			fprintf(stderr, "Path too long: %s%s\n",
					sql_row[0], sql_row[1]);
			errors++;
			continue;
		}
		file_id=atol(sql_row[2]);

		/* Binary mode: the digest must cover the raw bytes. */
		fp=fopen(fullpath, "rb");
		if(!fp)
		{
			fprintf(stderr, "Error opening %s\n", fullpath);
			errors++;
			continue;
		}

		if(md5_stream(fp, MD5Signature))
		{
			fprintf(stderr, "md5_stream() failure on %s\n", fullpath);
			errors++;
			fclose(fp);
			continue;
		}

		fclose(fp);

		if(Verbose)
		{
			int cnt;

			for(cnt=0;cnt<16;cnt++)
				printf("%02x", (unsigned char)MD5Signature[cnt]);

			printf("  %s\n", fullpath);
		}

		/* The sum is stored as 16 raw bytes, so it has to be escaped
		 * before it is embedded in the statement text. */
		mysql_escape_string(escaped, MD5Signature, 16);

		p_query(dst_db, "UPDATE p_files SET file_md5sum='%s' "
				"WHERE file_id=%ld",
			escaped, file_id);
		if(mysql_error(dst_db)[0])
		{
			fprintf(stderr, "Error storing md5sum for %s\n", fullpath);
			errors++;
		}
	}

	mysql_free_result(sql_res);

	mysql_close(src_db);
	mysql_close(dst_db);

	fprintf(stderr, "Calculated md5sums for %lu files\n", total_files-errors);
	if(errors)
		fprintf(stderr, "There were %lu errors!\n", errors);

	return((int)total_files);
}
	

/*
 * Program entry point: parse the command line, then run the requested
 * actions in a fixed order (scan, wipe, create, print, lookup).
 *
 * Returns 0 on success, 1 on a command line error or when an action
 * returns a negative value.
 */
int main(int argc, char *argv[])
{
	/* Non-NULL marker for actions that carry no argument string. */
	static char enabled[]="";
	char *actions[AMOUNT_ACTIONS];
	int backup_id=0;
	int status=0;
	int opt,i;
	char *end;
	long value;

	if(argc==1)
	{
		usage(argv[0]);
		return(0);
	}

	for(i=0;i<AMOUNT_ACTIONS;i++)
		actions[i]=NULL;

	while((opt=getopt(argc, argv, "b:chl:p:svVw"))!=-1)
	{
		switch(opt)
		{
			case 'b':
				/* A malformed id must not silently become 0, which
				 * create_sums() treats as "online files". */
				errno=0;
				value=strtol(optarg, &end, 10);
				if(errno || end==optarg || *end!='\0' ||
				   value<0 || value>INT_MAX)
				{
					fprintf(stderr, "%s: invalid backup id '%s'\n",
							argv[0], optarg);
					return(1);
				}
				backup_id=(int)value;
				actions[ACTION_CREATE]=enabled;
				break;
			case 'c':
				actions[ACTION_CREATE]=enabled;
				break;
			case 'h':
				usage(argv[0]);
				return(0);
			case 'l':
				actions[ACTION_LOOKUP]=optarg;
				break;
			case 'p':
				actions[ACTION_PRINT]=optarg;
				break;
			case 's':
				actions[ACTION_SCAN]=enabled;
				break;
			case 'v':
				Verbose=1;
				break;
			case 'V':
				printf("%s %s %s\n", PACKAGE, argv[0], VERSION);
				return(0);
			case 'w':
				actions[ACTION_WIPE]=enabled;
				break;
			default:
				/* Unknown option or missing argument: getopt has
				 * already printed a diagnostic. Stop here instead of
				 * running whatever actions were collected so far. */
				usage(argv[0]);
				return(1);
		}
	}

	if(actions[ACTION_SCAN])
	{
		if(scan(SCAN_SAFE)<0)
			status=1;
	}
	if(actions[ACTION_WIPE])
	{
		if(scan(SCAN_WIPE)<0)
			status=1;
	}
	if(actions[ACTION_CREATE])
	{
		if(create_sums(backup_id)<0)
			status=1;
	}
	if(actions[ACTION_PRINT])
	{
		if(printsums(actions[ACTION_PRINT])<0)
			status=1;
	}
	if(actions[ACTION_LOOKUP])
	{
		if(lookup(actions[ACTION_LOOKUP])<0)
			status=1;
	}

	return(status);
}

/*
 * Scan the database for md5sums that are shared by more than one file.
 * All-zero sums are ignored.
 *
 * For every colliding sum the files carrying it that do not have
 * FILE_OFFLINE set are looked up, newest file_date first, and then:
 *
 *   mode == SCAN_WIPE: the first of those files is removed from disk
 *                      and its row is deleted from p_files.
 *   any other mode:    one line "feh <path> <path> ..." listing all of
 *                      them is printed to stdout.
 *
 * Returns the number of sums reported / files deleted, 0 if a database
 * connection could not be opened, or -1 if the collision query failed.
 */
int scan(int mode)
{
	MYSQL *src_db, *dst_db;
	MYSQL_RES *sql_res;
	MYSQL_ROW sql_row;
	unsigned long coll_files=0;

/********* Connect databases ****************/

	src_db=p_connect();
	if(!src_db)
		return(0);

	dst_db=p_connect();
	if(!dst_db)
	{
		mysql_close(src_db);
		return(0);
	}

/******** check *******/

	fprintf(stderr, "Finding collisions from online files...\n");

	p_query(src_db, "SELECT file_md5sum FROM p_files " 
			"WHERE file_md5sum!='\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0' "
			"GROUP BY file_md5sum HAVING count(*)>1");
	if(mysql_error(src_db)[0])
	{
		mysql_close(src_db);
		mysql_close(dst_db);
		return(-1);
	}

	/* The outer result is streamed from src_db; the per-sum queries
	 * below therefore run on the second connection, dst_db. */
	sql_res=mysql_use_result(src_db);
	if(!sql_res)
	{
		mysql_close(src_db);
		mysql_close(dst_db);
		return(-1);
	}

	while((sql_row=mysql_fetch_row(sql_res)))
	{
		MYSQL_RES *sql_res2;
		MYSQL_ROW sql_row2;
		char escaped[16*2+1];

		mysql_escape_string(escaped, sql_row[0], 16);

		p_query(dst_db, "SELECT area_path, file_name, file_id "
				"FROM p_files, p_areas "
				"WHERE file_md5sum='%s' "
				"  AND NOT (file_flags & %ld) "
				"  AND file_area=area_id "
				"ORDER BY file_date DESC", 
			escaped, (long)FILE_OFFLINE);
		sql_res2=mysql_store_result(dst_db);
		if(!sql_res2)
			continue;

		if(mode==SCAN_WIPE)
		{
			char fullpath[PATH_MAX];
			long file_id=0;

			sql_row2=mysql_fetch_row(sql_res2);
			if(sql_row2 && sql_row2[0] && sql_row2[1] && sql_row2[2])
			{
				int len=snprintf(fullpath, sizeof(fullpath), "%s%s",
						sql_row2[0], sql_row2[1]);

				/* Never hand a truncated path to remove(): it could
				 * name a different file. */
				if(len>=0 && (size_t)len<sizeof(fullpath))
					file_id=atol(sql_row2[2]);
				else
					fprintf(stderr, "Path too long, not deleting: %s%s\n",
							sql_row2[0], sql_row2[1]);
			}
			mysql_free_result(sql_res2);

			if(!file_id)
				continue;

			/* Keep the database row when the file could not be
			 * removed, unless it is already gone from disk. */
			if(remove(fullpath)!=0 && errno!=ENOENT)
			{
				fprintf(stderr, "Error removing %s: %s\n",
						fullpath, strerror(errno));
				continue;
			}

			p_query(dst_db, "DELETE FROM p_files "
					"WHERE file_id=%ld",
				file_id);
			if(Verbose)
				printf("Deleted colliding %s\n", fullpath);
			coll_files++;
		}
		else
		{
			if(mysql_num_rows(sql_res2)>0)
			{
				printf("feh ");
				while((sql_row2=mysql_fetch_row(sql_res2)))
				{
					printf("%s%s ", 
							sql_row2[0], sql_row2[1]);
				}
				printf("\n");
				coll_files++;
			}
			mysql_free_result(sql_res2);
		}
	}

	mysql_free_result(sql_res);

	mysql_close(src_db);
	mysql_close(dst_db);

	if(mode==SCAN_SAFE)
		fprintf(stderr, "%lu files collided.\n", coll_files);
	else
		fprintf(stderr, "%lu files deleted.\n", coll_files);

	return((int)coll_files);
}

/*
 * Print the md5sum (as 32 hex digits) and full path of every file
 * whose file_name matches an SQL LIKE pattern.
 *
 * pattern: LIKE pattern. It is escaped before being embedded in the
 *          query text, so a quote in the pattern no longer breaks the
 *          statement; '%' and '_' are not touched by the escaping.
 *
 * Returns 0 on success or when the database connection could not be
 * opened, -1 on allocation failure or when no result set was returned.
 */
int printsums(char *pattern)
{
	MYSQL *src_db;
	MYSQL_RES *sql_res;
	MYSQL_ROW sql_row;
	char *escaped;
	size_t pattern_len;
	int cnt;

	/* Escaping can at most double the input, plus the terminator. */
	pattern_len=strlen(pattern);
	escaped=malloc(pattern_len*2+1);
	if(!escaped)
	{
		fprintf(stderr, "Out of memory\n");
		return(-1);
	}

	src_db=p_connect();
	if(!src_db)
	{
		free(escaped);
		return(0);
	}

	mysql_escape_string(escaped, pattern, (unsigned long)pattern_len);

	p_query(src_db, "SELECT area_path, file_name, file_md5sum "
			"FROM p_files, p_areas "
			"WHERE file_name like '%s' AND file_area=area_id",
		escaped);
	free(escaped);

	sql_res=mysql_store_result(src_db);
	if(!sql_res)
	{
		fprintf(stderr, "No result for pattern '%s'\n", pattern);
		mysql_close(src_db);
		return(-1);
	}

	while((sql_row=mysql_fetch_row(sql_res)))
	{
		/* A NULL column cannot be printed; skip such rows. */
		if(!sql_row[0] || !sql_row[1] || !sql_row[2])
			continue;

		for(cnt=0;cnt<16;cnt++)
			printf("%02x", (unsigned char)sql_row[2][cnt]);

		printf("  %s%s\n", sql_row[0], sql_row[1]);
	}

	mysql_free_result(sql_res);
	mysql_close(src_db);

	return(0);
}

/*
 * Convert a textual md5sum into its 16 raw bytes.
 *
 * hex: string that must consist of exactly 32 hexadecimal digits.
 * sum: receives the 16 decoded bytes.
 *
 * Returns 0 on success, -1 if hex is NULL, has the wrong length or
 * contains a character that is not a hex digit.
 */
static int parse_md5_hex(const char *hex, unsigned char *sum)
{
	int i,k;

	if(!hex || strlen(hex)!=32)
		return(-1);

	for(i=0;i<16;i++)
	{
		int nibble[2];

		for(k=0;k<2;k++)
		{
			unsigned char c=(unsigned char)hex[i*2+k];

			if(!isxdigit(c))
				return(-1);
			nibble[k]=isdigit(c) ? c-'0' : tolower(c)-'a'+10;
		}
		sum[i]=(unsigned char)((nibble[0]<<4)|nibble[1]);
	}

	return(0);
}

/*
 * Print the full path of every file whose stored md5sum equals the
 * given sum.
 *
 * md5hex: the sum as 32 hexadecimal digits.
 *
 * Returns 0 after the lookup or when the database connection could
 * not be opened, -1 if md5hex is not a valid md5sum string.
 */
int lookup(char *md5hex)
{
	MYSQL *db;
	MYSQL_RES *sql_res;
	MYSQL_ROW sql_row;
	unsigned char md5sum[16];
	char md5sum_esc[16*2+1];

	/* Validate before connecting: a short or malformed string must not
	 * be read past its end or turned into an arbitrary sum. */
	if(parse_md5_hex(md5hex, md5sum)!=0)
	{
		fprintf(stderr, "Invalid md5sum '%s': expected 32 hex digits\n",
				md5hex ? md5hex : "(null)");
		return(-1);
	}

	db=p_connect();
	if(!db)
		return(0);

	/* The column is compared against the raw bytes, which need
	 * escaping before they go into the statement text. */
	mysql_escape_string(md5sum_esc, (const char *)md5sum, 16);

	p_query(db, "SELECT area_path, file_name " 
		    "FROM p_files, p_areas "
		    "WHERE file_md5sum='%s' AND file_area=area_id",
		md5sum_esc);
	sql_res=mysql_store_result(db);
	if(sql_res)
	{
		while((sql_row=mysql_fetch_row(sql_res)))
		{
			printf("%s%s\n", sql_row[0], sql_row[1]);
		}
		mysql_free_result(sql_res);
	}

	mysql_close(db);

	return(0);
}
