/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.
*/
/*
 *  $Id: diskstatusblock.c,v 1.4 2000/09/13 19:39:03 burke Exp $
 *
 *  Copyright (C) 2000 Mission Critical Linux, LLC
 *
 *  author: Tim Burke <burke@missioncriticallinux.com>
 *  description: Interface to node status block.
 *
 * diskstatusblock.c
 *
 * This file implements the disk access API used to represent node status.
 * Basically this consists of initializing/reading/writing/validating
 * the NodeStatusBlock.
 *
 * Its primary customer is the quorum daemon.
 */
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <signal.h>
#include <time.h>

#include <logger.h>
#include <sys/syslog.h>
#include <clucfg.h>
#include "diskstate.h"
#include "disk_proto.h"

/*
 * Forward routine declarations.
 */
#ifdef notdef
static void readVerifyStatusBlock(off_t offsetStatus, NodeStatusBlock *statbExpected);
#endif // notdef

static const char *version __attribute__ ((unused)) = "$Id: diskstatusblock.c,v 1.4 2000/09/13 19:39:03 burke Exp $";
/*
 * Transfer lengths used by the status block read/write routines.
 *
 * statbLen is the exact size of a NodeStatusBlock.  statbLenBlocked is that
 * size rounded up to the next multiple of 512 bytes (left unchanged when it
 * already is one), because raw IO requests must be a multiple of 512 bytes
 * long.  Both values are computed once here instead of on every request.
 */
static int statbLen = sizeof(NodeStatusBlock);
static int statbLenBlocked =
	((sizeof(NodeStatusBlock) + 511) / 512) * 512;

/*
 * Write a node status block out to disk.
 *
 * Parameters:
 *   writeOffset - offset handed to diskRawWriteShadow() as the destination.
 *   statBlock   - the block to write; its magic_number must already be
 *                 STATUS_BLOCK_MAGIC_NUMBER.
 *   aligned     - when nonzero, the caller guarantees that *statBlock is
 *                 512 byte aligned and that the memory behind it is large
 *                 enough to hold a NodeStatusBlock plus the padding needed
 *                 to round the write request up to a multiple of 512 bytes.
 *                 All this is done to avoid remapping a bounceio buffer for
 *                 each request.
 *
 * Returns: -1 (without writing anything) when statBlock is NULL or carries
 * the wrong magic number; otherwise whatever diskRawWriteShadow() returns.
 */
int writeStatusBlock(off_t writeOffset, NodeStatusBlock *statBlock, int aligned) {
	int len;

	if (statBlock == NULL) {
		clulog(LOG_ERR, "writeStatusBlock: NULL status block\n");
		return(-1);
	}
	if (statBlock->magic_number != STATUS_BLOCK_MAGIC_NUMBER) {
		clulog(LOG_ERR, "writeStatusBlock: invalid magic# 0x%lx\n",
			statBlock->magic_number);
		return(-1);
	}

	/* Aligned callers supply the padded buffer, so the padded length is used. */
	len = aligned ? statbLenBlocked : statbLen;

	/* The last argument is the byte offset of check_sum within the block. */
	return diskRawWriteShadow(writeOffset, (char *)statBlock, len,
				  (ulong)offsetof(NodeStatusBlock, check_sum));
}

#ifdef notdef
/*
 * Debug routine to read back the status block at offsetStatus and verify
 * that it is byte-for-byte what we expected.  On a mismatch both the
 * expected block and the block read back are printed to the log.
 * A failed read is logged and otherwise ignored.
 */
static void readVerifyStatusBlock(off_t offsetStatus, NodeStatusBlock *statbExpected) {
	NodeStatusBlock statbRead;

	memset(&statbRead, 0, sizeof(statbRead));
	/*
	 * readStatusBlock() returns 0 on success and a nonzero value on any
	 * failure, so compare against 0 rather than testing for negatives only.
	 */
	if (readStatusBlock(offsetStatus, &statbRead, 0) != 0) {
		clulog(LOG_ERR, "readVerifyStatusBlock: read failed.\n");
		return;
	}
	if (memcmp(&statbRead, statbExpected, sizeof(statbRead)) != 0) {
		clulog(LOG_ERR, "readVerifyStatusBlock: ERROR - validation failure.\n");
		printStatusBlock(statbExpected, "Expected");
		printStatusBlock(&statbRead, "Read Back");
	}
}
#endif // notdef

/*
 * Read a node status block from the shared partition into *statb and
 * check its magic number.
 *
 * Parameters:
 *   offsetStatus - offset handed to diskRawReadShadow() as the source.
 *   statb        - destination for the block that is read.
 *   aligned      - when nonzero, the caller guarantees that *statb is
 *                  512 byte aligned and that the memory behind it is large
 *                  enough to hold a NodeStatusBlock plus the padding needed
 *                  to round the read request up to a multiple of 512 bytes.
 *                  All this is done to avoid remapping a bounceio buffer
 *                  for each request.
 *
 * Returns: 0 on success.  On failure a nonzero value: the code returned by
 * diskRawReadShadow() when the read itself fails, or -1 when statb is NULL
 * or the block read does not carry STATUS_BLOCK_MAGIC_NUMBER.
 */
int readStatusBlock(off_t offsetStatus, NodeStatusBlock *statb, int aligned) {
	int ret, len;

	if (statb == NULL) {
		clulog(LOG_ERR, "readStatusBlock: NULL status block\n");
		return(-1);
	}

	/* Aligned callers supply the padded buffer, so the padded length is used. */
	len = aligned ? statbLenBlocked : statbLen;

	/*
	 * The fourth argument is the byte offset of check_sum within the
	 * block.  The trailing 1 is passed through as in the original call;
	 * its meaning is defined by diskRawReadShadow().
	 */
	ret = diskRawReadShadow(offsetStatus, (char *)statb, len,
				(ulong)offsetof(NodeStatusBlock, check_sum), 1);
	if (ret) {
		clulog(LOG_ERR, "readStatusBlock: bad ret %d from diskRawReadShadow\n", ret);
		return(ret);
	}
	if (statb->magic_number != STATUS_BLOCK_MAGIC_NUMBER) {
		clulog(LOG_ERR, "readStatusBlock: invalid magic# 0x%lx\n",
			statb->magic_number);
		return(-1);
	}
	return(0);
}

/*
 * Log the partner's node state at LOG_DEBUG as a name followed by the
 * numeric value.  The name is looked up in nodeStateStrings only when
 * status lies in the range 0 .. LAST_STATE-1; any other value, including
 * a negative one, is reported as "UNKNOWN" so the table is never indexed
 * out of bounds.
 */
void printPartnerStatus(int status) {
    const char *stateStr = "UNKNOWN";

    if (status >= 0 && status < LAST_STATE) {
	stateStr = nodeStateStrings[status];
    }

    clulog(LOG_DEBUG, "Partner's status is: %s, (%d)\n", stateStr, status);
}

/*
 * Dump the fields of one node status block to the log at LOG_DEBUG.
 *
 * Parameters:
 *   statb - the status block to print.
 *   whose - label inserted into the banner line identifying the block.
 */
void printStatusBlock(NodeStatusBlock *statb, char *whose) {
    char *timeStr;

    clulog(LOG_DEBUG, "------ Node Status Block %s------------\n", whose);
    clulog(LOG_DEBUG, "magic# = 0x%lx\n", statb->magic_number);
    clulog(LOG_DEBUG, "version = %d\n", statb->version);
    clulog(LOG_DEBUG, "nodename = %s\n", (char *)statb->nodename);
    /*
     * ctime() returns NULL when the value cannot be converted, and passing
     * NULL to a %s conversion is undefined.  On success the string already
     * ends in a newline, which is why that format has none of its own.
     */
    timeStr = ctime(&statb->timestamp);
    if (timeStr != NULL) {
	clulog(LOG_DEBUG, "timestamp = %s", timeStr);
    }
    else {
	clulog(LOG_DEBUG, "timestamp = (invalid)\n");
    }
    clulog(LOG_DEBUG, "update node# = %d\n", statb->updateNodenum);
    clulog(LOG_DEBUG, "incarnation# = %ld\n", (long)statb->incarnationNumber);
    clulog(LOG_DEBUG, "state = %d\n", statb->state);
    clulog(LOG_DEBUG, "-----------------------------------\n");
}

/*
 * Initialize the on-disk data structures representing disk state.
 * This will later be overwritten when the quorum daemon polling loop really
 * starts up.  Its main purpose is to wipe the disk to a clean slate.
 *
 * Both status block slots (0 and 1) are written with a block marked
 * NODE_DOWN and named "none".  The second write is attempted even when the
 * first one fails.
 *
 * Returns: the nonzero result of the first failing writeStatusBlock() call
 * if either write fails; otherwise the result of
 * initializePartitionServiceBlocks().
 */
int initializePartitionServiceState(void) {
    NodeStatusBlock statb;
    int retval1, retval2;
    off_t offset;

    /*
     * Just wiping out any prior settings.  memset() clears every byte of
     * the structure, so all sizeof(NodeStatusBlock) bytes that get written
     * below have a defined value.
     */
    memset(&statb, 0, sizeof(statb));
    statb.magic_number = STATUS_BLOCK_MAGIC_NUMBER;
    statb.state = NODE_DOWN;
    strcpy(statb.nodename, "none");

    offset = (OFFSET_FIRST_STATUS_BLOCK + (0 * SPACE_PER_STATUS_BLOCK));
    retval1 = writeStatusBlock(offset, &statb, 0);
    offset = (OFFSET_FIRST_STATUS_BLOCK + (1 * SPACE_PER_STATUS_BLOCK));
    retval2 = writeStatusBlock(offset, &statb, 0);

    if (retval1 != 0 || retval2 != 0) {
	clulog(LOG_CRIT, "initializePartitionServiceState: unable to initialize partition state.\n");
	/* Report the first failure, as callers have always seen. */
	return((retval1 != 0) ? retval1 : retval2);
    }
    /*
     * Now also zero out the data structures which describe which services
     * are being served.
     */
    return(initializePartitionServiceBlocks());
}

/*
 * Debugging routine to print node status information into the log file.
 * Also called as part of the read/recovery scanning code.
 *
 * The partner's status block is read first, then this node's own block.
 * If either read fails, a message is logged and the routine returns
 * without going any further.
 *
 * Parameter: doPrint - when non-zero will print results.  Otherwise it
 * quietly just reads in both status blocks.
 */
void printStatusBlocks(int doPrint) {
    NodeStatusBlock statblk;
    int myNodenum, partnerNodenum;
    off_t	offsetOfMyStatusBlock;
    off_t	offsetOfPartnerStatusBlock;

    myNodenum = cluGetLocalNodeId();
    /*
     * Kludge to figure out the partner's node number, falls
     * apart for more than 2 nodes.
     */
    partnerNodenum = (myNodenum == 0) ? 1 : 0;

    offsetOfMyStatusBlock = (OFFSET_FIRST_STATUS_BLOCK +
		(myNodenum * SPACE_PER_STATUS_BLOCK));
    offsetOfPartnerStatusBlock = (OFFSET_FIRST_STATUS_BLOCK +
		(partnerNodenum * SPACE_PER_STATUS_BLOCK));

    if (doPrint) {
        clulog(LOG_DEBUG, "printStatusBlocks: myNodenum = %d, partnerNodenum = %d.\n",
		myNodenum, partnerNodenum);
        clulog(LOG_DEBUG, "printStatusBlocks: offsetOfMyStatusBlock = %d, offsetOfPartnerStatusBlock = %d.\n",
		(int)offsetOfMyStatusBlock, (int)offsetOfPartnerStatusBlock);
    }

    /*
     * readStatusBlock() returns 0 on success and a nonzero value on any
     * failure (it hands back the read routine's own code), so test
     * against 0; checking only for negatives could let a failed read
     * through and print an uninitialized block.
     */
    if (readStatusBlock(offsetOfPartnerStatusBlock, &statblk, 0) != 0) {
	clulog(LOG_DEBUG, "printStatusBlocks: failed reading partner's status block.\n");
	return;
    }
    if (doPrint) {
        printStatusBlock(&statblk, "PARTNER");
    }

    if (readStatusBlock(offsetOfMyStatusBlock, &statblk, 0) != 0) {
	clulog(LOG_DEBUG, "printStatusBlocks: failed reading my status block.\n");
	return;
    }
    if (doPrint) {
        printStatusBlock(&statblk, "MY OWN");
    }
}

