/* $Cambridge: hermes/src/prayer/shared/request.c,v 1.2 2008/05/19 15:55:59 dpc22 Exp $ */
/************************************************
 *    Prayer - a Webmail Interface              *
 ************************************************/

/* Copyright (c) University of Cambridge 2000 - 2008 */
/* See the file NOTICE for conditions of use and distribution. */

/* Various routines and data structures for parsing HTTP request on
 * input iostream. Should be able to cope with partial requests if
 * underlying iostream is running in a non-blocking mode: in this
 * case just call request_parse again when data is available. Eventually
 * a complete request will be flagged.
 */

#include "shared.h"

/* request_create() *****************************************************
 *
 * Allocate a request structure out of a freshly created pool and fill
 * in the initial parser, decode and response state.
 *    config: Global configuration, recorded in the request
 *    stream: iostream that this request is attached to
 *  frontend: Copied into both request->frontend and request->preserve
 *            (a frontend server may have to proxy the request, so the
 *            input buffers should not be trampled on while parsing)
 *
 * Returns: New request structure. The pool that it was allocated from
 *          is recorded in request->pool.
 ***********************************************************************/

struct request *request_create(struct config *config,
                               struct iostream *stream, BOOL frontend)
{
    struct pool *pool = pool_create(REQUEST_PREFERRED_BLOCK_SIZE);
    struct request *r = pool_alloc(pool, sizeof(*r));

    /* Start from an all-zero structure, then set every field explicitly */
    memset(r, 0, sizeof(*r));

    /* Ownership and environment */
    r->pool = pool;
    r->config = config;
    r->stream = stream;

    /* Raw input and overall parser state */
    r->read_buffer = buffer_create(pool, PREFERRED_BUFFER_BLOCK_SIZE);
    r->chunked = NIL;
    r->state = REQUEST_METHOD;

    /* Method line and header block bookkeeping */
    r->method_size = 0;
    r->hdrs_offset = 0;
    r->hdrs_size = 0;
    r->hdrs_crlfs = 1;

    /* Simple (Content-Length) body bookkeeping */
    r->body_offset = 0;
    r->body_current = 0;
    r->body_size = 0;

    /* Chunked body bookkeeping: header, payload and trailer */
    r->chunk_state = CHUNK_HDR;
    r->chunk_hdr_offset = 0;
    r->chunk_hdr_current = 0;
    r->chunk_hdr_size = 0;
    r->chunk_body_offset = 0;
    r->chunk_body_current = 0;
    r->chunk_body_size = 0;
    r->chunk_trlr_offset = 0;
    r->chunk_trlr_size = 0;
    r->chunk_trlr_crlfs = 0;

    /* Mode flags. A frontend server _may_ end up proxying the request */
    r->frontend = frontend;
    r->preserve = frontend;
    r->iseof = NIL;
    r->error = NIL;

    /* Decoded request line: nothing known yet */
    r->request = NIL;
    r->major = 0;
    r->minor = 0;
    r->url = NIL;
    r->url_host = NIL;
    r->url_port = NIL;
    r->url_path = NIL;
    r->argc = NIL;
    r->argv = NIL;
    r->get_suffix = NIL;

    /* Processed request */
    r->method = UNKNOWN;
    r->hdrs = assoc_create(pool, 16, T);
    r->form = NIL;

    /* Response information. 501 == Not implemented */
    r->status = 501;
    r->dump = NIL;
    r->telemetry = NIL;
    r->telemetry_all = NIL;
    r->telemetry_fd = -1;
    r->response_hdrs = NIL;
    r->write_buffer = buffer_create(pool, PREFERRED_BUFFER_BLOCK_SIZE);
    r->gzip_buffer = NIL;
    r->user_agent = NIL;

    /* Final shape of the response: defaults until we know better */
    r->use_http_1_1 = NIL;
    r->persist = T;
    r->use_utf8 = NIL;
    r->allow_gzip = NIL;
    r->use_gzip = NIL;
    r->use_x_gzip = NIL;

    return (r);
}

/* request_free() *******************************************************
 *
 * Release a request by handing its pool (request->pool) to pool_free().
 *   request: Request to dispose of
 ***********************************************************************/

void request_free(struct request *request)
{
    struct pool *pool = request->pool;

    pool_free(pool);
}

/* ====================================================================== */

/* request_telemetry() **************************************************
 *
 * Switch on the HTTP telemetry log (debugging aid) for this request.
 *   request: Request to be logged
 *        fd: Output file descriptor for telemetry
 *       all:   T => Record headers and body
 *            NIL => Record body only
 *
 * Also sets request->preserve so that the request is not trampled on
 * while it is being parsed.
 ***********************************************************************/

void request_telemetry(struct request *request, int fd, BOOL all)
{
    /* Make sure that request not trampled on */
    request->preserve = T;

    request->telemetry_fd = fd;
    request->telemetry_all = all;
    request->telemetry = T;
}

/* request_dump() *******************************************************
 *
 * Set the dump flag (request->dump) on this request.
 *   request: Request to flag
 ***********************************************************************/

void request_dump(struct request *request)
{
    request->dump = T;
}

/* ====================================================================== */

/* request_forge_redirect() *********************************************
 *
 * Forge an HTTP redirect request: used by session_redirect for
 * transparent page substitution. The request is turned into a GET for
 * the given URL with no form data, and its arguments are reparsed.
 *   request: Request to rewrite
 *       url: Target URL (stored by reference, not copied)
 ***********************************************************************/

void request_forge_redirect(struct request *request, char *url)
{
    struct buffer *out = request->write_buffer;

    request->form = NIL;
    request->url_path = url;
    request->url = url;
    request->method = GET;

    /* Anything already queued for output is abandoned: start afresh */
    if (buffer_size(out) > 0) {
        out = buffer_create(request->pool, PREFERRED_BUFFER_BLOCK_SIZE);
        request->write_buffer = out;
    }

    request_parse_argv(request);
}

/* ====================================================================== */

/* Some crude routines for decoding URL and body of post requests. The
 * following URL: "http://hoohoo.ncsa.uiuc.edu/cgi/overview.html" is old
 * but gives a lot of useful information about forms and CGI.
 *
 * Basic idea is that a form can send data to a Web server using either:
 *  GET:  data is appended to URL after ?
 *  POST: data in request body. Length is defined by Content-Length header
 *
 * Data format is identical in both cases:
 *
 *    key=value&key2=value&key3=value3
 *
 * With any special characters (non-ASCII, "/" and "&") encoded in hex as
 * %XY.
 *
 */

/* request_parse_url() **************************************************
 *
 * Parse request->url into component parts (url_host, url_port, url_path).
 *   request: Request whose url field has already been filled in
 *
 * URLs which do not start with "http://" or "https://" (including
 * plain absolute paths such as "/foo") are treated as a path on
 * "localhost" with no port.
 *
 * The string that request->url points at is modified in place: the
 * separators that end the host and port components are overwritten
 * with '\0'.
 *
 * Returns:   T => URL parsed
 *          NIL => request->url was not set
 ***********************************************************************/

static BOOL request_parse_url(struct request *request)
{
    struct pool *p = request->pool;
    char *s, *t = request->url;
    char c;

    if (!t)
        return (NIL);

    /* Step over the scheme. Each prefix must be skipped by its own
     * length, and the two tests must be exclusive: otherwise a URL
     * starting "https://" falls through to the "no host" case below. */
    if (!strncasecmp(t, "https://", strlen("https://")))
        t += strlen("https://");
    else if (!strncasecmp(t, "http://", strlen("http://")))
        t += strlen("http://");
    else {
        /* Absolute path (or anything else), no host provided */
        request->url_host = "localhost";
        request->url_port = NIL;
        request->url_path = pool_strdup(p, request->url);
        return (T);
    }

    /* Record start of hostname, look for end of hostname */
    s = t;
    while ((c = *t) && (c != '/') && (c != ':'))
        t++;

    /* c remembers which separator ended the hostname */
    if (*t)
        *t++ = '\0';
    request->url_host = pool_strdup(p, s);

    /* Record optional port component */
    if (c == ':') {
        s = t;
        while ((c = *t) && (c != '/'))
            t++;

        if (*t)
            *t++ = '\0';
        request->url_port = pool_strdup(p, s);
    } else
        request->url_port = NIL;

    /* Pathname, default to "/".
     * NOTE(review): t has already been moved past the '/' separator, so
     * a path copied here does not start with '/', unlike the path in the
     * "no host" case above. Confirm that callers expect this. */
    if (c != '\0')
        request->url_path = pool_strdup(p, t);
    else
        request->url_path = pool_strdup(p, "/");
    return (T);
}

/* ====================================================================== */

/* request_parse_method() ***********************************************
 *
 * Parse HTTP method line (e.g: "GET / HTTP/1.0"
 *   request:
 *   config:  Configuration (defines maximum size of method)
 *
 * Returns:    T => method parsed successfully
 *           NIL => Error (temporary end of stream if request->iseos set)
 ***********************************************************************/

static BOOL request_parse_method(struct request *request)
{
    struct config *config = request->config;
    struct iostream *stream = request->stream;
    struct buffer *b = request->read_buffer;
    int c;
    char *token;
    char *method;
    unsigned long count = request->method_size;
    unsigned long maxsize = config->http_max_method_size;

    /* Skip over leading whitespace */
    while (((c = iogetc(stream)) == '\015') || (c == '\012'));

    /* Fetch and record characters until end of line */
    while (c != EOF) {
        bputc(b, c);
        if ((maxsize > 0) && (++count >= maxsize)) {
            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }
        if (c == '\012')
            break;
        c = iogetc(stream);
    }

    request->method_size = buffer_size(b);

    if (c == EOF) {
        /* Record permanent end of file */
        request->iseof = T;
        return (NIL);
    }

    /* Method line is now complete: record and then parse */
    if (request->method_size > 0) {
        method =
            buffer_fetch(b, 0, request->method_size - 1,
                         request->preserve);
        request->request = pool_strdup(b->pool, method);

        if (request->method_size >= 2) {
            /* Chomp trailing CR so they don't appear in log files */
            if (request->request[request->method_size - 2] == '\015')
                request->request[request->method_size - 2] = '\0';
        }
    } else
        request->request = method = pool_strdup(b->pool, "");

    if ((token = string_get_token(&method)) == NIL) {
        /* Bad request */
        request->status = 400;
        return NIL;
    }

    /* Methods listed in RFC 2616. NB: Case dependant */

    if (!strcmp(token, "OPTIONS"))
        request->method = OPTIONS;
    else if (!strcmp(token, "GET"))
        request->method = GET;
    else if (!strcmp(token, "HEAD"))
        request->method = HEAD;
    else if (!strcmp(token, "POST"))
        request->method = POST;
    else if (!strcmp(token, "PUT"))
        request->method = PUT;
    else if (!strcmp(token, "DELETE"))
        request->method = DELETE;
    else if (!strcmp(token, "TRACE"))
        request->method = TRACE;
    else if (!strcmp(token, "CONNECT"))
        request->method = CONNECT;
    else {
        request->method = UNKNOWN;
        /* Bad request */
        request->status = 400;
        return NIL;
    }

    if ((request->method != HEAD) && (request->method != GET) &&
        (request->method != POST)) {
        request->status = 405;  /* Method not allowed */
        return NIL;
    }

    if ((request->url = string_get_token(&method)) == NIL) {
        /* Bad request */
        request->status = 400;
        return NIL;
    }

    if ((request_parse_url(request)) == NIL)
        return NIL;

    /* Default to HTTP 0.9 */
    request->major = 0;
    request->minor = 9;

    token = string_get_token(&method);

    if (token) {
        int major, minor;

        if (strncasecmp(token, "HTTP/", strlen("HTTP/")) ||
            (sscanf(token + strlen("HTTP/"), "%d.%d", &major, &minor) !=
             2)) {
            /* Bad request */
            request->status = 400;
            return NIL;
        }
        request->major = major;
        request->minor = minor;

        if (string_get_token(&method)) {
            /* Bad request: garbage at end of the line */
            request->status = 400;
            return NIL;
        }
    }
    if ((request->major == 1) && (request->minor == 1))
        request->use_http_1_1 = T;

    return (T);
}

/* ====================================================================== */

/* request_process_headers() ********************************************
 *
 * Convert (complete set of) HTTP headers into the request->hdrs
 * associative array. Header names are folded to lower case and lose
 * their trailing ':'. A header which occurs more than once has its
 * values joined together with ", ".
 *   request: Request that owns the hdrs array
 *      data: Text of header block. Tokenised in place.
 *
 * Returns:    T => Headers parsed okay
 *           NIL => Invalid data in headers (request->status set to 400)
 ***********************************************************************/

static BOOL request_process_headers(struct request *request, char *data)
{
    char *header, *key, *oldvalue, *value, *s;

    while ((header = string_get_lws_line(&data, T))) {
        /* Fetch one (possibly folded) header line at a time */
        if (header[0] == '\0')
            continue;

        if (!((key = string_get_token(&header)) &&
              ((value = string_next_token(&header))))) {
            /* Bad request */
            request->status = 400;
            return (NIL);
        }

        /* Convert key to lower case. tolower() is only defined for EOF
         * and values representable as unsigned char, so a plain (maybe
         * signed) char has to be converted first. */
        for (s = key; *s; s++)
            *s = tolower((unsigned char) *s);

        /* s now points at the terminator: key must end with ':' */
        if ((s == key) || (s[-1] != ':')) {
            /* Bad request */
            request->status = 400;
            return (NIL);
        }

        s[-1] = '\0';

        if ((oldvalue = assoc_lookup(request->hdrs, key))) {
            /* Repeated header: build "<old>, <new>" */
            size_t oldlen = strlen(oldvalue);
            size_t newlen = strlen(value);

            /* + 3: two characters for ", " plus the terminator */
            s = pool_alloc(request->hdrs->pool, oldlen + newlen + 3);

            memcpy(s, oldvalue, oldlen);
            memcpy(s + oldlen, ", ", 2);
            memcpy(s + oldlen + 2, value, newlen + 1);
            value = s;
        }
        /* Generate assoc entry. Don't need to copy key and value */
        assoc_update(request->hdrs, key, value, NIL);
    }

    return (T);
}

/* ====================================================================== */

/* request_parse_headers_init() *****************************************
 *
 * Switch the request parser over to reading the HTTP header block,
 * which starts at the current end of the read buffer.
 *   request: Request whose method line has just been parsed
 *
 * Returns: T always
 ***********************************************************************/

static BOOL request_parse_headers_init(struct request *request)
{
    static const char prefix[] = "/session";
    struct buffer *b = request->read_buffer;

    /* Only session URLs and telemetry need proxy: anything else can
     * drop the preserve flag. */
    if (request->preserve && (request->telemetry == NIL)) {
        if (strncmp(request->url_path, prefix, strlen(prefix)) != 0)
            request->preserve = NIL;
    }

    request->hdrs_crlfs = 1;    /* Following on from method line */
    request->hdrs_size = 0;
    request->hdrs_offset = buffer_size(b);
    request->state = REQUEST_HDRS;

    return (T);
}

/* request_parse_headers() **********************************************
 *
 * Read in and parse HTTP headers from iostream. Progress (hdrs_size and
 * hdrs_crlfs) is recorded in the request so that the function can be
 * called again if input runs out part way through the header block.
 * The size limit is request->config->http_max_hdr_size (0 => no limit
 * applied).
 *   request: Request being read
 *
 * Returns: T   => Entire HTTP header section was read and parsed.
 *          NIL => Header block too large (status 413), invalid header
 *                 (status 400), or end of input before the end of the
 *                 header block (request->iseof set).
 ***********************************************************************/

static BOOL request_parse_headers(struct request *request)
{
    struct config *config = request->config;
    struct iostream *stream = request->stream;
    struct buffer *b = request->read_buffer;
    unsigned long crlf_count = request->hdrs_crlfs;
    char *data;
    int c = EOF;
    unsigned long count = request->hdrs_size;
    unsigned long maxsize = config->http_max_hdr_size;

    /* Record hdrs location first time into loop */
    if (request->hdrs_offset == 0)
        request->hdrs_offset = buffer_size(b);

    /* Read in data until end of header block located (CRLFCRLF or just LFLF) */

    while ((crlf_count < 2) && ((c = iogetc(stream)) != EOF)) {
        bputc(b, c);

        /* Always count: hdrs_size is needed even when no limit is set */
        count++;
        if ((maxsize > 0) && (count >= maxsize)) {
            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }

        if (c == '\012')
            crlf_count++;
        else if (c != '\015')
            crlf_count = 0;
    }
    request->hdrs_size = count;
    request->hdrs_crlfs = crlf_count;

    if (crlf_count < 2) {
        /* Input ran out before the blank line which ends the headers:
         * flag end of file rather than parsing a partial block. */
        request->iseof = T;
        return (NIL);
    }

    /* Hdrs now complete */

    /* Extract copy of entire header block from buffer */
    data = buffer_fetch(b, request->hdrs_offset, request->hdrs_size,
                        request->preserve);

    /* Invalid headers must fail the request, not be silently accepted */
    return (request_process_headers(request, data));
}

/* ====================================================================== */

/* request_parse_body_init() ********************************************
 *
 * Decide from the parsed headers what kind of payload follows and set
 * up the HTTP parser for it (simple, chunked or none). The body size
 * limit is request->config->http_max_body_size.
 *   request: Request whose headers have been processed
 *
 * Returns: T   => Setup successful
 *          NIL => Error: content-length too large (413), unsupported
 *                 transfer-encoding (501) or POST without body (411).
 ***********************************************************************/

static BOOL request_parse_body_init(struct request *request)
{
    struct buffer *b = request->read_buffer;
    struct iostream *stream = request->stream;
    struct config *config = request->config;
    char *value;

    /* Case 1: explicit Content-Length */
    value = assoc_lookup(request->hdrs, "content-length");
    if (value) {
        unsigned long len = atoi(value);

        if (len > config->http_max_body_size) {
            /* Eat the body, unless it is more than 5 times the limit */
            if (len < (5*config->http_max_body_size)) {
                for (; len > 0; len--) {
                    if (iogetc(stream) == EOF)
                        break;
                }
            }

            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }

        request->body_size = atoi(value);
        request->body_current = 0L;
        request->body_offset = buffer_size(b);
        request->state = REQUEST_BODY_SIMPLE;
        return (T);
    }

    /* Case 2: Transfer-Encoding other than "identity" */
    value = assoc_lookup(request->hdrs, "transfer-encoding");
    if (value && (strcasecmp(value, "identity") != 0)) {
        /* We only support "chunked" and  "identity" at the moment */
        if (strcasecmp(value, "chunked") != 0) {
            request->status = 501;      /* Not implemented */
            return (NIL);
        }

        request->state = REQUEST_BODY_CHUNKED;
        request->chunk_state = CHUNK_HDR;

        request->body_offset = buffer_size(b);
        request->body_current = 0;
        request->body_size = 0;

        /* Decode chunked buffer body as we proceed */
        request->chunked = buffer_create(request->pool,
                                         PREFERRED_BUFFER_BLOCK_SIZE);

        request->chunk_hdr_offset = buffer_size(b);
        request->chunk_hdr_size = 0;
        request->chunk_body_offset = buffer_size(b);
        request->chunk_body_size = 0;
        return (T);
    }

    /* Case 3: no body described at all */
    if (request->method == POST) {
        request->status = 411;  /* POST method needs body */
        return (NIL);
    }

    request->state = REQUEST_COMPLETE;
    return (T);
}

/* request_parse_body_simple() ******************************************
 * Read a simple (fixed size) HTTP payload from iostream into the read
 * buffer. request->body_size gives the expected size and
 * request->body_current records how much has been read so far.
 *   request: Request being read
 *
 * Returns: T   => Entire body was read successfully.
 *          NIL => Input ran out before the full body arrived
 *                 (request->status set to 400)
 ***********************************************************************/

static BOOL request_parse_body_simple(struct request *request)
{
    struct buffer *b = request->read_buffer;
    struct iostream *stream = request->stream;
    unsigned long wanted = request->body_size;
    unsigned long got = request->body_current;
    int ch = EOF;

    /* Record body location if that hasn't been done already */
    if (request->body_offset == 0)
        request->body_offset = buffer_size(b);

    while (got < wanted) {
        ch = iogetc(stream);
        if (ch == EOF)
            break;
        bputc(b, ch);
        got++;
    }
    request->body_current = got;

    if (got >= wanted)
        return (T);

    request->status = 400;      /* Insufficient data */
    return (NIL);
}

/* ====================================================================== */

/* HTTP/1.1 Chunked encoding structure:
 *     Chunked-Body   = *chunk
 *                      last-chunk
 *                      trailer
 *                      CRLF
 *
 *     chunk          = chunk-size [ chunk-extension ] CRLF
 *                      chunk-data CRLF
 *     chunk-size     = 1*HEX
 *     last-chunk     = 1*("0") [ chunk-extension ] CRLF
 *
 *     chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
 *     chunk-ext-name = token
 *     chunk-ext-val  = token | quoted-string
 *     chunk-data     = chunk-size(OCTET)
 *     trailer        = *(entity-header CRLF)
 *
 * Chunked encoding is ghastly, however the spec says we have to support
 * it for HTTP requests as well as responses. Doesn't mean that I can find
 * _anything_ which uses chunked encoding for requests.
 */

/* request_parse_chunk_hdr_init() ***************************************
 *
 * Set up HTTP parser for the next chunk header, which starts at the
 * current end of the read buffer. Chunk body bookkeeping is cleared.
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T always
 ***********************************************************************/

static BOOL request_parse_chunk_hdr_init(struct request *request)
{
    struct buffer *b = request->read_buffer;

    /* Previous chunk is finished with */
    request->chunk_body_size = 0;
    request->chunk_body_current = 0;
    request->chunk_body_offset = 0;

    /* Next thing expected on the stream is a chunk header */
    request->chunk_hdr_size = 0;
    request->chunk_hdr_offset = buffer_size(b);
    request->chunk_state = CHUNK_HDR;

    return (T);
}

/* request_parse_chunk_hdr() ********************************************
 * Read a single chunk header line from iostream into the read buffer.
 * The size of the body read so far is checked against
 * request->config->http_max_body_size (0 => no limit applied).
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T   => Entire chunk header line was read successfully.
 *          NIL => Body too large (status 413) or end of file
 *                 (request->iseof set)
 ***********************************************************************/

static BOOL request_parse_chunk_hdr(struct request *request)
{
    struct buffer *b = request->read_buffer;
    struct iostream *stream = request->stream;
    unsigned long limit = request->config->http_max_body_size;
    unsigned long used = buffer_size(b) - request->body_offset;
    int ch;

    /* Discard CR and LF characters in front of the chunk header */
    do {
        ch = iogetc(stream);
    } while ((ch == '\015') || (ch == '\012'));

    for (; ch != EOF; ch = iogetc(stream)) {
        if ((limit > 0) && (++used >= limit)) {
            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }
        bputc(b, ch);

        /* LF ends the line: chunk header is now complete */
        if (ch == '\012')
            return (T);
    }

    /* Only get here if the input ran out */
    request->iseof = T;
    return (NIL);
}

/* request_process_chunk_hdr() ******************************************
 *
 * Decode the chunk header that has just been read and set up the HTTP
 * parser for the chunk payload which follows it. The total body size
 * is checked against request->config->http_max_body_size (0 => no
 * limit applied).
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T   => Setup successful
 *          NIL => Error: invalid chunk header (status 400) or body
 *                 would be too large (status 413)
 ***********************************************************************/

static BOOL request_process_chunk_hdr(struct request *request)
{
    struct config *config = request->config;
    struct buffer *b = request->read_buffer;
    char *chunk_header;
    char *end;
    unsigned long size;
    unsigned long count = buffer_size(b) - request->body_offset;
    unsigned long maxsize = config->http_max_body_size;

    /* Fetch linear block */
    request->chunk_hdr_size = buffer_size(b) - request->chunk_hdr_offset;
    chunk_header
        = buffer_fetch(b, request->chunk_hdr_offset,
                       request->chunk_hdr_size, request->preserve);

    /* Derive location and size of following chunk */
    /* Hexadecimal number at start of chunk header line determines chunk size */
    size = strtoul(chunk_header, &end, 16);

    request->chunk_body_offset = buffer_size(b);
    request->chunk_body_current = 0;
    request->chunk_body_size = size;

    if (end == chunk_header) {
        /* chunk-size = 1*HEX: a header without any digits must not be
         * mistaken for a zero length (final) chunk. */
        request->status = 400;  /* Invalid request */
        return (NIL);
    }

    /* Written as a subtraction so that a huge chunk size cannot wrap
     * the sum around and sneak past the limit. */
    if ((maxsize > 0) && ((count > maxsize) || (size > (maxsize - count)))) {
        request->status = 413;  /* Request Entity too large */
        return (NIL);
    }

    /* Sanity check remainder of the chunk header */
    chunk_header = end;
    while (string_isspace(*chunk_header))
        chunk_header++;

    /* Only a chunk-extension (";...") may follow the size */
    if (chunk_header[0] &&
        (chunk_header[0] != '\015') && (chunk_header[0] != '\012')) {
        if (chunk_header[0] != ';') {
            request->status = 400;      /* Invalid request */
            return (NIL);
        }
    }

    return (T);
}

/* request_parse_chunk_body() *******************************************
 * Read in HTTP chunk body from iostream. Each byte is stored twice: in
 * the read buffer (request as received) and in request->chunked
 * (decoded body). The total body size is checked against
 * request->config->http_max_body_size (0 => no limit applied).
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T   => Entire chunk body was read successfully.
 *          NIL => Body too large (status 413) or input ran out before
 *                 the end of the chunk (status 400)
 ***********************************************************************/

static BOOL request_parse_chunk_body(struct request *request)
{
    struct config *config = request->config;
    struct iostream *stream = request->stream;
    struct buffer *b = request->read_buffer;
    struct buffer *b2 = request->chunked;
    unsigned long current = request->chunk_body_current;
    unsigned long size = request->chunk_body_size;
    unsigned long count = buffer_size(b) - request->body_offset;
    unsigned long maxsize = config->http_max_body_size;
    int c = EOF;

    /* Record body location first time into loop: should be redundant */
    if (request->chunk_body_offset == 0)
        request->chunk_body_offset = buffer_size(b);

    while ((current < size) && ((c = iogetc(stream)) != EOF)) {
        /* Strictly speaking redundant, but this way at least consistent */
        if ((maxsize > 0) && (++count >= maxsize)) {
            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }
        bputc(b, c);            /* Vanilla version */
        bputc(b2, c);           /* Decoded version */
        current++;
    }

    /* Save progress in the field that it was loaded from above: this
     * is the per-chunk counter, not the one for simple bodies. */
    request->chunk_body_current = current;

    if (current < size) {
        request->status = 400;  /* Insufficient data */
        return (NIL);
    }
    return (T);
}

/* ====================================================================== */

/* Routines for processing trailer block in chunked encoding */

/* request_parse_chunk_trlr_init() **************************************
 *
 * Set up HTTP parser for the chunk trailer, which starts at the current
 * end of the read buffer. Even more silliness!
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T always
 ***********************************************************************/

static BOOL request_parse_chunk_trlr_init(struct request *request)
{
    struct buffer *b = request->read_buffer;

    /* One line ending has already been seen when the trailer starts */
    request->chunk_trlr_crlfs = 1;
    request->chunk_trlr_size = 0;
    request->chunk_trlr_offset = buffer_size(b);
    request->chunk_state = CHUNK_TRAILER;

    return (T);
}

/* request_parse_chunk_trlr() *******************************************
 * Read in and parse HTTP chunk trailer from iostream. The trailer is a
 * block of headers, which is merged into request->hdrs. Progress
 * (chunk_trlr_size and chunk_trlr_crlfs) is recorded in the request.
 * The total body size is checked against
 * request->config->http_max_body_size (0 => no limit applied).
 *   request: Current (partly parsed) HTTP request
 *
 * Returns: T   => Entire trailer was read and parsed successfully.
 *          NIL => Body too large (status 413), invalid trailer header
 *                 (status 400), or end of file before the end of the
 *                 trailer (request->iseof set)
 ***********************************************************************/

static BOOL request_parse_chunk_trlr(struct request *request)
{
    struct config *config = request->config;
    struct iostream *stream = request->stream;
    struct buffer *b = request->read_buffer;
    unsigned long crlf_count = request->chunk_trlr_crlfs;
    char *data;
    int c = EOF;
    unsigned long count = buffer_size(b) - request->body_offset;
    unsigned long maxsize = config->http_max_body_size;

    /* Record trlr location first time into loop: should be redundant */
    if (request->chunk_trlr_offset == 0)
        request->chunk_trlr_offset = buffer_size(b);

    /* Read in data until end of trailer block located (CRLFCRLF or just LFLF) */

    while ((crlf_count < 2) && ((c = iogetc(stream)) != EOF)) {
        if ((maxsize > 0) && (++count >= maxsize)) {
            request->status = 413;      /* Request Entity too large */
            return (NIL);
        }
        bputc(b, c);
        request->chunk_trlr_size++;

        if (c == '\012')
            crlf_count++;
        else if (c != '\015')
            crlf_count = 0;
    }
    request->chunk_trlr_crlfs = crlf_count;

    if (crlf_count < 2) {
        /* Input ran out before the blank line which ends the trailer:
         * flag end of file rather than parsing a partial block. */
        request->iseof = T;
        return (NIL);
    }

    /* Trailer now complete */

    /* Extract copy of entire header block from buffer */
    data = buffer_fetch(b,
                        request->chunk_trlr_offset,
                        request->chunk_trlr_size, request->preserve);

    /* Invalid trailer headers fail the request (status already set) */
    if (!request_process_headers(request, data))
        return (NIL);

    /* Just for completeness sake: not actually used at all */
    request->body_size = buffer_size(b) - request->body_offset;
    return (T);
}

/* ====================================================================== */

/* request_parse_body_chunked() *****************************************
 *
 * Read in and parse a body in HTTP chunk format: any number of
 * (chunk header, chunk body) pairs, ended by a zero sized chunk and a
 * trailer. Driven by request->chunk_state, so the function picks up
 * from wherever a previous call stopped.
 *
 * request: Current (partly parsed) HTTP request
 *
 * Returns: T   => Entire HTTP chunk body section read successfully.
 *          NIL => One of the stages failed (error or end of file)
 *
 ***********************************************************************/

static BOOL request_parse_body_chunked(struct request *request)
{
    for (;;) {
        switch (request->chunk_state) {
        case CHUNK_COMPLETE:
            return (T);

        case CHUNK_HDR:
            if (!request_parse_chunk_hdr(request))
                return (NIL);

            if (!request_process_chunk_hdr(request))
                return (NIL);

            /* Zero sized chunk is the final chunk: trailer follows */
            if (request->chunk_body_size != 0)
                request->chunk_state = CHUNK_BODY;
            else
                request_parse_chunk_trlr_init(request);
            break;

        case CHUNK_BODY:
            if (!request_parse_chunk_body(request))
                return (NIL);

            /* Chunk finished: another chunk header comes next */
            request_parse_chunk_hdr_init(request);
            break;

        case CHUNK_TRAILER:
            if (!request_parse_chunk_trlr(request))
                return (NIL);

            request->chunk_state = CHUNK_COMPLETE;
            break;
        }
    }
}

/* ====================================================================== */

/* request_parse() *****************************************************
 *
 * Read in and parse entire HTTP request: method line, headers and
 * (simple or chunked) body. Driven by request->state, so the function
 * picks up from wherever a previous call stopped.
 *   request: Current HTTP request (may be partly read in and parsed!)
 *
 * Returns: T   => Entire request read successfully
 *                 (request->state is REQUEST_COMPLETE)
 *          NIL => One of the stages failed (error or end of file)
 ***********************************************************************/

BOOL request_parse(struct request * request)
{
    for (;;) {
        switch (request->state) {
        case REQUEST_COMPLETE:
            request->state = REQUEST_COMPLETE;
            return (T);

        case REQUEST_METHOD:
            if (!request_parse_method(request))
                return (NIL);

            /* Major version 0 (HTTP 0.9): nothing follows method line */
            if (request->major == 0)
                request->state = REQUEST_COMPLETE;
            else
                request_parse_headers_init(request);
            break;

        case REQUEST_HDRS:
            if (!request_parse_headers(request))
                return (NIL);

            /* Headers determine which kind of body (if any) follows */
            if (!request_parse_body_init(request))
                return (NIL);
            break;

        case REQUEST_BODY_SIMPLE:
            if (!request_parse_body_simple(request))
                return (NIL);

            request->state = REQUEST_COMPLETE;
            break;

        case REQUEST_BODY_CHUNKED:
            if (!request_parse_body_chunked(request))
                return (NIL);

            request->state = REQUEST_COMPLETE;
            break;
        }
    }
}

/* ====================================================================== */

/* request_complete() ***************************************************
 *
 * Test whether HTTP request has been read in completely, i.e. whether
 * request->state has reached REQUEST_COMPLETE.
 *   request: Request to test
 *
 * Returns: T   => Request complete
 *          NIL => Otherwise
 ***********************************************************************/

BOOL request_complete(struct request * request)
{
    BOOL done = (request->state == REQUEST_COMPLETE);

    return (done);
}

/* ====================================================================== */

/* request_parse_form_multipart() ***************************************
 *
 * Locate the body of a file upload inside multipart data (static
 * support routine). Nothing is copied or modified: the results are
 * pointers into data0.
 *  boundary0: Boundary string to look for (without the leading "--")
 *      data0: Pointer to data to be decoded
 *        len: Length of data to be decoded
 *     start0: Returns ptr to start of attachment within data0: the byte
 *             after the first "--boundary" line. Left at data0 if no
 *             such line is found.
 *       end0: Returns ptr to end of attachment within data0: the start
 *             of the "--boundary--" line with one preceding CRLF, CR or
 *             LF removed. If no closing line is found this is simply
 *             where the scan gave up.
 *
 * No byte at or beyond data0[len] is ever examined.
 ***********************************************************************/

static void
request_parse_form_multipart(char *boundary0,
                             char *data0,
                             unsigned long len, char **start0, char **end0)
{
    char *s = data0;
    unsigned long blen = strlen(boundary0);

    *start0 = *end0 = data0;

    /* Find first boundary line: "--" boundary (CR | LF) */
    while (len >= (blen + 2)) {
        /* Need blen + 3 bytes in hand before s[blen + 2] can be read */
        if ((len > (blen + 2)) &&
            (s[0] == '-') && (s[1] == '-') &&
            !strncmp(&s[2], boundary0, blen) &&
            ((s[blen + 2] == '\012') || (s[blen + 2] == '\015'))) {

            /* Only treat the terminator as CRLF if the LF is inside the
             * data: otherwise len would wrap around below */
            if ((len > (blen + 3)) && (s[blen + 2] == '\015')
                && (s[blen + 3] == '\012')) {
                s += blen + 4;
                len -= blen + 4;
            } else {
                s += blen + 3;
                len -= blen + 3;
            }

            *start0 = s;
            break;
        }
        s++;
        len--;
    }

    /* Find closing boundary line: "--" boundary "--" (CR | LF) */
    while (len > (blen + 4)) {
        if ((s[0] == '-') && (s[1] == '-') &&
            !strncmp(&s[2], boundary0, blen) &&
            (s[blen + 2] == '-') && (s[blen + 3] == '-') &&
            ((s[blen + 4] == '\012') || (s[blen + 4] == '\015')))
            break;
        s++;
        len--;
    }

    /* Remove trailing CRLF, CR or LF (never backing up past the start) */
    if ((s >= (*start0) + 2) && (s[-2] == '\015') && (s[-1] == '\012'))
        s -= 2;
    else if ((s >= (*start0) + 1)
             && ((s[-1] == '\015') || (s[-1] == '\012')))
        s--;

    *end0 = s;
}

/* ====================================================================== */

/* request_parse_form_multipart_hdrs() **********************************
 *
 * Decode MIME headers from file upload request
 *       hdrs: Associative array that should contain decoded headers.
 *             NIL => headers are skipped over without being recorded.
 *      datap: Data to be decoded. Returns ptr to first line after hdrs
 *
 * Header names are folded to lower case and stored without the trailing
 * ':'. Lines without a "name: value" shape are ignored. A header which
 * appears more than once has its values joined with ", ".
 *
 * Returns: T => successful. NIL => no data pointer supplied.
 ***********************************************************************/

static BOOL
request_parse_form_multipart_hdrs(struct assoc *hdrs, char **datap)
{
    char *s, *header, *key, *value, *oldvalue;

    if (!datap)
        return (NIL);

    while ((header = string_get_lws_line((char **) datap, T))) {
        /* Reached end of the block yet? */
        if (header[0] == '\0')
            break;

        /* Nowhere to record anything: just consume the line */
        if (!hdrs)
            continue;

        if (!((key = string_get_token(&header)) &&
              ((value = string_next_token(&header))))) {
            continue;
        }

        /* Convert string to lower case. Header bytes come straight from
         * the client, so go via unsigned char: tolower() is undefined
         * for negative arguments other than EOF. */
        for (s = key; *s; s++)
            *s = (char) tolower((unsigned char) *s);

        /* s now sits on the terminator: key must end in ':' */
        if ((s == key) || (s[-1] != ':')) {
            continue;
        }

        s[-1] = '\0';

        if ((oldvalue = assoc_lookup(hdrs, key))) {
            /* Repeated header: build "<old>, <new>" in the assoc pool */
            size_t oldlen = strlen(oldvalue);
            size_t newlen = strlen(value);

            s = pool_alloc(hdrs->pool, oldlen + newlen + 3);
            memcpy(s, oldvalue, oldlen);
            memcpy(s + oldlen, ", ", 2);
            memcpy(s + oldlen + 2, value, newlen + 1);  /* includes NUL */
            value = s;
        }
        /* Generate assoc entry. Don't need to copy key and value */
        assoc_update(hdrs, key, value, NIL);
    }

    return (T);
}

/* ======================================================================*/

/* request_decode_post_multipart() **************************************
 *
 * Decode RFC 1867 file upload request into MIME headers and body
 * request: Entire HTTP request to decode
 *    hdrs: Target assoc array for MIME headers
 *  startp: Returns ptr to start of upload body
 *    endp: Returns ptr to end of upload body
 *
 * The request must carry "Content-Type: multipart/form-data; boundary=..."
 * and a non-empty body.
 *
 * Returns: T => successful. NIL => not a multipart upload, empty body
 *          or error decoding headers.
 ***********************************************************************/

BOOL
request_decode_post_multipart(struct request * request,
                              struct assoc * hdrs,
                              char **startp, char **endp)
{
    char *ps, *ct, *s, *boundary;
    unsigned long size;

    if (!((ct = assoc_lookup(request->hdrs, "content-type")) &&
          ((s = string_get_token(&ct))) &&
          !strcasecmp(s, "multipart/form-data;") &&
          ((s = string_next_token(&ct))) &&
          !strncasecmp(s, "boundary=", strlen("boundary=")) &&
          (boundary = (s + strlen("boundary=")))))
        return (NIL);

    if (request->chunked) {
        /* Chunked message body has already been decoded */
        size = buffer_size(request->chunked);

        if (size == 0)
            return (NIL);

        /* Fetch linear copy of buffer */
        ps = buffer_fetch(request->chunked, 0, size, request->preserve);
    } else {
        /* Get data from normal message buffer */
        size = request->body_size;

        if (size == 0)
            return (NIL);

        /* Fetch linear copy of buffer */
        ps = buffer_fetch(request->read_buffer, request->body_offset,
                          size, request->preserve);
    }

    /* Scan exactly the bytes that were fetched above: for a chunked
     * request that is the size of the decoded chunk buffer, not
     * request->body_size */
    request_parse_form_multipart(boundary, ps, size, startp, endp);

    /* Upload headers sit at the start of the located section */
    return (request_parse_form_multipart_hdrs(hdrs, startp));
}

/* ====================================================================== */

/* request_parse_form_string() ******************************************
 *
 * Convert a URL encoded form ("key=value&key=value...", from a GET or
 * POST request) into an associative array.
 *  pool: Target pool
 *  post: String to parse. Decoded via string_url_decode_component().
 *
 * Returns: Associative array
 ***********************************************************************/

static struct assoc *request_parse_form_string(struct pool *pool,
                                               char *post)
{
    struct assoc *form = assoc_create(pool, 16, T);
    char *name;
    char *dot;

    /* Step over leading whitespace and line terminators */
    for (; *post; post++) {
        if (!string_isspace(*post) && !string_iseol(*post))
            break;
    }

    /* XXX 09/04/2006
     *
     * Known limitation with invalid input such as "a&b=c": the result
     * is form{"a&b"} = "c" rather than form{"a"} = "", form{"b"} = "c".
     *
     * Doesn't appear to be doing any harm (really just GIGO), but this
     * should be revisited when there is time to test properly.
     */
    while (*post) {
        name = string_url_decode_component(&post, '=');
        if (name[0] == '\0')
            continue;           /* Empty key: value is not consumed */

        /* Image map submits arrive as "name.x"/"name.y": keep "name" */
        dot = strchr(name, '.');
        if (dot)
            *dot = '\0';

        /* A later occurrence of a key replaces the earlier one */
        assoc_update(form, name,
                     string_url_decode_component(&post, '&'), NIL);
    }

    return (form);
}

/* ====================================================================== */

/* request_decode_get() *************************************************
 *
 * Decode a GET form: parse request->get_suffix into request->form.
 * Does nothing if the request has no get_suffix.
 *  request: Complete HTTP request
 *
 ***********************************************************************/

static void request_decode_get(struct request *request)
{
    if (request->get_suffix) {
        request->form = request_parse_form_string(request->pool,
                                                  request->get_suffix);
    }
}

/* ====================================================================== */

/* Convert single section of multipart/form-data into a key value pair.
 *    pool: Pool for the section headers and any charset conversion
 *  result: Assoc array that receives the entry
 *    data: Section text (MIME headers, blank line, value)
 *
 * The key is the name= parameter of the Content-Disposition header. The
 * value has any BASE64 or QUOTED-PRINTABLE transfer encoding removed
 * and is passed through utf8_from_string() if the section declares a
 * charset other than UTF-8.
 *
 * Returns: T => entry added. NIL => section could not be decoded.
 */

static BOOL
request_parse_formdata_single(struct pool *pool, struct assoc *result,
                              char *data)
{
    struct assoc *part_hdrs = assoc_create(pool, 16, T);
    char *encoding, *disposition, *name, *dot, *ctype, *cs;
    char *charset = NIL;
    unsigned long decode_len = 0;

    /* Consume the section headers: data moves on to the value */
    if (!request_parse_form_multipart_hdrs(part_hdrs, &data))
        return(NIL);

    encoding = assoc_lookup(part_hdrs, "content-transfer-encoding");
    if (!encoding)
        encoding = "";

    /* Must be "form-data ... name=<key>" */
    disposition = assoc_lookup(part_hdrs, "content-disposition");
    if (!disposition)
        return(NIL);
    if (strncasecmp(disposition, "form-data", strlen("form-data")) != 0)
        return(NIL);

    disposition = string_ustr(disposition + strlen("form-data"), "name=");
    if (!disposition)
        return(NIL);

    disposition += strlen("name=");
    name = string_get_value(&disposition);
    if (!name)
        return(NIL);

    /* Discard image map submit nonsense */
    dot = strchr(name, '.');
    if (dot)
        *dot = '\0';

    /* Strip off BASE64/QPRINT encoding. */
    if (strcasecmp(encoding, "BASE64") == 0) {
        data = (char *)
            string_base64_decode((unsigned char *)data,
                                 strlen(data), &decode_len);
    } else if (strcasecmp(encoding, "QUOTED-PRINTABLE") == 0) {
        data = (char *)
            string_qprint_decode((unsigned char *)data,
                                 strlen(data), &decode_len);
    }

    if (!data)
        return(NIL);

    /* Convert to UTF-8 if not */
    ctype = assoc_lookup(part_hdrs, "content-type");
    if (!ctype)
        ctype = "";

    cs = string_ustr(ctype, "charset=");
    if (cs) {
        cs += strlen("charset=");
        charset = string_get_value(&cs);
    }

    if (charset && (strcasecmp(charset, "UTF-8") != 0)) {
        /* No decoder ran (or it reported zero): measure the text */
        if (decode_len == 0)
            decode_len = strlen(data);
        data = utf8_from_string(pool, charset, data, decode_len);
    }

    assoc_update(result, name, data, NIL);

    return(T);
}

/* request_parse_formdata() *********************************************
 *
 * Split multipart/form-data into sections and parse one at a time
 *     pool: Pool for the result and for per-section work
 * boundary: Boundary string (without the leading "--")
 *     data: Message body. Gets trashed: each section is tied off in
 *           place before it is parsed. The scan stops at the first NUL
 *           byte, so data has to be NUL terminated.
 *      len: Length of data. Currently unused.
 *
 * Sections which fail to decode are silently dropped. If the closing
 * boundary is missing, whatever follows the last boundary line is
 * treated as the final section.
 *
 * Returns: Associative array of form entries
 ***********************************************************************/

static struct assoc *
request_parse_formdata(struct pool *pool, char *boundary,
                       char *data, unsigned long len)
{
    struct assoc *h = assoc_create(pool, 16, T);
    unsigned long blen = strlen(boundary);
    char *start;
    char *end;
    char *s = data;
    char c;
    BOOL last_part = NIL;
    BOOL found;

    /* Find and then skip the first boundary line */
    while (*s) {
        if ((s[0] == '-') && (s[1] == '-') &&
            !strncmp(&s[2], boundary, blen) &&
            ((s[blen + 2] == '\012') || (s[blen + 2] == '\015'))) {
            s += blen + 2;
            s += (s[0] == '\015' && s[1] == '\012') ? 2 : 1;
            break;
        }
        string_next_line(&s);
    }
    start = s;

    while (*s && !last_part) {
        /* Find next boundary line: "--" boundary, followed by a line
         * terminator, by "-" (start of the closing "--") or by the end
         * of the data. Test the byte AFTER the boundary text: s[0] is
         * always '-' at this point. */
        found = NIL;
        while (*s) {
            if ((s[0] == '-') && (s[1] == '-') &&
                !strncmp(&s[2], boundary, blen)) {
                c = s[blen + 2];
                if ((c == '\015') || (c == '\012') ||
                    (c == '-') || (c == '\0')) {
                    found = T;
                    break;
                }
            }
            string_next_line(&s);
        }

        /* Record end of the preceding block. Remove trailing CRLF, CR or LF */
        if ((s >= (start+2)) && (s[-2] == '\015') && (s[-1] == '\012'))
            end = s - 2;
        else if ((s >= (start+1)) && ((s[-1] == '\015') || (s[-1] == '\012')))
            end = s - 1;
        else
            end = s;

        /* Process start ... end block. Best effort: result ignored */
        *end = '\0';
        request_parse_formdata_single(pool, h, start);

        /* Ran off the end of the data without seeing a boundary: there
         * is no boundary line to skip and nothing left to parse */
        if (!found)
            break;

        /* Skip the boundary line. -- at the end indicates last block */
        s += blen + 2;
        if (s[0] == '-'  && s[1] == '-') {
            last_part = T;
            s += 2;
        }

        /* Skip the line terminator, but never step over the NUL */
        if (s[0] == '\015' && s[1] == '\012')
            s += 2;
        else if (s[0] != '\0')
            s++;

        start = s;
    }

    return(h);
}

/* request_decode_post() ************************************************
 *
 * Decode a POST form into request->form. A body sent as
 * "multipart/form-data; boundary=..." is split into sections; anything
 * else is parsed as a URL encoded string. An empty body leaves
 * request->form untouched.
 *  request: Complete HTTP request
 *
 ***********************************************************************/

static void request_decode_post(struct request *request)
{
    char *ps, *ct, *s, *boundary = NIL;
    unsigned long size;
    BOOL formdata = NIL;

    if (((ct = assoc_lookup(request->hdrs, "content-type")) &&
          ((s = string_get_token(&ct))) &&
          !strcasecmp(s, "multipart/form-data;") &&
          ((s = string_next_token(&ct))) &&
          !strncasecmp(s, "boundary=", strlen("boundary=")) &&
          (boundary = (s + strlen("boundary=")))))
        formdata = T;

    if (request->chunked) {
        /* Chunked message body has already been decoded */
        size = buffer_size(request->chunked);

        if (size == 0)
            return;

        /* Fetch linear copy of buffer */
        ps = buffer_fetch(request->chunked, 0, size, request->preserve);
    } else {
        /* Get data from normal message buffer */
        size = request->body_size;

        if (size == 0)
            return;

        /* Fetch linear copy of buffer */
        ps = buffer_fetch(request->read_buffer, request->body_offset,
                          size, request->preserve);
    }

    /* NB: ps will get trashed by form parsing */
    if (formdata) {
        /* Hand over the length of what was actually fetched: for a
         * chunked request that is the decoded chunk buffer */
        request->form = request_parse_formdata(request->pool, boundary,
                                               ps, size);
    } else {
        request->form = request_parse_form_string(request->pool, ps);
    }
}

/* ====================================================================== */

/* request_decode_form() ************************************************
 *
 * Decode any kind of form: POST requests are decoded from the message
 * body, every other method from the URL.
 *  request: Complete HTTP request
 *
 ***********************************************************************/

void request_decode_form(struct request *request)
{
    if (request->method != POST) {
        request_decode_get(request);
        return;
    }

    request_decode_post(request);
}

/* ====================================================================== */

/* request_parse_argv() *************************************************
 *
 * Break request->url_path into its component parts: fills in
 * request->argc and a NIL terminated request->argv, splitting on '/'
 * and '@'. Anything after the first '?' is recorded as
 * request->get_suffix instead. Works on a private copy of the path.
 * request: Entire HTTP request to decode
 *
 ***********************************************************************/

void request_parse_argv(struct request *request)
{
    char *path;
    char *p;
    int n;

    /* Scratch copy: separators get overwritten with NULs below */
    path = pool_strdup(request->pool, request->url_path);

    /* "" and "/" both mean no arguments at all */
    if ((path[0] == '\0') || (strcmp(path, "/") == 0)) {
        request->argc = 0;
        request->argv = pool_alloc(request->pool, sizeof(char *));
        request->argv[0] = NIL;
        return;
    }

    /* A single leading '/' does not introduce an element */
    if (path[0] == '/')
        path++;

    /* Pass one: one element, plus one per separator before any '?' */
    request->argc = 1;
    for (p = path; (*p != '\0') && (*p != '?'); p++) {
        if ((*p == '/') || (*p == '@'))
            request->argc++;
    }

    /* Room for the elements and the terminating NIL */
    request->argv
        = pool_alloc(request->pool, (1 + request->argc) * sizeof(char *));

    /* Pass two: cut the copy up in place */
    request->argv[0] = path;
    n = 0;

    for (p = path; *p != '\0'; p++) {
        if (*p == '?') {
            *p = '\0';                      /* Ends the last element */
            request->get_suffix = p + 1;    /* Rest is the GET form */
            break;
        }

        if ((*p == '/') || (*p == '@')) {
            *p = '\0';                      /* Ends previous element */
            request->argv[++n] = p + 1;     /* Next one starts here */
        }
    }

    request->argv[request->argc] = NIL;
}

/* ======================================================================*/

/* request_parse_charset() **********************************************
 *
 * Parse "Accept-Charset:" header: sets request->use_utf8 if the comma
 * separated list names utf-8 (any case, parameters after ';' ignored).
 * Leaves the request alone if the header is absent.
 * request: Decoded HTTP request
 *
 ***********************************************************************/

void request_parse_charset(struct request *request)
{
    char *item, *next, *params;

    item = assoc_lookup(request->hdrs, "accept-charset");
    if (!item)
        return;

    /* Walk a scratch copy, since list entries are tied off in place */
    for (item = pool_strdup(request->pool, item); item; item = next) {
        next = strchr(item, ',');
        if (next) {
            *next = '\0';
            next++;
        }

        /* Drop ";q=..." and friends */
        params = strchr(item, ';');
        if (params)
            *params = '\0';

        if (strcasecmp(string_trim_whitespace(item), "utf-8") == 0) {
            request->use_utf8 = T;
            return;
        }
    }
}

/* ======================================================================*/

/* request_parse_encoding() *********************************************
 *
 * Parse "Accept-Encoding:" header: determine whether client accepts
 * gzip or x-gzip. If the header is present, request->allow_gzip is set
 * and the first gzip or x-gzip entry in the list sets request->use_gzip
 * or request->use_x_gzip respectively (parameters after ';' ignored).
 * Leaves the request alone if the header is absent.
 * request: Decoded HTTP request
 *
 ***********************************************************************/

void request_parse_encoding(struct request *request)
{
    char *item, *next, *params;

    item = assoc_lookup(request->hdrs, "accept-encoding");
    if (!item)
        return;

    request->allow_gzip = T;
    request->use_gzip = NIL;
    request->use_x_gzip = NIL;

    /* Walk a scratch copy, since list entries are tied off in place */
    for (item = pool_strdup(request->pool, item); item; item = next) {
        next = strchr(item, ',');
        if (next) {
            *next = '\0';
            next++;
        }

        /* Drop ";q=..." and friends */
        params = strchr(item, ';');
        if (params)
            *params = '\0';

        item = string_trim_whitespace(item);

        if (strcasecmp(item, "gzip") == 0) {
            request->use_gzip = T;
            return;
        }

        if (strcasecmp(item, "x-gzip") == 0) {
            request->use_x_gzip = T;
            return;
        }
    }
}

/* ======================================================================*/

/* request_test_referer() ***********************************************
 *
 * Check that "Referer:" header contains given hostname if it exists.
 *  request: Decoded HTTP request
 * hostname: Hostname the referring URL should start with, once any
 *           leading whitespace and "http://" or "https://" is removed
 *
 * Returns: T   => header or hostname missing or empty, or the hostname
 *                 matches (any case) and is followed by '/', '\', ':'
 *                 or the end of the header
 *          NIL => header names some other host
 ***********************************************************************/

int
request_test_referer(struct request *request, char *hostname)
{
    char *ref = assoc_lookup(request->hdrs, "referer");
    int len;

    /* Nothing to compare: let the request through */
    if (!ref || !ref[0] || !hostname || !hostname[0])
        return(T);

    while (Uisspace(*ref))
        ref++;

    /* Step over the scheme, if there is one */
    if (strncasecmp(ref, "https://", 8) == 0)
        ref += 8;
    else if (strncasecmp(ref, "http://", 7) == 0)
        ref += 7;

    len = strlen(hostname);

    if ((len <= 0) || (strncasecmp(ref, hostname, len) != 0))
        return(NIL);

    /* Hostname has to be complete, not a prefix of a longer name */
    switch (ref[len]) {
    case '/':
    case '\\':
    case ':':
    case '\0':
        return(T);
    }

    return(NIL);
}
