/* $Id: puf.h,v 1.6 2002/05/23 08:43:01 ossi Exp $ *
 *
 * puf 0.9  Copyright (C) 2000-2002 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * puf.h - global definitions
 *
 */

#ifndef _PUF_H
#define _PUF_H

#include <config.h>                                       

/*  With USE_LL, ask the C library for a 64-bit off_t.  This is placed in
 *  front of the system headers below, since a feature test macro only
 *  takes effect if it is defined before they are included.  */
#ifdef USE_LL
# define _FILE_OFFSET_BITS 64
#endif
#include <sys/types.h>                             
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <utime.h>
#include <fcntl.h>
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <limits.h>
#include <locale.h>

/*  Undef this if you don't want the -d switch.  When it is defined, the
 *  dbg()/dbge() macros further down expand to real calls; otherwise they
 *  expand to nothing.  */
#define DEBUG

/*  This may be defined if you want to, but it is not necessary:  */
/*#define	HTTP_ACCEPT	"Accept: * / *"  */

/*  Undef this if you want no message about incorrect DNS resolver configs  */
#define CORRECT_DNS

/*  String length limit:  */
#define	SHORTSTR			512

/*  Max buf size when reading from the network.  */
#define	MAXBUFSIZE			0x8000
/*  Overlapping length of consecutive buffer scans  */
#define OVERLAPLEN			200
/*  Maximal allowed header length  */
#define MAXHEADERLEN			0x10000

/*  Filename of a url ending with a slash:  */
#define	DEFAULT_INDEX_FILE_NAME		"index.html"

/*  Extension to append to partial files  */
#define PART_EXT			".part"

/*  This could be set higher than 0 if you like the debug output...  */
#define	DEFAULT_VERBOSITY		0

/*  Default max nr of connections:  */
#define	DEFAULT_MAX_ACTIVE		20

/*  Default max of asynchronous dns lookups:  */
#define	DEFAULT_MAX_DNS_FORKS		10

/*  Default timeout when awaiting DNS lookup completion  */
#define	DEFAULT_TIMEOUT_DNS		60
/*  Default timeout when connecting a host  */
#define	DEFAULT_TIMEOUT_CONNECT		60
/*  Default timeout when awaiting http reply data  */
#define	DEFAULT_TIMEOUT_DATA		120

/*  How long the bandwidth averaging timeframe should be
 *  (NOTE(review): the unit is not stated in this header - confirm)  */
#define AVERAGING_TIMEFRAME		100

/*  Default max number of connection attempts per url:  */
#define	DEFAULT_MAX_ATTEMPTS		5

/*  printf conversion for off_t values:
 *  SSOFFT is the bare length modifier + conversion ("li" or "lli"),
 *  SOFFT is the complete conversion specification including the '%'.  */
#ifndef USE_LL
# define SSOFFT "li"
#else
# define SSOFFT "lli"
#endif
#define SOFFT "%" SSOFFT

/*  Prototype for strdup() when HAVE_STRDUP is not defined.
 *  NOTE(review): HAVE_STRDUP presumably comes from config.h, with a
 *  replacement implementation living elsewhere in the project - confirm.  */
#ifndef HAVE_STRDUP
char *strdup(const char *s);
#endif

#ifdef USE_DMALLOC
# include <dmalloc.h>
#endif

/*  RND(m): pseudo-random integer in the range [0, m), based on rand().
 *
 *  The active (integer) variant drops the low 15 bits of rand() and scales
 *  the rest; with a 31-bit RAND_MAX the intermediate product fits into an
 *  int as long as m < 32768.  The argument is parenthesized so that an
 *  expression like RND(a + b) is scaled as a whole.
 *
 *  NOTE(review): where RAND_MAX is only 32767, rand() >> 15 is always 0 and
 *  this variant always yields 0; the float variant does not have that
 *  limitation.  */
#if 1
# define RND(m) ((rand() >> 15) * (m) / ((RAND_MAX >> 15) + 1)) /* m < 32768 */
#else
# define RND(m) ((int)(rand() * 1.0 * (m) / (RAND_MAX + 1.0))) /* needs float */
#endif

/*  Structures for hostlist.c  */

/*  One address of a host plus per-address bookkeeping.
 *  NOTE(review): the member descriptions marked "presumably" are inferred
 *  from the names only - confirm against hostlist.c.  */
typedef struct haddr_t {
    struct in_addr addr;	/*  the address itself  */
    time_t retry_time;		/*  presumably: when this address may be tried again  */
    u_int
	attempt:8,		/*  presumably: attempt counter (8 bits)  */
	last_errt:2,		/*  presumably: kind of the last error (2 bits)  */
	err_wait:16;		/*  presumably: wait period after an error (16 bits)  */
} haddr_t;

/*  Information about a host: names, protocol flags and its addresses.
 *  ips[] is a zero-length trailing array; the comment on it says it really
 *  holds num_ips entries, so the struct has to be allocated with room for
 *  them behind it.  */
typedef struct hinfo_t {
    char *name;			/*  back link to primary name  */
    char *lname;		/*  back link to local storage name  */
    u_int
	is_http11:1,		/*  1 till opposite proven  */
	maybe:1;		/*  proxy finder state flag  */
    short num_ips;		/*  0 if hostname not found  */
    short cur_ip;		/*  for round-robin  */
    haddr_t ips[0];		/*  actually num_ips  */
} hinfo_t;

/*  A host name entry, chained through next.  The name is stored in the
 *  zero-length trailing array, i.e. directly behind the struct.  */
typedef struct host_t {
    struct host_t *next;	/*  next entry in the chain  */

    hinfo_t *info;		/*  host information belonging to this name  */
    char name[0];		/*  the host name (variable length)  */
} host_t;


/*  proxy spec.
 *  One proxy of the proxy chain.  cgi_path[] is a zero-length trailing
 *  array, so its contents are stored directly behind the struct.  */

typedef struct proxy_t {
    struct proxy_t *next;	/*  next proxy in chain  */

    host_t *host;		/*  Proxy host  */
    u_int
	have_auth:1,		/*  User:Password (BASE64) after cgi_path?  */
	ready:1;		/*  DNS lookup complete?  */
    u_short port;		/*  Proxy port  */
    u_short ratio;		/*  Requested load ratio  */
    int score;			/*  Connection/byte count - for capacity-driven balancing  */
/*    u_short cur_conn;*/		/*  Currently open connections - for load-driven balancing - now unused */
    char cgi_path[0];		/*  Only for "cgi-proxies"  */
} proxy_t;


/*  fake user agent
 *  The agent string is stored in the zero-length trailing array.  */
typedef struct agent_t {
    int ratio;			/*  presumably: selection weight of this agent - confirm  */
    char agent[0];		/*  the user agent string (variable length)  */
} agent_t;


/*  generic array of pointers
 *  NOTE(review): nents/rents/cow look like "entries used", "entries
 *  reserved" and a copy-on-write flag, but nothing in this header confirms
 *  that - check the code that manages these arrays.  */
typedef struct ptrarr_t {
    void *ents;			/*  the entries  */
    u_int nents, rents, cow:1;
} ptrarr_t;


/*  disposition path spec.
 *  The path is stored in the zero-length trailing array.  */
typedef struct disp_path_t {
    int file_num;		/* for -xe */
    char path[0];		/*  the path itself (variable length)  */
} disp_path_t;


/*  option set.
 *  Each member comment names the command line switch(es) it belongs to.
 *  The trailing ":N" / "short" remarks presumably record the range the
 *  value actually needs (N bits resp. a short); the members themselves are
 *  plain ints.  */

typedef struct options_t {
#ifdef USE_MAGIC
    int chk;			/*  structure checksum, see checken_gen()  */
#endif
    int follow_src;		/*  -p[r[+[+]]]		:3	*/
    int follow_href;		/*  -r[+[+]]		:3	*/
    int follows_max;		/*  [-{p,{{pr,r}{,+{,+}}}}]	:3	*/
    int dir_mode;		/*  -nd & -xd		:2	*/
    int max_depth;		/*  -ld			short	*/
    int max_recurse;		/*  -l			short	*/
    off_t max_bytes;		/*  -lb				*/
    int inhibit_cgiget;		/*  -xg & -ng		:2	*/
    int force_recurse;		/*  -F			:1	*/
    int update_mode;		/*  -u, -c & -nc	:2	*/
    disp_path_t *disp_path;	/*  -P				*/
    char *index_filename;	/*  -xi				*/
    int enumerate_files;	/*  -xe			:1	*/
#ifndef HAVE_CYGWIN
    int fat_quotes;		/*  -xq			:1	*/
#endif
    int no_touch;		/*  -nt			:1	*/
    int delete_broken;		/*  -nb			:1	*/
    int send_if_range;		/*  -ni			:1	*/
    int send_referer;		/*  -nR			:1	*/
    int uar_total;		/*  [-U & -iU]			*/
    ptrarr_t user_agents;	/*  -U & -iU			*/
    ptrarr_t aux_headers;	/*  -xH				*/
    int timeout_connect;	/*  -Tc			short	*/
    int timeout_data;		/*  -Td			short	*/
    int max_attempts;		/*  -t			short	*/
    int fail_no_wait;		/*  -nw			:1	*/
    ptrarr_t bind_addrs;	/*  -xb & ib			*/
    ptrarr_t proxies;		/*  -y & -iy			*/
} options_t;


/*  url parameters.
 *  Reference counted (ref_count); see clone_parm().
 *  NOTE(review): the bit-fields here are declared with `uint' while the
 *  rest of this header uses `u_int' - consider unifying.  */

typedef struct url_parm_t {
#ifdef USE_MAGIC
    int chk;			/*  structure checksum, see checken_gen()  */
#endif
    options_t *opt;		/*  the option set in effect  */
    char *http_auth;		/*  User:Password in BASE64 encoding  */
    char *disposition;		/*  -O, [-xE] & URL*disp	*/
    proxy_t *proxy;		/*  URL^proxy			*/
    uint ref_count:31;		/*  >1 -> actually copy in clone_parm() */
    uint strictproxy:1;		/*  URL^^proxy			*/
} url_parm_t;

/*  A URL.  The local part is stored in the zero-length trailing array.
 *
 *  With USE_MAGIC the struct carries a checksum (chk); the checksum macro
 *  ichecken_hashurl() starts hashing at `len', so `attempt' is moved in
 *  front of chk in that configuration and is thereby outside the checked
 *  data.  Without USE_MAGIC, attempt is an 8-bit bit-field instead.  */
typedef struct url_t {
#ifdef USE_MAGIC
    u_int attempt;		/*  Nr of attempt to download this URL  */
    int chk;			/*  Structure checksum  */
    /* checked data begins here  */
    u_int len;			/*  Structure length  */
#endif
    struct url_t *next;		/*  next url in global chain  */
    struct url_t *referer;	/*  Referer URL (or NULL)  */

    url_parm_t *parm;		/*  URL parameters  */
    u_int
#ifndef USE_MAGIC
	attempt:8,		/*  Nr of attempt to download this URL  */
#endif
	is_top_dir:1,		/*  Get disp_off from this url?  */
	is_requisite:1,		/*  1=comes from src=, otherwise href=  */
	relocs:3;		/*  How often this URL was redirected  */

    /*  Url: host, port and local part:  */
    int url_hash;		/*  (For quick search)  */
    host_t *host;		/*  Host of URL  */
    u_short port;		/*  Host port  */
    short link_depth;		/*  For -l */
    short path_len;		/*  Length of the path in the local part  */
    short disp_pathoff;		/*  Disposition path; points into local_part  */
    char local_part[0];		/*  Variable length local part  */
} url_t;

/*  queue element for waiting urls
 *  (element type of queue_urls_connect, declared further down)  */

typedef struct wurl_t {
    struct wurl_t *next;	/*  next url in current queue  */

    url_t *url;			/*  the associated url  */
} wurl_t;

/*  queue element for waiting objects
 *  An object is a url or a proxy; whost_t.objq chains the objects that
 *  depend on one DNS lookup.  */

typedef struct wobj_t {
    struct wobj_t *next;	/*  next object in current queue  */

    url_t *url;			/*  the associated url (if any)  */
    proxy_t *proxy;		/*  the associated proxy (if any)  */
} wobj_t;

/*  queue element for active urls
 *  (element type of queue_urls_request and queue_urls_reply, declared
 *  further down).  The local file name is stored in the zero-length
 *  trailing array.  */

typedef struct aurl_t {
    struct aurl_t *next;	/*  next url in current queue  */

    url_t *url;			/*  the associated url  */

    off_t file_off;		/*  Length of partial content  */
    off_t size_total;		/*  Size according to host (0=?)  */
    off_t size_fetched;		/*  Size fetched so far  */

    time_t file_time;		/*  Partial content creation time  */
    time_t timeout;		/*  Next timeout  */

    int socket;			/*  TCP socket  */
    int ipidx;			/*  Index of the used haddr_t  */
/*    int bipidx;	*/		/*  Index of the bound outgoing ip  */
    proxy_t *proxy;		/*  Pointer to used proxy  */
    int pipidx;			/*  Index of the proxy's used haddr_t  */

    int f;			/*  File descriptor (-1 if switched)  */

    u_int
	file_created:1,		/*  Did we already create the file?  */
	http_done_header:1,	/*  End of header reached?  */
	content_is_html:1,	/*  Try recursive search on file?  */
	reloc:1;		/*  Is this url being redirected?  */

    /*  Input data overlap buffer  */
    char *buffer;		/*  Overlap buffer  */
    short size;			/*  Current size of overlap buffer  */
    short offset;		/*  Current fill state of overlap buffer  */

    short http_result_code;	/*  200, 400 etc. 0 means no status code by now  */

    short displen;		/*  Length of the member that follows (presumably disposition)  */
    char disposition[0];	/*  Local file name  */
} aurl_t;

/*  queue element for pending DNS lookups
 *  (element type of queue_dns_forks and queue_dns_reads, declared further
 *  down; handled by fork_lookup() and finish_lookup()).  */
typedef struct whost_t {
    struct whost_t *next;	/*  fork/read queue link  */

    wobj_t *objq;		/*  urls/proxies depending on this lookup  */
    host_t *host;		/*  host being looked up  */
    int fd;			/*  pipe to read from  */
    int pid;			/*  pid of the DNS helper process  */
    long timeout;		/*  lookup timeout  */
} whost_t;

/* set: prepend, remove random
 *
 * Singly linked, 0-terminated list headed by the pointer q.  Elements need
 * a `next' member.
 *
 *   ex_linear_na_queue(q, t)  declare / linear_na_queue(q, t)  define the
 *       head pointer q for element type t.
 *   lnq_append(q, e)
 *       Link e in at the head of q (despite the name this prepends).
 *       e is evaluated several times.
 *   lnq_iterate(q, t, e, a)
 *       Execute statement a for every element; e is declared as `t *e'.
 *   lnq_iterate_rm(q, t, e, a, c)
 *       Like lnq_iterate, but elements may be unlinked from within a.
 *       c is the name of a label placed right in front of the advance
 *       step, so `goto c' inside a skips the rest of a.
 *   lnq_remove(q, e)
 *       Unlink the current element.  Only usable inside the action of
 *       lnq_iterate_rm with the same e, as it relies on that macro's hidden
 *       e##p link pointer.  Follow it with `continue': the advance step
 *       would otherwise step on through the element just unlinked.
 *
 * q and e are parenthesized wherever they are used as expressions, so that
 * arguments like &node or *pp work.  Where e is declared or token-pasted it
 * has to be a plain identifier.
 */
#define ex_linear_na_queue(q, t) extern t *q
#define linear_na_queue(q, t) t *q
#define lnq_append(q, e) do {(e)->next = (q); (q) = (e);} while(0)
/* #define lnq_rm1st(q) do {q = q->next;} while(0) */
#define lnq_remove(q, e) do {*e##p = (*e##p)->next;} while(0)
#define lnq_iterate(q, t, e, a) do {t *e; for (e = (q); e; e = e->next) a;} while(0)
#define lnq_iterate_rm(q, t, e, a, c) do {t *e, **e##p; for (e##p = &(q); (e = *e##p); ) {a; c: e##p = &(e->next); }} while(0)

/* queue: append, remove first
 *
 * Singly linked, 0-terminated FIFO.  Besides the head pointer q there is a
 * companion variable q_app (q##_app) that points at the link to store the
 * next appended element in: &q while the queue is empty, otherwise the
 * `next' member of the last element.  Because of the token pasting, q must
 * always be the plain identifier of the queue.
 *
 *   ex_linear_queue(q, t)  declare / linear_queue(q, t)  define q and q_app.
 *   lq_append(q, e)   Append e at the tail.  e is evaluated several times;
 *                     it is parenthesized, so &node or *pp may be passed.
 *   lq_rm1st(q)       Drop the first element; q must not be empty.  Resets
 *                     q_app when the queue runs empty.
 *   lq_consume(q, t, e, a)
 *                     While q is non-empty, set e to the first element and
 *                     execute a.  a has to take that element off the queue
 *                     (e.g. lq_rm1st), otherwise the loop never ends.
 */
#define ex_linear_queue(q, t) extern t *q, **q##_app
#define linear_queue(q, t) t *q, **q##_app = &q
#define lq_append(q, e) do {(e)->next = 0; *q##_app = (e); q##_app = &((e)->next);} while(0)
#define lq_rm1st(q) do {q = q->next; if (!q) q##_app = &q; } while(0)
/* #define lq_remove(q, e) do {if (q##_app == &((*e##p)->next)) q##_app = e##p; *e##p = (*e##p)->next;} while(0) */
#define lq_consume(q, t, e, a) do {t *e; while (q) {e = q; a;}} while(0)

/* circular queue: append, remove first, move first to end
 *
 * Singly linked ring.  q points at the LAST element, so q->next is the
 * first one; an empty queue is q == 0.  Moving the first element to the end
 * is simply q = q->next.
 *
 *   ex_circular_queue(q, t)  declare / circular_queue(q, t)  define q.
 *   cq_append(q, e)   Insert e behind the last element and make it the new
 *                     last element.
 *   cq_rm1st(q)       Unlink the first element; q must not be empty.
 *   cq_consume(q, t, e, a)
 *                     Remember the element that is last on entry, then
 *                     repeatedly set e to the first element and execute a,
 *                     until the remembered element has been handled.  a has
 *                     to remove or rotate the first element each time;
 *                     nothing happens if q is empty.
 *
 * q and e are parenthesized wherever they are used as expressions, so that
 * arguments like &node or *pp work; they are evaluated several times.
 * In cq_consume e is declared and token-pasted and therefore has to be a
 * plain identifier.
 */
#define ex_circular_queue(q, t) extern t *q
#define circular_queue(q, t) t *q
#define cq_append(q, e) do {if(q) {(e)->next = (q)->next; (q)->next = (e);} else (e)->next = (e); (q) = (e);} while(0)
#define cq_rm1st(q) do {if((q)->next == (q)) (q) = 0; else (q)->next = (q)->next->next;} while(0)
/* #define cq_remove(q, e) do {if(q->next == q) q = 0; else {if(e##p->next == q) q = e##p; e##p->next = e##p->next->next;}} while(0) */
#define cq_consume(q, t, e, a) do {t *e, *e##fp; if (q) {e##fp = (q); do {e = (q)->next; a;} while (e != e##fp);}} while(0)

/*  Update modes for already existing files
 *  (presumably the values of options_t.update_mode - confirm)  */
#define EX_CLOBBER	0
#define EX_UPDATE	1
#define EX_CONTINUE	2
#define EX_NO_CLOBBER	3

/*  Directory hierarchy creation modes
 *  (presumably the values of options_t.dir_mode - confirm)  */
#define DIRS_NONE	0
#define DIRS_NORMAL	1
#define DIRS_ALWAYS	2

/*  Levels of url recursion, in ascending order of reach
 *  (presumably the values of options_t.follow_src / follow_href - confirm):  */
#define DONT_FETCH		0
#define NOT_RECURSIVE		1
#define SUBDIR_RECURSIVE	2
#define HOST_RECURSIVE		3
#define GLOBAL_RECURSIVE	4

/*  Generic return codes  */
#define RT_OK		0	/* go on */
#define RT_SKIP		1	/* deny existence */
#define RT_AGAIN	2	/* non-error retry */
#define RT_RETRY	3	/* error retry */
#define RT_GIVEUP	4	/* fatal error */
#define RT_DONE		5	/* finished */
#define RT_TIMEOUT	6	/* proxy->server timeout */
#define RT_REFUSED	7	/* proxy->server refusal */

/*  Functions:  */

/*  main.c:  */
extern char *progname;
extern int verbose;
/*  Hash over len bytes at data; used by the checken_* macros below  */
int calc_hash(u_char * data, int len);
/*  If dmalloc.h has been included (USE_DMALLOC), mmalloc/mrealloc are plain
 *  aliases of malloc/realloc; otherwise they are functions of their own.
 *  NOTE(review): those presumably bail out on allocation failure - confirm
 *  in main.c.  */
#ifdef __DMALLOC_H__
# define mmalloc malloc
# define mrealloc realloc
#else
void *mmalloc(size_t size);
void *mrealloc(void *ptr, size_t size);
#endif
/*  Message functions taking a printf-like variable argument list
 *  (NOTE(review): exact format handling is not visible here).  die() gets an
 *  additional ret value, errm() the url concerned, prx() a level.  */
void die(int ret, char *msg, ...);
int errm(url_t *u, char *txt, ...);
void prx(int lev, char *txt, ...);

/*  Message levels, presumably for the lev argument of prx()  */
#define NFO 3
#define WRN 2
#define ERR 1

/*  Debug output.
 *
 *  dbg(wht, tdo) calls dbp, dbge(wht, tdo) calls dbpe, each with the
 *  parenthesized argument list tdo, if at least one bit of wht is set in
 *  the global `debug', e.g.
 *      dbg(URL | DNS, ("looking up %s\n", name));
 *  wht is parenthesized in the expansion: `&' binds tighter than `|', so an
 *  unparenthesized `URL | DNS & debug' would be true regardless of debug.
 *
 *  Without DEBUG both macros expand to an empty statement and their
 *  arguments are not evaluated.  */
#ifdef DEBUG
extern int debug;
void dbp(char *txt, ...);
void dbpe(char *txt, ...);
# define dbg(wht, tdo) do { if ((wht) & debug) dbp tdo; } while (0)
# define dbge(wht, tdo) do { if ((wht) & debug) dbpe tdo; } while (0)
#else
# define dbg(wht, tdo) do { } while (0)
# define dbge(wht, tdo) do { } while (0)
#endif

/*  Bit flags, one bit each (presumably the debug categories to be passed
 *  as wht above - confirm)  */
#define URL 1
#define DNS 2
#define QUE 4
#define CON 8
#define HDR 16
#define CHK 32
#define MEM 64

/*  ierr(m): internal error.
 *
 *  The active variant ignores m and deliberately crashes the program by
 *  storing through a null pointer.  The pointer is volatile-qualified
 *  because compilers are allowed to drop a store through a plain null
 *  pointer altogether.  The expansion is parenthesized so that it stays one
 *  expression wherever it is used.
 *  The disabled variant would report m via die() instead.  */
#if 0
# define ierr(m) die(3, m)
#else
# define ierr(m) (*(volatile char *)0 = 0)
#endif

/*  Structure checksums ("magic"), only active with USE_MAGIC.
 *
 *    checken(m)           report a checksum failure via magck(m)
 *    checken_updurl(p)    recompute p->chk of a url_t
 *    checken_updgen(p)    recompute p->chk of any other checked struct
 *    checken_url(p, m)    verify a url_t, checken(m) on mismatch
 *    checken_gen(p, m)    verify any other checked struct
 *
 *  The url variant hashes p->len bytes starting at the len member.  The
 *  generic variant hashes the whole struct except its first sizeof(int)
 *  bytes, i.e. it expects chk to be an int and the first member.
 *
 *  p is parenthesized in all expansions (it is evaluated several times),
 *  and the data pointer is cast to u_char *, which is what calc_hash()
 *  takes.  Without USE_MAGIC all statement macros are no-ops that do not
 *  evaluate their arguments.  */
#ifdef USE_MAGIC
void magck(const char *);
# define checken(m) magck(m)
# define ichecken_hashurl(p) calc_hash((u_char *)&((p)->len), (p)->len)
# define ichecken_hashgen(p) calc_hash((u_char *)(p) + sizeof(int), sizeof(*(p)) - sizeof(int))
# define checken_updurl(p) ((p)->chk = ichecken_hashurl(p))
# define checken_updgen(p) ((p)->chk = ichecken_hashgen(p))
# define ichecken_url(p) ((p)->chk == ichecken_hashurl(p))
# define ichecken_gen(p) ((p)->chk == ichecken_hashgen(p))
# define checken_url(p,m) do { if(!ichecken_url(p)) checken(m); } while(0)
# define checken_gen(p,m) do { if(!ichecken_gen(p)) checken(m); } while(0)
#else
# define checken(m) do { } while(0)
# define checken_updurl(p) do { } while(0)
# define checken_updgen(p) do { } while(0)
# define checken_url(p,m) do { } while(0)
# define checken_gen(p,m) do { } while(0)
#endif

/*  hostlist.c:  */
extern int always_primary_name;
extern host_t *hostlist;
/*  expands to: extern whost_t *queue_dns_forks, **queue_dns_forks_app  */
ex_linear_queue(queue_dns_forks, whost_t);
/*  expands to: extern whost_t *queue_dns_reads  */
ex_linear_na_queue(queue_dns_reads, whost_t);
/*  Look up a host by name; name is given with an explicit length namlen.
 *  NOTE(review): judging by the signatures, the _full variant additionally
 *  ties the lookup to the url u or proxy prx that needs it - confirm in
 *  hostlist.c.  */
host_t *host_lookup_fast(char *name, int namlen);
host_t *host_lookup_full(char *name, int namlen, url_t *u, proxy_t *prx);
/*  Start resp. complete the DNS lookup described by wh (see whost_t)  */
int fork_lookup(whost_t *wh);
int finish_lookup(whost_t *wh);

/*  getopts.c:  */
/*  Takes main()'s argument vector; presumably parses the command line
 *  switches referred to in options_t - confirm in getopts.c  */
void getopts(int argc, char *argv[]);

/*  fetch.c:  */
/*  Global limits and counters.  NOTE(review): their exact meaning is not
 *  visible in this header; the names suggest byte/url limits (max_*),
 *  progress counters (num_*, *_bytes) and settings taken from the command
 *  line - confirm in fetch.c / getopts.c.  */
extern off_t max_bytes, fetched_bytes, total_bytes;
extern int max_dns_forks;
extern int max_urls_active;
extern int timeout_dns;
extern int max_time;
extern int max_urls;
extern int num_urls;
extern int num_urls_done;
extern int num_urls_fail;
extern int num_errors;
extern int show_stat;
extern int waiting_proxies;
extern int all_proxy_wait;	/* unused */
extern struct timeval cur_tv;
extern struct sockaddr_in bind_addr;
/*  expands to: extern wurl_t *queue_urls_connect (circular queue)  */
ex_circular_queue(queue_urls_connect, wurl_t);
/*  expand to: extern aurl_t *queue_urls_request resp. *queue_urls_reply  */
ex_linear_na_queue(queue_urls_request, aurl_t);
ex_linear_na_queue(queue_urls_reply, aurl_t);
int touch(aurl_t *au);
void cleanup(char *msg);
void fetch_all(void);

/*  url.c:  */
extern int economize_dns;
extern url_t *urllist;		/*  url_t entries are chained via ->next  */
extern proxy_t *proxylist;	/*  proxy_t entries are chained via ->next  */
/*  Strings are passed as pointer + explicit length (path/len, url/len).
 *  NOTE(review): return value conventions are not visible here; several of
 *  these presumably return the RT_* codes - confirm in url.c.  */
int same_dir(char *path, int len, url_t *referer, int is_req);
int find_url(char *path, int len, hinfo_t *hinfo, u_short port, int *hashp);
proxy_t *parse_proxy (char *proxy, int ratio);
int parse_add_url(char *url, int len, url_t *referer, url_parm_t *parm,
                  int istopdir, int isreq, int relocs, int link_depth);
int queue_url(url_t *u);
void add_url(url_t *u);
void free_url(url_t *u);
/*  See url_parm_t.ref_count: the parameters are only actually copied if
 *  they are referenced more than once  */
int clone_parm(url_t *u);

/*  recurse.c:  */
/*  All three work on behalf of the url u; the data comes from a memory
 *  buffer (databuf/len), from a file descriptor-like handle fi, or from a
 *  file given by name.  NOTE(review): presumably they scan the data for
 *  further urls to fetch - confirm in recurse.c.  */
int recurse_buff(url_t *u, char *databuf, int len, int notlast);
void recurse_pfile(url_t *u, int fi, char **bupo, int *lepo);
void recurse_file(url_t *u, char *name);

/*  http_conn.c:  */
/*  Takes a url and an aurl_t ** output parameter; presumably creates the
 *  active-url element for u and stores it in *au - confirm in http_conn.c  */
int activate_url(url_t *u, aurl_t **au);

/*  http_req.c:  */
/*  NOTE(review): presumably sets up the default User-Agent data (cf.
 *  options_t.user_agents) - confirm in http_req.c  */
extern void init_user_agent(void);
/*  x rounded up to whole groups of 3, 4 output units per group, plus 1  */
#define len_enc_auth(x) (((x) + 2) / 3 * 4 + 1)
/*  NOTE(review): buf presumably has to provide len_enc_auth(len) bytes for
 *  the encoded form of the len bytes at auth - confirm in http_req.c  */
void encode_auth(char *buf, char *auth, int len);
/*  Works on the active url au; judging by the name it sends the HTTP GET
 *  request for it  */
int send_http_get(aurl_t *au);

/*  http_rsp.c:  */
extern int economize_files;
/*  Takes a file name, open flags and an int * output parameter f
 *  (presumably receiving the file descriptor - confirm in http_rsp.c)  */
int mmfopen(char *name, int flags, int *f);
int free_fd(void);
/*  Works on the active url au; judging by the name it processes the reply
 *  data received for it  */
int handle_reply(aurl_t *au);

/*  util_date.c:  */
/*  BAD_DATE is presumably what parseHTTPdate() returns for a date string
 *  it cannot parse - confirm in util_date.c  */
#define BAD_DATE 0
time_t parseHTTPdate(const char *date);

#endif				/*  _PUF_H  */
