/*
	This is (will be) version 1.0 of quote.cgi
	The program will get a homepage and randomly quote a part of it
	as specified in the file defined as CONFIGFILE later.

	geturl() function by km@multi.fi (Kimmo Makkonen) is used instead of lynx
	Temporary files are not used anymore,
	More configuration options are provided

	hingo@multi.fi (Henrik Ingo)
*/

/*kimmos*/
#include <stdio.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
/*/kimmos*/
#include <unistd.h>

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

#define TITLE "Quote.cgi 1.0 (beta) by hingo@multi.fi, august 1997."
/*the configurationfile:*/
#define CONFIGFILE "quote1rc"
/*variables used in the configfile*/
#define URL "URL="
#define LINKS "LINKS="
#define BEGIN "BEGIN="
#define END "END="
#define NQUOTES "NQUOTES="
#define HEADER "HEADER="
#define FROM "FROM="

/*errors*/
#define BUGREPORTTO "hingo@multi.fi"
#define CONFERR 1
#define URLERR 2
#define HTMLERR 3
#define MEMERR 4

#define TRUE 1
#define FALSE 0

/*kimmos*/
struct urlinfo {
		char prot[10];
		char host[64];
		int port;
		char path[256];
};
/*/kimmos*/

	/* Match positions recorded by the most recent find_begins() call:
	   entry k is the index just past the k-th occurrence of the pattern.
	   The array is allocated in find_begins(); main() frees it before
	   each new search. */
	int * begin_points=NULL;

/*kimmos*/      
/*
 * Split url ("prot://host[:port][/path]") into its components and store
 * them in *urldata.
 *
 *   url      NUL-terminated URL text; it is not modified.
 *   urldata  receives protocol, host, port and path.
 *
 * Every component is truncated to fit its destination array instead of
 * overflowing it.  The port defaults to 80 when none (or 0) is given, and
 * the stored path always ends in '/'.
 */
void parseurl(char *url, struct urlinfo *urldata) {
		size_t i = 0, j;
		char port[10];

		/* The port is only assigned below when the URL names one, so give
		   it a defined value first. */
		urldata->port = 0;

		/* get the protocol */
		for (j = 0; url[i] != '\0' && url[i] != ':'; i++) {
				if (j < sizeof(urldata->prot) - 1)
						urldata->prot[j++] = url[i];
		}
		urldata->prot[j] = '\0';

		/* Step over "://" one character at a time so that a short or
		   malformed URL can never move the index past the terminator. */
		if (url[i] == ':')
				i++;
		for (j = 0; j < 2 && url[i] == '/'; j++)
				i++;

		/* get the host name or ip */
		for (j = 0; url[i] != '\0' && url[i] != ':' && url[i] != '/'; i++) {
				if (j < sizeof(urldata->host) - 1)
						urldata->host[j++] = url[i];
		}
		urldata->host[j] = '\0';

		/* get the optional port number */
		if (url[i] == ':') {
				i++;
				/* isdigit() is false for both '\0' and '/', so it is the
				   only stop condition needed. */
				for (j = 0; isdigit((unsigned char) url[i]); i++) {
						if (j < sizeof(port) - 1)
								port[j++] = url[i];
				}
				port[j] = '\0';
				/* at most 9 digits are kept, so the value fits in an int */
				urldata->port = atoi(port);
		}
		if (urldata->port == 0)
				urldata->port = 80;

		/* get the path, keeping room for the trailing '/' and the NUL */
		for (j = 0; url[i] != '\0'; i++) {
				if (j < sizeof(urldata->path) - 2)
						urldata->path[j++] = url[i];
		}
		if (j == 0 || urldata->path[j - 1] != '/')
				urldata->path[j++] = '/';
		urldata->path[j] = '\0';
}


/*
 * Fetch the document at url with a bare "GET <path>" request and return
 * its contents as a NUL-terminated string.
 *
 * The returned buffer is owned by geturl(): it is kept in a static
 * pointer and released at the start of the next call, so the caller must
 * not free it and must not use it after calling geturl() again.
 *
 * Any failure (out of memory, socket, name lookup, connect, send) prints
 * a message and terminates the program with exit(0), the same status the
 * existing socket and connect failures already used.  A read error simply
 * ends the transfer; whatever arrived so far is returned.
 */
char *geturl(char *url)
{
		struct urlinfo urldata;
		struct hostent *hostp;
		static char *content=NULL;
		char buffer[1024];
		char request[512];
		struct sockaddr_in sock;
		size_t size = 0, memsize = 8192, sent;
		const size_t blocksize = 4096;
		ssize_t n;
		int sockid, reqlen;

		/* drop the document of the previous call; free(NULL) is harmless */
		free(content);
		content = malloc(memsize);
		if (content == NULL) {
				printf("error allocating memory, error: %s\n", strerror(errno));
				exit(0);
		}
		content[0] = '\0';

		parseurl(url, &urldata);

		/* create socket */
		if ((sockid = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
				printf("error creating socket, error: %s\n", strerror(errno));
				exit(0);
		}

		/* fill in the server address */
		memset(&sock, 0, sizeof(sock));
		sock.sin_family = AF_INET;
		hostp = gethostbyname(urldata.host);
		if (hostp == NULL || hostp->h_addr_list[0] == NULL ||
				hostp->h_length > (int) sizeof(sock.sin_addr)) {
				printf("error resolving host %s\n", urldata.host);
				close(sockid);
				exit(0);
		}
		memcpy(&sock.sin_addr, hostp->h_addr_list[0], (size_t) hostp->h_length);
		sock.sin_port = htons(urldata.port);

		if (connect(sockid, (struct sockaddr *) &sock,
				sizeof(sock)) < 0) {
				printf("error connecting to server, error: %s\n", strerror(errno));
				close(sockid);
				exit(0);
		}

		/* make and send request to the server */
		reqlen = snprintf(request, sizeof(request), "GET %s\n", urldata.path);
		if (reqlen < 0 || (size_t) reqlen >= sizeof(request)) {
				printf("error building request for %s\n", urldata.path);
				close(sockid);
				exit(0);
		}
		for (sent = 0; sent < (size_t) reqlen; sent += (size_t) n) {
				n = write(sockid, request + sent, (size_t) reqlen - sent);
				if (n <= 0) {
						printf("error sending request, error: %s\n", strerror(errno));
						close(sockid);
						exit(0);
				}
		}

		/* read response from server */
		while ((n = read(sockid, buffer, sizeof(buffer))) > 0) {
				/* always keep one spare byte for the terminating NUL */
				if (size + (size_t) n + 1 > memsize) {
						size_t newsize = size + (size_t) n + 1 + blocksize;
						char *grown = realloc(content, newsize);

						if (grown == NULL) {
								printf("error allocating memory, error: %s\n",
										strerror(errno));
								close(sockid);
								exit(0);
						}
						content = grown;
						memsize = newsize;
				}
				/* Every received byte is stored; string functions applied
				   to the result still stop at the first NUL byte. */
				memcpy(content + size, buffer, (size_t) n);
				size += (size_t) n;
				content[size] = '\0';
		}

		close(sockid);

		return content;
}

/*/kimmos*/

/*
 * Print an HTML error report and terminate the program.
 *
 *   errtype  one of CONFERR, URLERR, HTMLERR or MEMERR; it selects the
 *            message and is also used as the exit status.  Any other
 *            value prints only the heading and the bug-report link.
 *   string   detail inserted into the message: the URL for URLERR and
 *            HTMLERR, the failing function name for MEMERR.  It is not
 *            used for CONFERR.
 *
 * This function does not return.
 */
void fatal(int errtype, char * string)
{
	printf("<H1>Quote.cgi error!</H1>\n");

	switch (errtype)
		{
		case CONFERR:
			printf("<H3>An error occured while trying to read the ");
			printf("configuration file. <BR> \n");
			/* one closing quote only; it used to be printed twice */
			printf("The file '%s' may be of wrong format or missing. </H3>\n",
						 CONFIGFILE);
			break;

		case URLERR:
			printf("<H3>An error occured while trying ");
			printf("to get the page %s. </H3> \n", string);
			break;

		case HTMLERR:
			printf("<H3>An error occured while processing the HTML-code from ");
			printf("<A HREF='%s'> %s </A>.<BR> \n", string,string);
			printf("Probably the page and the configurationfiles don't match.");
			printf("</H3>\n");   /* the heading was left open before */
			break;

		case MEMERR:
			printf("<H3>Function %s failed to allocate memory.\n", string);
			printf("Unable to continue.\n");
			printf("</H3>\n");   /* the heading was left open before */
			break;

		default:
			break;
		}

	/* the address is passed as data, never as the format string */
	printf("<P>Bugreports to: <A HREF='mailto:%s'>%s</A>",
				 BUGREPORTTO, BUGREPORTTO);

	exit(errtype);
}
/*
 * skip() advances fp past every line that should be ignored: lines that
 * start with '#', lines that start with a space or tab, and empty lines
 * (a line starting with '\r' or '\n').
 *
 * On return the stream is positioned at the first character of the next
 * significant line.  That character is pushed back with ungetc() and also
 * returned, so the caller can read the whole line afterwards.
 * Returns EOF if the end of the file is reached first.
 */
int skip(FILE * fp)
{
	int ch;   /* int, not char, so that EOF stays distinguishable */

	for (;;)
		{
			ch = fgetc(fp);
			if (ch == EOF)
				return(EOF);

			if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t' && ch != '#')
				break;

			/* Discard the remainder of this line, whatever its length.  When
				 the character just read was the newline itself the line is
				 already finished, so the following line must not be touched. */
			while (ch != '\n')
				{
					ch = fgetc(fp);
					if (ch == EOF)
						return(EOF);
				}
		}

	/* the last character taken starts a real line, so hand it back */
	ungetc(ch, fp);

	return(ch);
}

/*
 * Strip trailing characters for which isgraph() is false (spaces, tabs,
 * line ends, control characters) from str, in place.
 *
 * An empty string, or one made up only of such characters, ends up empty;
 * nothing in front of str is ever read or written.
 */
void clearend(char *str)
{
	size_t len = strlen(str);

	/* the cast keeps isgraph() defined for characters with the high bit set */
	while (len > 0 && !isgraph((unsigned char) str[len - 1]))
		str[--len] = '\0';
}

/*
 * Compare two characters the way the pattern matcher needs it: letters
 * are compared without regard to case, and a double quote is treated as
 * equal to a single quote.
 *
 * Returns 1 when the characters match and 0 otherwise, which are the
 * values of this file's TRUE and FALSE.
 */
int match_char(char a, char b)
{
	/* tolower() needs an unsigned char value (or EOF); a plain char can
		 be negative for bytes above 0x7f */
	unsigned char ua = (unsigned char) a;
	unsigned char ub = (unsigned char) b;

	if (tolower(ua) == tolower(ub))
		return(1);

	/* a comparison joined with || yields exactly 1 or 0 */
	return((a == '"' && b == '\'') || (a == '\'' && b == '"'));
}

/*
 * match() tests whether pattern occurs in string starting at index *i.
 * Characters are compared with match_char(), i.e. without regard to case
 * and with both quote characters treated as equal.
 *
 * Returns TRUE and sets *i to the index just after the matched text.
 * Returns FALSE and leaves *i untouched when there is no match, when the
 * string ends before the pattern does, or when pattern is empty.
 *
 * The scan never reads beyond the terminating NUL of string.
 */
int match(char * string, int * i, char * pattern)
{
	int pos = *i;
	int k;

	if (pattern[0] == '\0')
		return(FALSE);

	for (k = 0; pattern[k] != '\0'; k++, pos++)
		{
			if (string[pos] == '\0')
				return(FALSE);
			if (!match_char(string[pos], pattern[k]))
				return(FALSE);
		}

	/* commit the new position only for a complete match, so that callers
		 can resume their scan at the very next character after a failure */
	*i = pos;
	return(TRUE);
}

/*
 * find_begins() records every occurrence of pattern in string.
 *
 * The global begin_points is replaced by a newly allocated array; entry k
 * holds the index just after the k-th occurrence.  The previous array is
 * not released here, the caller frees it before calling.  The array always
 * has room for at least one entry, and entry 0 is set to 0 when nothing
 * is found.
 *
 * Returns the number of occurrences.  Calls fatal(MEMERR, ...) when memory
 * cannot be allocated.
 */
int find_begins(char * string, char * pattern)
{
	int found = 0, i = 0;

	begin_points = (int *) malloc(sizeof(int));
	if(NULL == begin_points )
		fatal(MEMERR, "malloc()");
	begin_points[0] = 0;

	/* testing for the terminator before every attempt keeps the scan
		 inside the string, also for an empty string */
	while(string[i] != '\0')
		{
			/* match() works on a copy of the index, so a failed attempt
				 always resumes exactly one character further on */
			int pos = i;

			if(match( string, &pos, pattern) == TRUE)
				{
					int * grown = (int *) realloc(begin_points,
																				(found+1)*sizeof(int));
					if(NULL == grown)
						fatal(MEMERR, "realloc()");
					begin_points = grown;
					begin_points[found] = pos;
					found++;
					i = pos;
				}
			else
				i++;
		}

	return(found);
}

/*
 * find_end() returns the number of characters from the beginning of
 * string to the beginning of the nquotes-th occurrence of pattern.
 *
 * When string contains fewer than nquotes occurrences (or nquotes is not
 * positive) the length of string is returned instead.
 */
int find_end(char * string, char * pattern, int nquotes)
{
	int i = 0;

	/* testing for the terminator before every attempt keeps the scan
		 inside the string, also for an empty string */
	while(string[i] != '\0')
		{
			/* match() works on a copy, so i still marks where the
				 candidate occurrence begins */
			int pos = i;

			if( match( string, &pos, pattern) == TRUE)
				{
					nquotes--;
					if(nquotes == 0)
						return(i);
					i = pos;
				}
			else
				i++;
		}

	return(i);
}


/* Copy src to dst, keeping at most max characters plus the NUL. */
static void conf_copy(char * dst, const char * src, size_t max)
{
	size_t len = strlen(src);

	if(len > max)
		len = max;
	memcpy(dst, src, len);
	dst[len] = '\0';
}

/* Parse the decimal number at the start of text; a missing, negative or
	 oversized number is a configuration error. */
static int conf_number(const char * text)
{
	/* 32767 is the smallest value INT_MAX may have */
	enum { CONF_NUMBER_MAX = 32767 };
	char * stop;
	long value = strtol(text, &stop, 10);

	if(stop == text || value < 0 || value > CONF_NUMBER_MAX)
		fatal(CONFERR, CONFIGFILE);
	return((int) value);
}

/*
 * getconf() reads the settings from CONFIGFILE.
 *
 * File layout: the first significant line must be "HEADER=<text>".  The
 * next line starting with "URL=" gives the page to fetch; lines before it
 * are ignored.  Every significant line after that must be one of
 * LINKS=<n>, NQUOTES=<n>, BEGIN=<text>, END=<text> or FROM=<text>.
 *
 *   url      must be an empty string on entry; needs room for 257 bytes
 *   links    set from LINKS=, left alone when the variable is absent
 *   nquotes  set from NQUOTES=, left alone when the variable is absent
 *   begin    needs room for 32 bytes; left alone when BEGIN= is absent
 *   end      needs room for 32 bytes; left alone when END= is absent
 *   from     needs room for 1024 bytes; left alone when FROM= is absent
 *   header   needs room for 1024 bytes
 *
 * Text values are stored as read, including the line end, and are cut
 * off at the sizes listed above.  Any format problem ends the program
 * through fatal(CONFERR, ...).
 */
void getconf(char * url, int * links, int * nquotes, char * begin, 
						 char * end, char * from, char * header)
{
	/* longest text kept for each kind of value (excluding the NUL) */
	enum { URL_MAX = 256, MARK_MAX = 31, TEXT_MAX = 1023 };
	FILE * fp;
	char buffer[1024]={'\0'};

	fp=fopen(CONFIGFILE, "r");
	if(fp == NULL)
		fatal(CONFERR, CONFIGFILE);

	/*The first line of CONFIGFILE is the header to print*/
	if(skip(fp)==EOF)
		fatal(CONFERR, CONFIGFILE);

	/* read the line first, then check that it really is the header */
	if(fgets(buffer, sizeof(buffer), fp)==NULL ||
			strncmp(buffer, HEADER, strlen(HEADER)) != 0)
		fatal(CONFERR, CONFIGFILE);
	conf_copy(header, &buffer[strlen(HEADER)], TEXT_MAX);

	/*Scroll until a line with URL is found*/
	while(url[0]=='\0')
		{
			if(fgets( buffer, sizeof(buffer), fp) == NULL)
				fatal(CONFERR, CONFIGFILE);
			if(!strncmp(buffer, URL, strlen(URL)))
				conf_copy(url, &buffer[strlen(URL)], URL_MAX);
		}

	/*Then read the other variables.  Comments are skipped before each
		line is read, so the last line of the file is processed too.*/
	while(skip(fp) != EOF && fgets(buffer, sizeof(buffer), fp) != NULL)
		{
			if(!strncmp( buffer, LINKS, strlen(LINKS)))
				*links = conf_number(&buffer[strlen(LINKS)]);
			else if(!strncmp( buffer, NQUOTES, strlen(NQUOTES)))
				*nquotes = conf_number(&buffer[strlen(NQUOTES)]);
			else if(!strncmp( buffer, BEGIN, strlen(BEGIN)))
				conf_copy(begin, &buffer[strlen(BEGIN)], MARK_MAX);
			else if(!strncmp( buffer,END, strlen(END)))
				conf_copy(end, &buffer[strlen(END)], MARK_MAX);
			else if(!strncmp( buffer,FROM, strlen(FROM)))
				conf_copy(from, &buffer[strlen(FROM)], TEXT_MAX);
			else
				fatal(CONFERR, CONFIGFILE);
		}
	fclose(fp);
}


int main(void)
{
	char url[257]={'\0'}, begin[32]="<P>", end[32]="<P>";
	char from[1024]={'\0'}, header[1024]={'\0'}, buffer[1024]={'\0'};
	char * content;
//  int * begin_points=NULL;
	int links=0, nquotes=1, found, n, m;
	time_t t;

	srand(time(&t));
	printf("Content-type: text/html\n\n");

	/*Getting the configurations*/
	getconf(url, &links, &nquotes, begin, end, from, header);
	clearend(url); clearend(begin); clearend(end);

	/*Get the first page*/
	content = geturl(url);

	/*and follow links to the page to quote from*/
	while(links > 0)
		{
			if(begin_points != NULL)
				free(begin_points);

			/*count the urls given in the document*/
			found = find_begins(content,  "<A HREF=\"");
			if( found == 0 )
				fatal(HTMLERR, url);

			n = 1 + (double) found * (double) rand() / RAND_MAX;
			
			m = find_end(content+begin_points[n], "\"", 1);
			strncpy(buffer, content+begin_points[n], m);
			if(!strncmp(buffer, "http://", 7))
				strcpy(url, buffer);
			else
				strcat(url, buffer);

			content = geturl(url);
			links--;
		}
	/*first catch the <TITLE> so we can tell what we've quoted*/
	if(begin_points != NULL)
		free(begin_points);

	find_begins(content, "<TITLE>");
	
	m = find_end(content+begin_points[0], "</TITLE>", 1);
	strncpy(buffer, content+begin_points[0], m);

	/*quote from content*/
	if(begin_points != NULL)
		free(begin_points);
	
	found = find_begins(content, begin);
	if( found == 0 )
		fatal(HTMLERR, url);
	 
	n = 1 + (double) found * (double) rand() / RAND_MAX;
	/*look for the endpoint*/
	found = find_end( content+n, end, nquotes);


	/*lets go for the output!*/
	printf("\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n", TITLE);
	printf("<BASE HREF='%s'>\n", url);
	printf("</HEAD>\n<BODY>\n<FONT SIZE=2>");
	printf("<P ALIGN=CENTER><B>%s</B></P>\n<P ALIGN=LEFT>%s", header, begin);

	for(;found > n; n++)
		putchar(content[n]);

	printf("%s\n</P>\n",end);
	printf("<P ALIGN=RIGHT>\n<I>From: %s, '%s'<BR>\n", from, buffer);
	printf("<A HREF='%s'>%s</A>\n", url, url);
	printf("</BODY>\n</HTML>");

	return(0);
}
