#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <ctype.h> // isspace

/*
 * Extracts the name of the first tag found in `line`.
 *
 * The name is the run of characters that follows the first '<' and ends at
 * the first '>' or ' ' (so "</DL><p>" yields "/DL").
 *
 * Returns a freshly malloc'd, NUL-terminated copy that the caller must
 * free(), or NULL when `line` is NULL, contains no '<', the tag name is not
 * terminated by '>' or ' ' before the end of the string, or allocation fails.
 */
char* readtag(const char* line) {
    if(!line) return NULL;

    const char* open = strchr(line, '<');
    if(!open) return NULL;

    const char* name = open + 1;

    // Length of the name: everything up to the first '>' or ' '.
    size_t len = strcspn(name, "> ");
    // Hitting the terminator first means the tag was never closed.
    if(name[len] == '\0') return NULL;

    char* ret = malloc(len + 1);
    if(!ret) return NULL;

    memcpy(ret, name, len);
    ret[len] = '\0';

    return ret;
}

/*
 * Counts the whitespace characters (as classified by isspace) at the start
 * of `string`; returns 0 when the first character is not whitespace.
 */
int countwhitespace(const char* string) {
    int count = 0;

    // isspace() requires a value representable as unsigned char (or EOF);
    // passing a plain char that happens to be negative is undefined.
    while(isspace((unsigned char)string[count])) {
	count++;
    }

    return count;
}
/*
 * Extracts the URL from an <A> tag.
 *
 * Finds the first "HREF=" in `string` and copies the text between the next
 * pair of double quotes.
 *
 * Returns a freshly malloc'd, NUL-terminated copy that the caller must
 * free(), or NULL when `string` is NULL, has no "HREF=", the value is not
 * enclosed in a complete pair of double quotes, or allocation fails.
 */
char* geturl(const char* string) {
    if(!string) return NULL;

    const char* href = strstr(string, "HREF=");
    if(!href) return NULL;

    // Opening quote; strchr stops at the terminator instead of running off
    // the end of the buffer when the attribute is unquoted.
    const char* start = strchr(href, '\"');
    if(!start) return NULL;
    start++;

    // Closing quote; a missing one means the value is truncated.
    const char* end = strchr(start, '\"');
    if(!end) return NULL;

    size_t len = (size_t)(end - start);

    char* ret = malloc(len + 1);
    if(!ret) return NULL;

    memcpy(ret, start, len);
    ret[len] = '\0';

    return ret;
}

/*
 * Extracts the text that follows the end of a tag.
 *
 * `string` must point inside a tag, after its opening '<': the function
 * skips to the first '>' and copies everything after it up to the next '<'
 * or newline (a DD entry ends in a newline rather than a closing tag).
 *
 * Returns a freshly malloc'd, NUL-terminated copy that the caller must
 * free(), or NULL when `string` is NULL, a '<' or the end of the string is
 * reached before a '>', the content is not terminated by '<' or '\n', or
 * allocation fails.
 */
char* getcontent(const char* string) {
    if(!string) return NULL;

    // Distance to whichever of '>' / '<' / end-of-string comes first; only
    // a '>' is acceptable here.
    size_t skip = strcspn(string, "><");
    if(string[skip] != '>') return NULL;

    const char* start = string + skip + 1;

    // DD ends in a newline
    size_t len = strcspn(start, "<\n");
    if(start[len] == '\0') return NULL;

    char* ret = malloc(len + 1);
    if(!ret) return NULL;

    memcpy(ret, start, len);
    ret[len] = '\0';

    return ret;
}
/* Make a state machine that turns bookmarks.html into a tree of:
 * BookmarksPtr
 *   next
 *   children
 *   type
 *   value
 *     (union)
 *     url_value
 *       desc
 *       properties
 *       name
 *       url
 *     folder_value
 *       desc
 *       properties
 *       name
 *
 * The state machine is necessary as we need to process line-by-line.
 * The structure is necessary because we need to be able to easily merge a set
 * of bookmarks and folders into the existing bookmarks.html.
 */
/*
void new_firefox_worker(void) {
    while(fgets(line, 1024, fp) != NULL) {
	tag = readtag(line);
	if(!tag) continue;

	if(strcmp(tag,"/DL") == 0) return;
	if(strcmp(tag,"HR") == 0) ctx->next = HR;
*/

/*
 * Reads bookmark entries line by line from `fp` and prints them, until the
 * closing </DL> of the current list or end of file.
 *
 * fp:         stream positioned inside a <DL> list; a NULL stream is ignored.
 * foldername: name of the enclosing folder, printed with each bookmark, or
 *             NULL at the top level.
 *
 * Recognised lines:
 *   <HR>            prints "horizontal rule"
 *   <DT><A ...>     prints the bookmark's URL and name
 *   <DT><H3 ...>    prints the folder name (and its <DD> description when
 *                   one follows), then recurses into the folder's <DL>
 *   </DL>           ends the current level
 * Any other line is skipped.
 *
 * NOTE: lines are read into a 1024-byte buffer, so a longer line is seen as
 * several separate fragments.
 */
void firefox_worker(FILE* fp, const char* foldername) {
    char line[1024];

    if(!fp) return;

    // State machine it. ctx can be the parent tag.
    // Write to a tree structure similar to libxml2's.
    while(fgets(line, sizeof line, fp) != NULL) {
	char* tag = readtag(line);
	if(!tag) continue;

	if(strcmp(tag, "HR") == 0) {
	    printf("horizontal rule\n");
	}
	else if(strcmp(tag, "/DL") == 0) {
	    // End of this list: hand control back to the parent level.
	    free(tag);
	    return;
	}
	// Either a bookmark or a folder.
	else if(strcmp(tag, "DT") == 0) {
	    // Step over the leading whitespace and the 4 characters of "<DT>".
	    int offset = countwhitespace(line) + 4;

	    free(tag);
	    tag = readtag(line+offset);
	    if(!tag) continue;

	    // A bookmark
	    if(strcmp(tag, "A") == 0) {
		// +2 skips "<A" so the helpers start inside the tag.
		char* url = geturl(line+offset+2);
		char* name = getcontent(line+offset+2);
		// Passing NULL for %s is undefined, so substitute a marker.
		if(foldername)
		    printf("folder (%s) url: %s, name: %s\n", foldername,
			   url ? url : "(null)", name ? name : "(null)");
		else
		    printf("url: %s, name: %s\n",
			   url ? url : "(null)", name ? name : "(null)");
		free(url);
		free(name);
	    }
	    // A folder
	    else if(strcmp(tag, "H3") == 0) {
		// +3 skips "<H3".
		char* folder = getcontent(line+offset+3);
		printf("folder: %s\n", folder ? folder : "(null)");

		// The next line is either an optional <DD> description or
		// the <DL> that opens the folder's contents.
		if(fgets(line, sizeof line, fp) == NULL) {
		    free(folder);
		    free(tag);
		    return;
		}

		char* tag2 = readtag(line);
		if(tag2 && strcmp(tag2, "DD") == 0) {
		    // Skip any indentation and the '<' itself.
		    char* description = getcontent(line + countwhitespace(line) + 1);
		    printf("description (%s): %s\n",
			   folder ? folder : "(null)",
			   description ? description : "(null)");
		    free(description);

		    // A description was present, so the <DL> line is still
		    // ahead: skip past it. Without a description the line
		    // already read was the <DL>, and reading another would
		    // swallow the folder's first entry.
		    if(fgets(line, sizeof line, fp) == NULL) {
			free(tag2);
			free(folder);
			free(tag);
			return;
		    }
		}
		free(tag2);

		firefox_worker(fp, folder);
		free(folder);
	    }
	}
	free(tag);
    }
}
/*
 * Opens the hard-coded Firefox bookmarks.html and prints its contents via
 * firefox_worker. If the file cannot be opened, the error is reported on
 * stderr and nothing else is done.
 */
void firefox_import_firefox(void) {
    static const char path[] =
	"/home/evan/.mozilla/firefox/j539x5mu.default/bookmarks.html";

    FILE* fp = fopen(path, "r");
    if(!fp) {
	// Neither fgets nor fclose may be given a NULL stream.
	perror(path);
	return;
    }

    firefox_worker(fp, NULL);
    fclose(fp);
}
