/* rbpage.c
 *
 * A "page" is what I call the sub-files inside the .rb file.  An RbPage
 * object is created for each item you wish to add to the book, and may
 * result in multiple Table of Contents (ToC) entries and multiple low-
 * level pages being written into the .rb file.  For instance, if you add
 * a web page to the .rb file, writing the resulting RbPage object will
 * store the necessary .html, .hidx, and (optionally) .hkey pages, as well
 * as writing a .info file for the first .html file that is written.
 */
/* This software is copyrighted as detailed in the LICENSE file. */

#include <config.h>
#include <ctype.h>
#include <rbmake/rbmake.h>
#include "rbmake.h"
#include "rbfile.h"
#include "rbpage.h"
#include "rbhtml.h"
#include "mbuf.h"

/* Forward declarations for the file-local callbacks defined at the bottom
 * of this file: a HashTable_walk() visitor and two qsort() comparators. */
static int delUnusedHidxNames(void *userPtr, const char *key, void *obj);
static int cmpStrings(const void *a, const void *b);
static int cmpPosStrings(const void *a, const void *b);

/* ALIGN attribute values written into the .hidx "[tags]" section, indexed
 * by a TagTree node's align field.  Slot 0 is NULL because RbPage_write()
 * only looks an entry up when align != TT_ALIGN_NONE (presumably 0 --
 * confirm against the TT_ALIGN_* definitions). */
char *alignStrs[] = { NULL, "RIGHT", "JUSTIFY", "CENTER", "LEFT" };

/* Create the page object for `url`.
 *
 *   rb        the book being built; supplies the page-name table, the
 *             substitution rules and the join groups consulted here.
 *   fromPage  the page that led to this one, or NULL; only used to set
 *             the new page's depth to fromPage->depth + 1 (0 when NULL).
 *   url       the page's URL; a private copy is stored in the page.
 *   type      an RB_PAGETYPE_* value.  Except for the cover image, the
 *             type recorded in rb's page-name table overrides it.
 *
 * Returns the new page, or NULL when the name cannot be registered or the
 * resulting type is not one of the types set up below.
 */
RbPage *
RbPage_new(RbMake *rb, RbPage *fromPage, const char *url, int type)
{
    const char *tn;
    RbPage *pg;

    if (type == RB_PAGETYPE_COVER_IMAGE)
	tn = "cover.png";
    else {
	/* Look the URL up in the book's name table, adding it if needed;
	 * the table's name and type are the ones the page will use. */
	NameWithType *nwt;
	if (!(nwt = RbMake_findPageName(rb, url))) {
	    if (!(nwt = RbMake_addPageName(rb, url, type)))
		return NULL;
	}
	tn = nwt->name;
	type = nwt->type;
    }

    pg = Mem_calloc(1, sizeof *pg);
    pg->rb = rb;
    pg->tocName = tn;
    pg->url = Mem_strdup(url);
    pg->charEncoding = XML_CHAR_ENCODING_NONE;
    RbPage_changeType(pg, type);	/* may turn TEXT into RAW_TEXT */
    pg->depth = fromPage? fromPage->depth+1 : 0;

    switch (pg->type) {
      case RB_PAGETYPE_HTML:
      case RB_PAGETYPE_TEXT:
      case RB_PAGETYPE_RAW_TEXT:
	pg->parsePage = true;
	/* FALL THROUGH */
      case RB_PAGETYPE_MAYBE_HTML:
	if (rb->substRules)
	    pg->content = MBuf_new(8192, 0);
	else
	    pg->content = MBuf_new(4096, 4096);
	pg->paras = MArray_new(512, 0);
	pg->fontSize = MArray_new(16, 1024);
	MArray_append(pg->fontSize, 3);
	if (strnEQ(pg->tocName, "menumark", 8))
	    pg->tocFlags = RB_TOCFLAG_MENUMARK_FILE;
	else {
	    pg->tocFlags = RB_TOCFLAG_DEFLATED;
	    if (!rb->rootURL) {
		/* The first such page becomes the book's root page and
		 * takes over the pending request for a .hkey file. */
		if (rb->createHkeyFile) {
		    pg->keys = MArray_new(1024, 0);
		    rb->createHkeyFile = false;
		}
		pg->firstPage = 1;
		rb->rootURL = Mem_strdup(url);
	    }

	    if (rb->pageJoiningMode) {
		/* In modes above 1 the group number is parsed out of the
		 * ToC name (after its first character); otherwise every
		 * page goes into group 0.  The matching group is assumed
		 * to exist in the list. */
		int i = rb->pageJoiningMode > 1? atoi(pg->tocName+1)-1 : 0;
		JoinGroup *jg = rb->joinGroupsHead;
		while (jg->ord != i) jg = jg->next;
		pg->joinGroup = jg;
		pg->joinOrd = 1;
		if (jg->pageList) {
		    /* Later pages share the head page's tag tree, names
		     * and keys, and are appended to the end of the list;
		     * joinOrd ends up as the 1-based list position. */
		    RbPage *po = jg->pageList;
		    pg->tagTreeRoot = po->tagTreeRoot;
		    pg->names = po->names;
		    pg->keys = po->keys;
		    while (po->next) po = po->next, pg->joinOrd++;
		    pg->joinOrd++;
		    po->next = pg;
		}
		else
		    jg->pageList = pg;
		jg->pageCount++;
	    }
	}

	if (!pg->tagTreeRoot) {
	    TagTree *tt = pg->tagTreeRoot = Mem_alloc(sizeof (TagTree));
	    tt->parent = tt->child = tt->nextOrd = NULL;
	    tt->sibling = tt;	/* Misuse the sibling pointer in the root */
	    tt->elnum = tt->align = 0;
	    tt->ord = 1;	/* ord starts out life as a "used" boolean */
	}
	pg->tagTreePos = pg->tagTreeRoot;

	if (!pg->names)
	    pg->names = HashTable_new(501, false, true);
	break;
      case RB_PAGETYPE_AUDIO:
	pg->tocFlags = RB_TOCFLAG_DEFLATED;
	pg->content = MBuf_new(4096, 4096);
	break;
      case RB_PAGETYPE_IMAGE:
      case RB_PAGETYPE_COVER_IMAGE:
	pg->convertImage = 1;
	pg->content = MBuf_new(8192, 0);
	break;
      default:
	/* Unsupported type: release the URL copy as well as the page, so
	 * the failure path does not leak the Mem_strdup() made above. */
	Mem_free((void *)pg->url);
	Mem_free(pg);
	return NULL;
    }

    return pg;
}

/* Release the buffers a page owns: its content buffer and, for the
 * HTML/text family of pages, the tag tree, the names table and the keys
 * array.  A page with joinOrd > 1 shares those three with the head of
 * its join group, so they are only released for joinOrd <= 1.
 *
 * NOTE(review): the RbPage structure itself, its url, paras and fontSize
 * are not released here -- confirm who owns them.
 */
void
RbPage_delete(RbPage *me)
{
    TagTree *node, *following;
    int kind = me->type;

    if (me->content)
	MBuf_delete(me->content);

    /* Only the parsed page types carry the extra structures. */
    if (kind != RB_PAGETYPE_HTML && kind != RB_PAGETYPE_TEXT
     && kind != RB_PAGETYPE_RAW_TEXT && kind != RB_PAGETYPE_MAYBE_HTML)
	return;

    /* Secondary members of a join group borrow them from the head. */
    if (me->joinOrd > 1)
	return;

    /* Every tag-tree node is chained through nextOrd from the root. */
    node = me->tagTreeRoot;
    while (node) {
	following = node->nextOrd;
	Mem_free(node);
	node = following;
    }
    if (me->names)
	HashTable_delete(me->names);
    if (me->keys)
	MArray_delete(me->keys);
}

/* Set the page's type.  A TEXT page becomes RAW_TEXT when the book's text
 * conversion mode is RB_TEXTCONV_NONE, and any HTML, TEXT or RAW_TEXT
 * page is flagged for parsing.  Other types are stored as given and the
 * parse flag is left untouched.
 */
void
RbPage_changeType(RbPage *me, int type)
{
    me->type = type;

    if (me->type == RB_PAGETYPE_TEXT) {
	if (me->rb->textConversionMode == RB_TEXTCONV_NONE)
	    me->type = RB_PAGETYPE_RAW_TEXT;
    }
    else if (me->type != RB_PAGETYPE_HTML
	  && me->type != RB_PAGETYPE_RAW_TEXT)
	return;

    me->parsePage = true;
}

/* Feed `len` bytes at `bp` to the page.
 *
 * A page whose type is still MAYBE_HTML is classified on this call: if
 * the first non-space byte is '<' it becomes an HTML page, otherwise the
 * data is rejected (fatally, if this is the book's first page).
 *
 * Data for a page that is being parsed goes straight to the HTML/text
 * push parser, unless the book has substitution rules, in which case (as
 * for unparsed pages) it is only buffered in the page's content.
 *
 * Returns NULL when the data was accepted, or a static message saying
 * why it was ignored.
 */
const char *
RbPage_appendContent(RbPage *me, const char *bp, int len)
{
    if (me->type == RB_PAGETYPE_MAYBE_HTML) {
	/* Never look beyond the `len` bytes we were handed: the chunk is
	 * not known to be NUL-terminated.  A negative len keeps the old
	 * unbounded scan -- NOTE(review): confirm no caller passes one. */
	int i = 0;
	while ((len < 0 || i < len) && ISSPACE(bp[i])) i++;
	if ((len < 0 || i < len) && bp[i] == '<') /* XXX Make this better?? */
	    RbPage_changeType(me, RB_PAGETYPE_HTML);
	else if (me->firstPage)
	    RbError_exit("First page wasn't HTML or text -- aborting.\n");
	else
	    return "ignoring unknown filetype";
    }
    if (me->parsePage && !me->rb->substRules) {
	switch (me->type) {
	  case RB_PAGETYPE_HTML:
	  case RB_PAGETYPE_RAW_TEXT:
	    RbHtml_parsedPushFunc(me, bp, len);
	    break;
	  case RB_PAGETYPE_TEXT:
	    RbHtml_parsedTextPushFunc(me, bp, len);
	    break;
	}
    }
    else
	MBuf_write(me->content, bp, len);

    return NULL;
}

/* Called once all of a page's data has been appended.  If the data was
 * pushed through the parser as it arrived (no substitution rules, page
 * flagged for parsing, parser context present), flush the parser and
 * clear the parse flag.
 */
void
RbPage_finishContent(RbPage *me)
{
    if (me->rb->substRules || !me->parsePage || !me->ctxt)
	return;

    RbHtml_flushParsedPush(me);
    me->parsePage = false;
}

/* Write a page into the .rb file and release it.
 *
 * Pages with a tag tree (the HTML/text family) are written as an .html
 * page plus an .hidx index and, when keys were collected, an .hkey page;
 * the book's first page also gets an .info page.  A page that is still
 * flagged for parsing is run through the substitution rules (if any) and
 * the parser first.  A page that belongs to a join group only decrements
 * the group's todo counter until that reaches zero; then the whole group
 * is concatenated and written as one .html/.hidx/.hkey set.
 *
 * All other pages are written as a single page, after PNG conversion if
 * the page is flagged for it.
 *
 * Returns NULL on success (including a deferred join-group write), or a
 * static/shared message when the page was ignored or image conversion
 * failed; in those cases the page is not released.
 */
const char *
RbPage_write(RbPage *me)
{
    RbMake *rb = me->rb;

    if (me->tagTreeRoot) {
	int32 jOff, *joinOffs;
	TagTree *tt;
	const char **namesArray, **cpp, *cp;
	NodeWithOffset *nwo;
	JoinGroup *jg;
	RbPage *po;
	MBuf *mb;
	int cnt;
	char buf[128];
	char *tnsuf, *tn = Mem_strdup(me->tocName);

	/* tn is a scratch copy of the ToC name whose suffix (at tnsuf) is
	 * overwritten below with "info", "html", "hidx" and "hkey".  For a
	 * joined name the "#..." part is cut off and the suffix is taken to
	 * be the 4 characters before it. */
	if ((tnsuf = strchr(tn, '#')) != NULL) {
	    *tnsuf = '\0';
	    tnsuf -= 4;
	}
	else
	    tnsuf = rbGetFileSuffix(tn);

	if (me->type == RB_PAGETYPE_MAYBE_HTML) {
	    if (me->firstPage)
		RbError_exit("First page wasn't HTML or text -- aborting.\n");
	    Mem_free(tn);	/* don't leak the scratch name */
	    return "ignored unknown filetype";
	}

	if (me->parsePage) {
	    /* The raw data was only buffered; parse it now into a fresh
	     * content buffer. */
	    char *dp;
	    mb = me->content;
	    me->content = MBuf_new(4096, 4096);
	    if (rb->substRules)
		mb = Subst_runRules(rb->substRules, me->url, mb);
	    MBuf_setReadPos(mb, 0, 0);
	    switch (me->type) {
	      case RB_PAGETYPE_HTML:
	      case RB_PAGETYPE_RAW_TEXT:
		while (1) {
		    int len = mb->totalLen;
		    if ((dp = MBuf_dataPtr(mb, &len)) == NULL)
			break;
		    RbHtml_parsedPushFunc(me, dp, len);
		}
		break;
	      case RB_PAGETYPE_TEXT:
		while (1) {
		    int len = mb->totalLen;
		    if ((dp = MBuf_dataPtr(mb, &len)) == NULL)
			break;
		    RbHtml_parsedTextPushFunc(me, dp, len);
		}
		break;
	    }
	    RbHtml_flushParsedPush(me);
	}

	if (me->firstPage) {
	    /* Fill in the book's info hash and write it as the .info page
	     * that goes with the first page. */
	    RbInfoHash *ih = rb->infoHash;
	    RbInfoHash_store(ih, "COMMENT", "Info file for the following eBook");
	    RbInfoHash_maybeStore(ih, "TITLE", "<unknown>");
	    RbInfoHash_maybeStore(ih, "AUTHOR", "<unknown>");
	    if (rb->foundMenumark || rb->menuItems->totalLen)
		RbInfoHash_store(ih, "MENUMARK", "menumark.html");
	    else
		RbInfoHash_remove(ih, "MENUMARK");
	    RbInfoHash_store(ih, "TYPE", "2");
	    RbInfoHash_store(ih, "PARSE", "1");
	    RbInfoHash_store(ih, "OUTPUT", "1");
	    RbInfoHash_store(ih, "GENERATOR", (char*)RbMake_getGenerator(rb));
	    RbInfoHash_store(ih, "BODY", tn);
	    if (me->keys)
		RbInfoHash_store(ih, "HKEY", "1");
	    else
		RbInfoHash_remove(ih, "HKEY");
	    /* Bump an existing VERSION, or start at 1. */
	    if ((cp = RbInfoHash_fetch(ih, "VERSION")) != NULL)
		sprintf(buf, "%d", atoi(cp) + 1);
	    else
		strcpy(buf, "1");
	    RbInfoHash_store(ih, "VERSION", buf);

	    /* A file name starting with '_' is re-derived; the new name is
	     * kept only if it no longer starts with '_'. */
	    if (*rb->file->fileName == '_') {
		char *munged = RbMake_mungeBookName(rb, rb->file->fileName);
		if (*munged != '_') {
		    Mem_free(rb->file->fileName);
		    rb->file->fileName = munged;
		}
		else
		    Mem_free(munged);
	    }

	    rb->finishInfoPage(me, ih);
	    mb = RbInfoHash_toMBuf(ih);

	    strcpy(tnsuf, "info");
	    RbFile_writePage(rb->file, tn, RB_PAGETYPE_INFO, 2, mb);
	    MBuf_delete(mb);
	}

	if (me->joinOrd) {
	    /* Joined pages are written together, by whichever member
	     * brings the group's todo counter down to zero. */
	    jg = me->joinGroup;
	    if (--jg->todo > 0) {
		Mem_free(tn);
		return NULL;
	    }
	    me = jg->pageList;
	}
	else
	    jg = NULL;

	/* joinOffs[k] is the offset added to positions recorded in the
	 * page with joinOrd k (slots 0 and 1 are both 0, so unjoined and
	 * first pages are not shifted). */
	joinOffs = Mem_alloc((jg? jg->pageCount+2 : 2) * sizeof (int32));
	joinOffs[0] = joinOffs[1] = 0;
	if (jg) {
	    po = me;
	    mb = po->content;
	    joinOffs[po->joinOrd+1] = po->content->totalLen;
	    while ((po = po->next) != NULL) {
		joinOffs[po->joinOrd+1] = joinOffs[po->joinOrd]
					+ po->content->totalLen;
		MBuf_appendMBuf(mb, po->content);
		po->content = NULL;
	    }
	}
	else
	    mb = me->content;

	strcpy(tnsuf, "html");
	RbFile_writePage(rb->file, tn, me->type, me->tocFlags, mb);
	MBuf_delete(mb);
	me->content = NULL;

	/* Build the .hidx page.  First the [tags] section: number the
	 * nodes that were marked as used, then list each with its
	 * parent's number (0-based in the output). */
	mb = MBuf_new(4096, 4096);
	cnt = me->tagTreeRoot->ord = 0;
	for (tt = me->tagTreeRoot->nextOrd; tt; tt = tt->nextOrd) {
	    if (tt->ord)
		tt->ord = ++cnt;
	}
	sprintf(buf, "[tags %d]\n", cnt);
	MBuf_puts(mb, buf);
	for (tt = me->tagTreeRoot->nextOrd; tt; tt = tt->nextOrd) {
	    if (tt->ord) {
		MBuf_putc(mb, '<');
		MBuf_puts(mb, tagInfo[tt->elnum - 1].tag);
		if (tt->align != TT_ALIGN_NONE) {
		    sprintf(buf, " ALIGN=\"%s\"", alignStrs[tt->align]);
		    MBuf_puts(mb, buf);
		}
		sprintf(buf, "> %d\n", tt->parent->ord - 1);
		MBuf_puts(mb, buf);
	    }
	}

	/* The [paragraphs] section: total count, then one line per
	 * paragraph with its (shifted) offset and tag number. */
	cnt = 0;
	po = me;
	do {
	    cnt += MArray_itemCnt(po->paras);
	} while ((po = po->next) != NULL);
	sprintf(buf, "\n[paragraphs %d]\n", cnt);
	MBuf_puts(mb, buf);

	po = me;
	do {
	    jOff = joinOffs[po->joinOrd];
	    while ((nwo = MArray_fetchPtr(po->paras)) != NULL) {
		sprintf(buf,"%d %d\n",nwo->htmlOffset+jOff,nwo->tagNode->ord-1);
		Mem_free(nwo);
		MBuf_puts(mb, buf);
	    }
	    MBuf_delete(po->paras);
	} while ((po = po->next) != NULL);

	/* Anchors that a menu item points at in this page count as used. */
	if (rb->menuItems->totalLen) {
	    char *item, *eq, *ref;
	    MArray_setFetchPos(rb->menuItems, 0);
	    while ((item = MArray_fetchPtr(rb->menuItems)) != NULL) {
		if ((eq = strchr(item, '=')) == NULL)
		    continue;
		if ((ref = RbPage_makeRbRef(me, eq+1)) != NULL) {
		    if (*ref == '#')
			RbPage_usedHidxName(me, ref+1, 1);
		    Mem_free(ref);
		}
	    }
	}

	/* The [names] section: the surviving anchor names, sorted. */
	HashTable_walk(me->names, NULL, delUnusedHidxNames);
	namesArray = HashTable_keys(me->names);
	sprintf(buf, "\n[names %d]\n", HashTable_itemCnt(me->names));
	MBuf_puts(mb, buf);
	qsort(namesArray, HashTable_itemCnt(me->names), PTRSIZE, cmpStrings);
	for (cp = *(cpp = namesArray); cp; cp = *(++cpp)) {
	    HtmlPosition *hp = HashTable_fetch(me->names, cp);
	    /* The name comes from the document and can be any length, so
	     * it is written directly instead of being formatted into the
	     * fixed-size buf; only the number goes through sprintf(). */
	    MBuf_putc(mb, '"');
	    MBuf_write(mb, cp, strlen(cp));
	    sprintf(buf, "\" %d\n", hp->htmlOffset + joinOffs[hp->joinOrd]);
	    MBuf_puts(mb, buf);
	}
	Mem_free(namesArray);
	HashTable_delete(me->names);
	me->names = NULL;

	strcpy(tnsuf, "hidx");
	RbFile_writePage(rb->file, tn, RB_PAGETYPE_HIDX, me->tocFlags, mb);
	MBuf_delete(mb);

	if (me->keys) {
	    /* The .hkey page: keys sorted by string, one "string<TAB>pos"
	     * line each, skipping an entry that repeats the previously
	     * written string at the same position. */
	    StringWithPosition *swp, *oswp = NULL;
	    mb = MBuf_new(4096, 4096);
	    qsort(MArray_dataPtrAt(me->keys, 0),
		  MArray_itemCnt(me->keys), PTRSIZE, cmpPosStrings);
	    MArray_setFetchPos(me->keys, 0);
	    while ((swp = MArray_fetchPtr(me->keys)) != NULL) {
		int pos = swp->pos.htmlOffset + joinOffs[swp->pos.joinOrd];
		if (oswp) {
		    if (pos == oswp->pos.htmlOffset+joinOffs[oswp->pos.joinOrd]
		     && strEQ(swp->string, oswp->string)) {
			Mem_free(swp);
			continue;
		    }
		    Mem_free(oswp);
		}
		sprintf(buf, "\t%d\n",
			swp->pos.htmlOffset + joinOffs[swp->pos.joinOrd]);
		MBuf_vwrite(mb, swp->string,-1, buf,-1, NULL);
		oswp = swp;
	    }
	    if (oswp)
		Mem_free(oswp);

	    strcpy(tnsuf, "hkey");
	    RbFile_writePage(rb->file, tn, RB_PAGETYPE_HKEY, me->tocFlags, mb);
	    MBuf_delete(mb);
	    MArray_delete(me->keys);
	    me->keys = NULL;
	}
	Mem_free(joinOffs);	/* last used by the keys loop above */
	Mem_free(tn);
	if (jg) {
	    RbPage *next;
	    for (po = me->next; po; po = next) {
		next = po->next;
		RbPage_delete(po);
	    }
	}
	RbPage_delete(me);
    }
    else {
	if (me->convertImage) {
	    MBuf *mb = RbImage_turnMBufIntoPngMBuf(me->content, me->url,
			me->type == RB_PAGETYPE_COVER_IMAGE?
			RB_IMAGE_FULLPAGE_PORTRAIT : 0);
	    if (mb == NULL)
		return RbImage_lastErrorMessage();
	    me->content = mb;
	}
	RbFile_writePage(rb->file, me->tocName, me->type, me->tocFlags,
			 me->content);
	RbPage_delete(me);
    }

    return NULL;
}

/* Discard a page without writing it.  Dropping the book's first page is
 * fatal.  A page in a join group is unlinked from the group's list; if
 * other pages remain, RbPage_write() is called on the dropped page (made
 * to look like an already-parsed HTML page) so the group's todo counter
 * is decremented and the remaining pages get written if that was the
 * last one outstanding.
 *
 * NOTE(review): when the dropped page is the head of its group
 * (joinOrd == 1) and other pages remain, the RbPage_delete() at the end
 * releases the tag tree, names and keys that the later pages share with
 * it -- confirm this cannot happen, or that ownership is handled
 * elsewhere.
 */
void
RbPage_drop(RbPage *me)
{
    if (me->tagTreeRoot) {
	if (me->firstPage)
	    RbError_exit("First page was dropped -- aborting.\n");

	if (me->joinOrd) {
	    JoinGroup *group = me->joinGroup;
	    RbPage **link = &group->pageList;

	    /* Find the pointer that refers to this page and bypass it. */
	    while (*link != me)
		link = &(*link)->next;
	    *link = me->next;

	    group->pageCount--;
	    if (group->pageList) {
		me->type = RB_PAGETYPE_HTML;
		me->parsePage = false;
		RbPage_write(me); /* --todo, write other pages (if done) */
	    }
	}
    }
    RbPage_delete(me);
}

/* Turn `url` (optionally ending in "#fragment") into the reference text
 * to use from page `me`, based on the ToC name registered for the URL.
 *
 * The result is the target's ToC name followed by the fragment.  If the
 * target's ToC name itself contains '#' (a joined page), the fragment is
 * introduced by RB_JOIN_NAME_SEP instead of '#', and the part of the name
 * before its '#' is omitted when it matches this page's name.  For an
 * unjoined target that is this very page, only the fragment is returned.
 *
 * `url` is modified while the function runs but is restored before it
 * returns.  Returns a newly allocated string the caller must free, or
 * NULL when the URL has no registered page name.
 */
char *
RbPage_makeRbRef(RbPage *me, char *url)
{
    char *frag = strchr(url, '#');
    char *target, *joinMark, *ref;
    NameWithType *entry;

    /* Look the URL up without its fragment. */
    if (frag)
	*frag = '\0';
    entry = RbMake_findPageName(me->rb, url);
    if (entry == NULL) {
	if (frag)
	    *frag = '#';
	return NULL;
    }

    target = entry->name;
    joinMark = strchr(target, '#');

    /* Re-attach the fragment using the separator the output needs. */
    if (!frag)
	frag = "";
    else if (joinMark)
	*frag = RB_JOIN_NAME_SEP;
    else
	*frag = '#';

    if (joinMark) {
	/* Same joined file (compared up to and including the '#')? */
	if (strnEQ(target, me->tocName, joinMark - target + 1))
	    target = joinMark;
    }
    else if (*frag && strEQ(target, me->tocName))
	target = "";

    ref = Mem_alloc(strlen(target) + strlen(frag) + 1);
    sprintf(ref, "%s%s", target, frag);

    /* Put the caller's URL back the way it was. */
    if (*frag == RB_JOIN_NAME_SEP)
	*frag = '#';

    return ref;
}

/* Return the RbMake object this page was created for. */
RbMake *
RbPage_getRbMake(RbPage *me)
{
    RbMake *owner = me->rb;

    return owner;
}

/* Return the page's stored URL (owned by the page; do not free). */
const char *
RbPage_getUrl(RbPage *me)
{
    const char *pageUrl = me->url;

    return pageUrl;
}

/* Return the Table of Contents name assigned to the page. */
const char *
RbPage_getTocName(RbPage *me)
{
    const char *name = me->tocName;

    return name;
}


/* Return the page's current content buffer (may be NULL). */
MBuf *
RbPage_getContent(RbPage *me)
{
    MBuf *buffer = me->content;

    return buffer;
}

/* Return the page's convert-image flag. */
bool
RbPage_getConvertImage(RbPage *me)
{
    bool flag = me->convertImage;

    return flag;
}

/* Return the page's parse flag. */
bool
RbPage_getParsePage(RbPage *me)
{
    bool flag = me->parsePage;

    return flag;
}

/* Return the page's current RB_PAGETYPE_* value. */
int
RbPage_getType(RbPage *me)
{
    int kind = me->type;

    return kind;
}

/* Make `mb` the page's content buffer.  The previous buffer, if any, is
 * neither freed nor otherwise touched here. */
void
RbPage_setContent(RbPage *me, MBuf *mb)
{
    MBuf *replacement = mb;

    me->content = replacement;
}

/* Set the page's convert-image flag, which RbPage_write() checks before
 * converting the content to PNG. */
void
RbPage_setConvertImage(RbPage *me, bool trueOrFalse)
{
    bool flag = trueOrFalse;

    me->convertImage = flag;
}

/* Set the page's parse flag, which decides whether the content is run
 * through the HTML/text parser. */
void
RbPage_setParsePage(RbPage *me, bool trueOrFalse)
{
    bool flag = trueOrFalse;

    me->parsePage = flag;
}

/* Fetch the index entry for anchor `name`, creating it if necessary, and
 * mark it as used when `used` is true.
 *
 * A new entry starts with htmlOffset -1 (position not yet noted) and with
 * flags set when the page-joining mode is not 1 and the book either
 * follows links or has more than one HTML page.  Returns the entry, which
 * stays owned by the page's names table.
 */
HtmlPosition *
RbPage_usedHidxName(RbPage *me, const char *name, bool used)
{
    HtmlPosition *entry = HashTable_fetch(me->names, name);

    if (entry == NULL) {
	RbMake *maker = me->rb;

	entry = Mem_alloc(sizeof *entry);
	entry->htmlOffset = -1;
	entry->flags = maker->pageJoiningMode != 1
		    && (maker->followLinks || maker->htmlCnt > 1);
	HashTable_store(me->names, name, entry);
    }

    if (used)
	entry->flags = 1;
    return entry;
}

/* Record that anchor `name` lives at offset `hOff` of this page's HTML.
 * The page's joinOrd is stored alongside so the offset can be shifted
 * when joined pages are concatenated.  The entry is created if needed;
 * its "used" state is not changed. */
void
RbPage_noteHidxName(RbPage *me, const char *name, int32 hOff)
{
    HtmlPosition *entry = RbPage_usedHidxName(me, name, 0);

    entry->joinOrd = me->joinOrd;
    entry->htmlOffset = hOff;
}

/* HashTable_walk() visitor used by RbPage_write() on the names table.
 * Returns 1 for an entry that has its flags set and a noted position
 * (htmlOffset >= 0), and -1 for every other entry -- presumably the
 * walker's "delete this item" result; confirm against HashTable_walk().
 * userPtr and key are unused. */
static int
delUnusedHidxNames(void *userPtr, const char *key, void *obj)
{
    const HtmlPosition *entry = obj;

    if (entry->flags && entry->htmlOffset >= 0)
	return 1;
    return -1;
}

/* Add a paragraph entry (HTML offset `off`, tag node `tt`) to the page's
 * paragraph list and mark `tt` and its not-yet-marked ancestors as used.
 *
 * The new entry normally goes at the end; if its offset is smaller than
 * that of the current last entry, it is stored in front of that entry
 * instead (the two trade places).
 */
void
RbPage_noteHidxPara(RbPage *me, int32 off, TagTree *tt)
{
    int lastIdx = MArray_itemCnt(me->paras) - 1;
    NodeWithOffset *last = MArray_fetchPtrAt(me->paras, lastIdx);
    NodeWithOffset *fresh = Mem_alloc(sizeof *fresh);
    TagTree *node = tt;

    fresh->htmlOffset = off;
    fresh->tagNode = tt;

    /* Mark this node and any unused parent nodes as needed */
    for (;;) {
	node->ord = 1;
	node = node->parent;
	if (node->ord != 0)
	    break;
    }

    if (last && off < last->htmlOffset) {
	/* Out of order: slot the new entry in before the old last one. */
	MArray_storePtrAt(me->paras, lastIdx, fresh);
	MArray_appendPtr(me->paras, last);
    }
    else
	MArray_appendPtr(me->paras, fresh);
}

/* Remove a paragraph entry from the end of the page's paragraph list.
 *
 *   elnum     0 to remove the very last entry; otherwise the entry
 *             removed is the last one whose tag node has this element
 *             number.
 *   fudgePos  subtracted from the htmlOffset of every entry that follows
 *             the removed one (those entries move down one slot).
 *
 * Nothing is changed when the list is empty or no entry matches `elnum`.
 * The search is finished before anything is moved, so a failed search
 * cannot leave the list with a duplicated and a lost entry.
 */
void
RbPage_delLastHidxPara(RbPage *me, int elnum, int fudgePos)
{
    int end = MArray_itemCnt(me->paras) - 1;
    int victim, i;
    NodeWithOffset *nwo;

    if (end < 0)
	return;

    /* Locate the entry to remove, scanning back from the end. */
    victim = end;
    if (elnum) {
	while (victim >= 0) {
	    nwo = MArray_fetchPtrAt(me->paras, victim);
	    if (nwo->tagNode->elnum == elnum)
		break;
	    victim--;
	}
	if (victim < 0)
	    return;
    }

    Mem_free(MArray_fetchPtrAt(me->paras, victim));

    /* Close the gap, adjusting the offsets of the entries that move. */
    for (i = victim + 1; i <= end; i++) {
	nwo = MArray_fetchPtrAt(me->paras, i);
	nwo->htmlOffset -= fudgePos;
	MArray_storePtrAt(me->paras, i - 1, nwo);
    }
    MArray_truncate(me->paras, end);
}

/* qsort() comparator for an array of string pointers: orders the
 * elements by strCmp() of the strings they point to. */
static int
cmpStrings(const void *a, const void *b)
{
    char *left = *(char *const *)a;
    char *right = *(char *const *)b;

    return strCmp(left, right);
}

static int
cmpPosStrings(const void *a, const void *b)
{
    return strCmp((*(StringWithPosition**)a)->string,
		  (*(StringWithPosition**)b)->string);
}
