/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -  This software is distributed in the hope that it will be
 -  useful, but with NO WARRANTY OF ANY KIND.
 -  No author or distributor accepts responsibility to anyone for the
 -  consequences of using this software, or for whether it serves any
 -  particular purpose or works at all, unless he or she says so in
 -  writing.  Everyone is granted permission to copy, modify and
 -  redistribute this source code, for commercial or non-commercial
 -  purposes, with the following restrictions: (1) the origin of this
 -  source code must not be misrepresented; (2) modified versions must
 -  be plainly marked as such; and (3) this notice may not be removed
 -  or altered from any source or modified source distribution.
 *====================================================================*/


/*
 *   sarray.c
 *
 *      Create/Destroy/Copy
 *          SARRAY    *sarrayCreate()
 *          SARRAY    *sarrayCreateWordsFromString()
 *          SARRAY    *sarrayCreateLinesFromString()
 *          void       sarrayDestroy()
 *          SARRAY    *sarrayCopy()
 *
 *      Add/Remove string
 *          l_int32    sarrayAddString()
 *          l_int32    sarrayExtendArray()
 *          char      *sarrayRemoveString()
 *
 *      Accessors
 *          l_int32    sarrayGetCount()
 *          char     **sarrayGetArray()
 *          char      *sarrayGetString()
 *
 *      Conversion back to string
 *          char      *sarrayToString()
 *          char      *sarrayToStringRange()
 *
 *      Concatenate 2 sarrays
 *          l_int32    sarrayConcatenate()
 *
 *      Convert word sarray to (formatted) line sarray
 *          SARRAY    *sarrayConvertWordsToLines()
 *
 *      Split string on separator list
 *          SARRAY    *sarraySplitString()
 *
 *      Sort
 *          SARRAY    *sarraySort()
 *          l_int32    stringCompareLexical()
 *
 *      Serialize for I/O
 *          SARRAY    *sarrayRead()
 *          SARRAY    *sarrayReadStream()
 *          l_int32    sarrayWrite()
 *          l_int32    sarrayWriteStream()
 *          l_int32    sarrayAppend()
 *
 *      Comments on usage:
 *
 *          These functions are important for efficient manipulation
 *          of string data.  They have been used in leptonica for
 *          generating and parsing text files, and for generating
 *          code for compilation.  The user is responsible for
 *          correctly disposing of strings that have been extracted
 *          from sarrays.
 *
 *            - When you want a string to inspect it, or plan to
 *              make a copy of it later, use sarrayGetString() with
 *              copyflag = 0.  In this case, you must neither free
 *              the string nor put it directly in another array.
 *
 *            - When you want to insert a string that is in one
 *              array into another array (always leaving the first
 *              array intact), you have two options:
 *
 *                 (1) use copyflag = 1 to make an immediate copy,
 *                     which you must then add to the second array
 *                     by insertion; namely, using 
 *                     sarrayAddString(sa, str, 0), or
 *                 (2) use copyflag = 0 to get another handle to
 *                     the string, in which case you must add
 *                     a copy of it to the second string array,
 *                     using sarrayAddString(sa, str, 1).
 *
 *              In all cases, when you use copyflag = 1 to extract
 *              a string from an array, you must either free it
 *              or put it in an array that will be freed later.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "allheaders.h"

/* Default ptr-array size used by sarrayCreate() when the caller
 * passes n <= 0.  The particular value is arbitrary. */
static const l_int32  INITIAL_PTR_ARRAYSIZE = 50;     /* arbitrary value */
/* Size of the line buffer used by sarrayReadStream() */
static const l_int32  BUFFER_SIZE = 512;

/* DEBUG gates the diagnostic fprintf() calls in this file.  It is
 * defined (as 0, i.e., off) only when NO_CONSOLE_IO is not defined. */
#ifndef  NO_CONSOLE_IO
#define  DEBUG     0
#endif  /* ~NO_CONSOLE_IO */


/*--------------------------------------------------------------------------*
 *                   String array create/destroy/copy/extend                *
 *--------------------------------------------------------------------------*/
/*!
 *  sarrayCreate()
 *
 *      Input:  n  (size of string ptr array to be alloc'd;
 *                  use 0 for default)
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) Any n <= 0 selects the default size INITIAL_PTR_ARRAYSIZE.
 *      (2) The returned sarray holds no strings (its count is 0).
 *      (3) If the ptr array cannot be allocated, the sarray struct
 *          itself is released before returning null.
 */
SARRAY *
sarrayCreate(l_int32  n)
{
SARRAY  *sa;

    PROCNAME("sarrayCreate");

    if (n <= 0)
        n = INITIAL_PTR_ARRAYSIZE;

    if ((sa = (SARRAY *)CALLOC(1, sizeof(SARRAY))) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    if ((sa->array = (char **)CALLOC(n, sizeof(char *))) == NULL) {
        FREE((void *)sa);  /* don't leak the struct when the array fails */
        return (SARRAY *)ERROR_PTR("ptr array not made", procName, NULL);
    }

    sa->nalloc = n;
    sa->n = 0;

    return sa;
}


/*!
 *  sarrayCreateWordsFromString()
 *
 *      Input:  string
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) Words are maximal runs of characters that are not a
 *          space, tab or newline.
 *      (2) The words are counted first, so that the ptr array can be
 *          created with that size; copies of the words are then added
 *          by sarraySplitString().
 *      (3) If the split fails, the partially filled sarray is
 *          destroyed and null is returned.
 */
SARRAY *
sarrayCreateWordsFromString(const char  *string)
{
static const char  separators[] = " \n\t";
l_int32  i, nsub, size, inword, issep;
SARRAY  *sa;

    PROCNAME("sarrayCreateWordsFromString");

    if (!string)
        return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);

        /* Count the words: a word starts at each separator -> non-separator
         * transition (or at a non-separator in the first position) */
    size = strlen(string);
    nsub = 0;
    inword = FALSE;
    for (i = 0; i < size; i++) {
        issep = (string[i] == ' ' || string[i] == '\t' || string[i] == '\n');
        if (!issep && !inword)
            nsub++;
        inword = !issep;
    }

    if ((sa = sarrayCreate(nsub)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    if (sarraySplitString(sa, string, separators)) {
        sarrayDestroy(&sa);
        return (SARRAY *)ERROR_PTR("string not split", procName, NULL);
    }

#if DEBUG
    fprintf(stderr, " nsub = %d, sa->n = %d\n", nsub, sa->n);
#endif  /* DEBUG */

    return sa;
}


/*!
 *  sarrayCreateLinesFromString()
 *
 *      Input:  string
 *              blankflag  (0 to exclude blank lines; 1 to include)
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) Lines are delimited by '\n'.  A final line that is not
 *          terminated by a newline is included as well.
 *      (2) With blankflag != 0, every line is kept, and blank lines
 *          appear as empty strings.  With blankflag == 0, the input is
 *          split with sarraySplitString() on "\n", which yields no
 *          empty strings.
 *      (3) The input string is not modified; the sarray holds copies.
 *      (4) On any failure, all intermediate allocations are released
 *          and null is returned.
 */
SARRAY *
sarrayCreateLinesFromString(char    *string,
                            l_int32  blankflag)
{
l_int32  i, nsub, size, startptr, failed;
char    *cstring;
SARRAY  *sa;

    PROCNAME("sarrayCreateLinesFromString");

    if (!string)
        return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);

        /* Count the lines, to size the ptr array.  A last line without
         * a terminating newline counts too. */
    size = strlen(string);
    nsub = 0;
    for (i = 0; i < size; i++) {
        if (string[i] == '\n')
            nsub++;
    }
    if (size > 0 && string[size - 1] != '\n')
        nsub++;

    if ((sa = sarrayCreate(nsub)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);

    if (blankflag) {  /* keep blank lines as empty strings */
            /* Make a copy that we can write nulls into, like strtok */
        if ((cstring = stringNew(string)) == NULL) {
            sarrayDestroy(&sa);
            return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL);
        }
        failed = FALSE;
        startptr = 0;
        for (i = 0; i < size && !failed; i++) {
            if (cstring[i] == '\n') {
                cstring[i] = '\0';  /* terminate the current line */
                failed = sarrayAddString(sa, cstring + startptr, 1);
                startptr = i + 1;
            }
        }
        if (!failed && startptr < size)  /* no newline at end of last line */
            failed = sarrayAddString(sa, cstring + startptr, 1);
        FREE((void *)cstring);
        if (failed) {
            sarrayDestroy(&sa);
            return (SARRAY *)ERROR_PTR("substring not made", procName, NULL);
        }
    }
    else {  /* remove blank lines */
        if (sarraySplitString(sa, string, "\n")) {
            sarrayDestroy(&sa);
            return (SARRAY *)ERROR_PTR("string not split", procName, NULL);
        }
    }

#if DEBUG
    fprintf(stderr, " nsub = %d, sa->n = %d\n", nsub, sa->n);
#endif  /* DEBUG */

    return sa;
}


/*!
 *  sarrayDestroy()
 *
 *      Input:  &sarray <to be nulled>
 *      Return: void
 *
 *  Notes:
 *      (1) Frees each of the sa->n stored strings, then the ptr array,
 *          then the sarray struct, and finally sets *psa to null.
 *      (2) A null psa produces a warning; a null *psa is silently
 *          ignored.
 */
void
sarrayDestroy(SARRAY  **psa)
{
l_int32  k;
SARRAY  *sarray;

    PROCNAME("sarrayDestroy");

    if (!psa) {
        L_WARNING("ptr address is NULL!", procName);
        return;
    }

    sarray = *psa;
    if (!sarray)
        return;

    if (sarray->array != NULL) {
        k = 0;
        while (k < sarray->n) {
            FREE((void *)sarray->array[k]);
            k++;
        }
        FREE((void *)sarray->array);
    }

    FREE((void *)sarray);
    *psa = NULL;
}

        
/*!
 *  sarrayCopy()
 *
 *      Input:  sarray
 *      Return: copy of sarray, or null on error
 *
 *  Notes:
 *      (1) The copy is created with the same ptr array size (nalloc)
 *          as the source, and receives its own copy of every string.
 *      (2) The result of each sarrayAddString() call is not checked.
 */
SARRAY *
sarrayCopy(SARRAY  *sa)
{
l_int32  k, count;
SARRAY  *copy;

    PROCNAME("sarrayCopy");

    if (sa == NULL)
        return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);

    copy = sarrayCreate(sa->nalloc);
    if (copy == NULL)
        return (SARRAY *)ERROR_PTR("csa not made", procName, NULL);

    count = sa->n;
    for (k = 0; k < count; k++)
        sarrayAddString(copy, sa->array[k], 1);  /* 1: add a copy */

    return copy;
}



/*!
 *  sarrayAddString()
 *
 *      Input:  sarray
 *              string  (string to be added)
 *              copyflag (0 for insertion; 1 for copy)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) With copyflag == 1, a new copy of the string is stored.
 *          With any other value, the input pointer itself is stored,
 *          so it will be freed when the sarray is destroyed.
 *      (2) The ptr array is extended when it is full.  If the
 *          extension fails, nothing is stored, the count is unchanged
 *          and 1 is returned; the input string is left with the caller.
 */
l_int32
sarrayAddString(SARRAY  *sa,
                char    *string,
                l_int32  copyflag)
{
l_int32  n;

    PROCNAME("sarrayAddString");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!string)
        return ERROR_INT("string not defined", procName, 1);

    n = sarrayGetCount(sa);
    if (n >= sa->nalloc) {
            /* Must not write to array[n] unless the extension worked */
        if (sarrayExtendArray(sa))
            return ERROR_INT("extension failed", procName, 1);
    }

    if (copyflag == TRUE) {
        if ((sa->array[n] = stringNew(string)) == NULL)
            return ERROR_INT("cstring not made", procName, 1);
    }
    else   /* insert */
        sa->array[n] = string;
    sa->n++;

    return 0;
}


/*!
 *  sarrayExtendArray()
 *
 *      Input:  sarray
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Doubles the size of the string ptr array.
 *      (2) The new array is taken into a temporary first, so that a
 *          failed reallocation does not overwrite sa->array with null.
 *          NOTE(review): this assumes reallocNew() leaves the old
 *          buffer untouched when it fails -- confirm against its
 *          implementation.
 */
l_int32
sarrayExtendArray(SARRAY  *sa)
{
char  **newarray;

    PROCNAME("sarrayExtendArray");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);

    newarray = (char **)reallocNew((void **)&sa->array,
                                   sizeof(char *) * sa->nalloc,
                                   2 * sizeof(char *) * sa->nalloc);
    if (newarray == NULL)
        return ERROR_INT("new ptr array not returned", procName, 1);

    sa->array = newarray;
    sa->nalloc *= 2;
    return 0;
}


/*!
 *  sarrayRemoveString()
 *
 *      Input:  sarray
 *              index (of string within sarray)
 *      Return: removed string, or null on error
 *
 *  Notes:
 *      (1) The string itself (not a copy) is returned, and it is
 *          no longer counted in the sarray.
 *      (2) The strings after it are each moved down by one slot, so
 *          the order of the remaining strings is preserved.
 */
char *
sarrayRemoveString(SARRAY  *sa,
                   l_int32  index)
{
char    *removed;
char   **strings, **slot, **lastslot;
l_int32  count, nalloc;

    PROCNAME("sarrayRemoveString");

    if (sa == NULL)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    strings = sarrayGetArray(sa, &nalloc, &count);
    if (strings == NULL)
        return (char *)ERROR_PTR("array not returned", procName, NULL);

    if (index < 0 || index >= count)
        return (char *)ERROR_PTR("array index out of bounds", procName, NULL);

    removed = strings[index];

        /* Close the hole by sliding the tail of the array down by one.
         * If order didn't matter, the last string could instead be
         * dropped directly into the hole. */
    lastslot = strings + count - 1;
    for (slot = strings + index; slot < lastslot; slot++)
        slot[0] = slot[1];

    sa->n--;
    return removed;
}


/*----------------------------------------------------------------------*
 *                               Accessors                              *
 *----------------------------------------------------------------------*/
/*!
 *  sarrayGetCount()
 *
 *      Input:  sarray
 *      Return: number of strings stored; 0 if there are none or if
 *              sa is not defined
 */
l_int32
sarrayGetCount(SARRAY  *sa)
{
    PROCNAME("sarrayGetCount");

    if (sa != NULL)
        return sa->n;

    return ERROR_INT("sa not defined", procName, 0);
}
	

/*!
 *  sarrayGetArray()
 *
 *      Input:  sarray
 *              &nalloc  (<optional return> number of allocated string ptrs)
 *              &n  (<optional return> number of strings stored)
 *      Return: ptr to string array, or null on error
 *
 *  Notes:
 *      (1) The returned array is the sarray's own storage, not a copy,
 *          so the caller must not destroy it!
 *      (2) Either output ptr may be null if that value is not wanted.
 *      (3) The outputs are not written when sa is not defined.
 */
char **
sarrayGetArray(SARRAY   *sa,
               l_int32  *pnalloc,
               l_int32  *pn)
{
    PROCNAME("sarrayGetArray");

    if (!sa)
        return (char **)ERROR_PTR("sa not defined", procName, NULL);

    if (pnalloc)
        *pnalloc = sa->nalloc;
    if (pn)
        *pn = sa->n;

    return sa->array;
}


/*!
 *  sarrayGetString()
 *
 *      Input:  sarray
 *              index   (to the index-th string)
 *              copyflag  (0 for string itself; 1 for a copy)
 *      Return: string, or null on error
 *
 *  Notes:
 *      (1) Only copyflag == 1 yields a copy; with any other value the
 *          ptr stored in the sarray is returned.
 */
char *
sarrayGetString(SARRAY  *sa,
                l_int32  index,
                l_int32  copyflag)
{
char  *stored;

    PROCNAME("sarrayGetString");

    if (sa == NULL)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    if (index < 0 || index >= sa->n)
        return (char *)ERROR_PTR("index not valid", procName, NULL);

    stored = sa->array[index];
    return (copyflag == 1) ? stringNew(stored) : stored;
}




/*----------------------------------------------------------------------*
 *                      Conversion to string                           *
 *----------------------------------------------------------------------*/
/*!
 *  sarrayToString()
 *
 *      Input:  sarray
 *              addnlflag (flag: 0 adds nothing to each substring
 *                               1 adds '\n' to each substring
 *                               2 adds ' ' to each substring)
 *      Return: dest string, or null on error
 *
 *  Notes:
 *      (1) Joins all the strings in the sarray, preserving all white
 *          space, by calling sarrayToStringRange() for the full range
 *          (first = 0, nstrings = 0).
 *      (2) The join is deliberately not done by calling strcat() on
 *          dest for each string: strcat() has to find the end of dest
 *          on every call, which gets slower as dest grows.
 */
char *
sarrayToString(SARRAY  *sa,
               l_int32  addnlflag)
{
char  *joined;

    PROCNAME("sarrayToString");

    if (sa == NULL)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    joined = sarrayToStringRange(sa, 0, 0, addnlflag);
    return joined;
}


/*!
 *  sarrayToStringRange()
 *
 *      Input: sarray
 *             first  (index of first string to use; starts with 0)
 *             nstrings (number of strings to append into the result; use
 *                       0 to append to the end of the sarray)
 *             addnlflag (flag: 0 adds nothing to each substring
 *                              1 adds '\n' to each substring
 *                              2 adds ' ' to each substring)
 *      Return: dest string, or null on error
 *
 *  Notes:
 *      (1) Concatenates the specified strings in the sarray, preserving
 *          all white space.  If addnlflag is 1 or 2, a '\n' or a ' '
 *          is added after each substring, including the last one.
 *      (2) If nstrings runs past the end of the sarray, it is clipped
 *          to the number of strings available from @first.
 *      (3) An empty sarray with first == 0 is not an error: the result
 *          is a newly allocated empty string.
 *      (4) The returned string is newly allocated.
 */
char *
sarrayToStringRange(SARRAY  *sa,
                    l_int32  first,
                    l_int32  nstrings,
                    l_int32  addnlflag)
{
char     sepchar;
char    *dest, *src;
l_int32  n, i, last, size, index, len;

    PROCNAME("sarrayToStringRange");

    if (!sa)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    n = sarrayGetCount(sa);
    if (n == 0) {  /* nothing to join */
        if (first != 0)
            return (char *)ERROR_PTR("first not valid", procName, NULL);
        if ((dest = (char *)CALLOC(1, sizeof(char))) == NULL)
            return (char *)ERROR_PTR("dest not made", procName, NULL);
        return dest;
    }

    if (first < 0 || first >= n)
        return (char *)ERROR_PTR("first not valid", procName, NULL);
    if (nstrings == 0 || (nstrings > n - first))
        nstrings = n - first;  /* no overflow */
    last = first + nstrings - 1;

        /* Character appended after each substring; '\0' for none */
    sepchar = '\0';
    if (addnlflag == 1)
        sepchar = '\n';
    else if (addnlflag == 2)
        sepchar = ' ';

        /* Size the output: each string plus room for one separator */
    size = 0;
    for (i = first; i <= last; i++)
        size += strlen(sa->array[i]) + 1;

        /* The extra byte, zeroed by CALLOC, is the terminating null */
    if ((dest = (char *)CALLOC(size + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("dest not made", procName, NULL);

    index = 0;
    for (i = first; i <= last; i++) {
        src = sa->array[i];
        len = strlen(src);
        memcpy(dest + index, src, len);
        index += len;
        if (sepchar != '\0')
            dest[index++] = sepchar;
    }

    return dest;
}


/*----------------------------------------------------------------------*
 *                      Concatenate 2 sarrays                           *
 *----------------------------------------------------------------------*/
/*!
 *  sarrayConcatenate()
 *
 *      Input:  sarray1  (to be added to)
 *              sarray2  (append to sarray1)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Copies of the strings in sarray2 are added to sarray1;
 *          sarray2 is not affected.
 *      (2) The number of strings to append is read once, before any
 *          are added.
 *      (3) If a string cannot be added, the function stops and
 *          returns 1; strings already appended remain in sarray1.
 */
l_int32
sarrayConcatenate(SARRAY  *sa1,
                  SARRAY  *sa2)
{
char    *string;
l_int32  n, i;

    PROCNAME("sarrayConcatenate");

    if (!sa1)
        return ERROR_INT("sa1 not defined", procName, 1);
    if (!sa2)
        return ERROR_INT("sa2 not defined", procName, 1);

    n = sarrayGetCount(sa2);
    for (i = 0; i < n; i++) {
        string = sarrayGetString(sa2, i, 0);
        if (sarrayAddString(sa1, string, 1))
            return ERROR_INT("string not added", procName, 1);
    }

    return 0;
}


/*----------------------------------------------------------------------*
 *                   Convert word sarray to line sarray                 *
 *----------------------------------------------------------------------*/
/*!
 *  sarrayConvertWordsToLines()
 *
 *      Input:  sa  (sa of individual words)
 *              linesize  (max num of chars in each line)
 *      Return: saout (sa of formatted lines), or null on error
 *
 *  This is useful for re-typesetting text to a specific maximum
 *  line length.  The individual words in the input sarray
 *  are concatenated into textlines.  An input word string of zero
 *  length is taken to be a paragraph separator.  Each time
 *  such a string is found, the current line is ended and
 *  a new line is also produced that contains just the
 *  string of zero length ("").  When the output sarray
 *  of lines is eventually converted to a string with newlines
 *  (typically) appended to each line string, the empty
 *  strings are just converted to newlines, producing the visible
 *  paragraph separation.
 *
 *  What happens when a word is larger than linesize?
 *  We write it out as a single line anyway!  Words preceding
 *  or following this long word are placed on lines preceding
 *  or following the line with the long word.  Why this choice?
 *  Long "words" found in text documents are typically URLs, and
 *  it's often desirable not to put newlines in the middle of a URL.
 *  The text display program (e.g., text editor) will typically
 *  wrap the long "word" to fit in the window.
 *
 *  Implementation notes:
 *      (1) Words for the line being built are collected in a temporary
 *          sarray (sal), which is joined with sarrayToString(sal, 2),
 *          so each word in a joined line is followed by a space.
 *      (2) totlen counts the chars in the pending line, including one
 *          separator per word.
 *      (3) sal is always destroyed before returning, and saout is
 *          destroyed if sal cannot be created.
 */
SARRAY *
sarrayConvertWordsToLines(SARRAY  *sa,
                          l_int32  linesize)
{
char    *wd, *strl;
l_int32  n, i, len, totlen;
SARRAY  *sal, *saout;

    PROCNAME("sarrayConvertWordsToLines");

    if (!sa)
        return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);

    if ((saout = sarrayCreate(0)) == NULL)
        return (SARRAY *)ERROR_PTR("saout not defined", procName, NULL);

    n = sarrayGetCount(sa);
    totlen = 0;
    sal = NULL;
    for (i = 0; i < n; i++) {
        if (!sal) {
            if ((sal = sarrayCreate(0)) == NULL) {
                sarrayDestroy(&saout);
                return (SARRAY *)ERROR_PTR("sal not made", procName, NULL);
            }
        }
        wd = sarrayGetString(sa, i, 0);
        len = strlen(wd);
        if (len == 0) {  /* end of paragraph: end line & insert blank line */
            if (totlen > 0) {
                strl = sarrayToString(sal, 2);
                sarrayAddString(saout, strl, 0);
            }
            sarrayAddString(saout, "", 1);
            sarrayDestroy(&sal);
            totlen = 0;
        }
        else if (totlen == 0 && len + 1 > linesize) {  /* long word! */
            sarrayAddString(saout, wd, 1);  /* copy to one line */
        }
        else if (totlen + len + 1 > linesize) {  /* end line & start new one */
            strl = sarrayToString(sal, 2);
            sarrayAddString(saout, strl, 0);
            sarrayDestroy(&sal);
            if ((sal = sarrayCreate(0)) == NULL) {
                sarrayDestroy(&saout);
                return (SARRAY *)ERROR_PTR("sal not made", procName, NULL);
            }
            sarrayAddString(sal, wd, 1);
            totlen = len + 1;
        }
        else {   /* add to current line */
            sarrayAddString(sal, wd, 1);
            totlen += len + 1;
        }
    }
    if (totlen > 0) {   /* didn't end with blank line; output last line */
        strl = sarrayToString(sal, 2);
        sarrayAddString(saout, strl, 0);
    }

        /* sal can still exist with no pending words (e.g., when the
         * last word was a long word); sarrayDestroy() ignores null. */
    sarrayDestroy(&sal);

    return saout;
}


/*----------------------------------------------------------------------*
 *                    Split string on separator list                    *
 *----------------------------------------------------------------------*/
/*!
 *  sarraySplitString()
 *
 *      Input:  sa (to append to; typically empty initially)
 *              str (string to split)
 *              separators (characters that split input string)
 *      Return: 0 if OK, 1 on error.
 *
 *  Notes:
 *      (1) The input string is not modified: tokenizing is done with
 *          strtok() on a private copy, which is freed on all paths.
 *      (2) A copy of each token is appended to sa.  Because strtok()
 *          skips runs of separators, no empty strings are produced.
 *      (3) strtok() keeps internal state between calls, so this
 *          function is not reentrant.
 *      (4) On failure, tokens that were already appended remain in sa.
 */
l_int32
sarraySplitString(SARRAY      *sa,
                  const char  *str,
                  const char  *separators)
{
char  *cstr, *token;

    PROCNAME("sarraySplitString");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!str)
        return ERROR_INT("str not defined", procName, 1);
    if (!separators)
        return ERROR_INT("separators not defined", procName, 1);

        /* strtok inserts nulls; make a copy for strtok to munge */
    if ((cstr = stringNew(str)) == NULL)
        return ERROR_INT("cstr not made", procName, 1);

    for (token = strtok(cstr, separators); token != NULL;
         token = strtok(NULL, separators)) {
            /* copyflag = 1: sa gets its own copy of the token */
        if (sarrayAddString(sa, token, 1)) {
            FREE((void *)cstr);
            return ERROR_INT("substr not made", procName, 1);
        }
    }

    FREE((void *)cstr);
    return 0;
}


/*----------------------------------------------------------------------*
 *                                   Sort                               *
 *----------------------------------------------------------------------*/
/*!
 *  sarraySort()
 *
 *      Input:  saout (output sarray; can be NULL or equal to sain)
 *              sain (input sarray)
 *              sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
 *      Return: saout (output sarray, sorted by ascii value), or null on error
 *
 *  Notes:
 *      (1) Set saout = sain for in-place; otherwise, set saout = NULL,
 *          in which case a sorted copy of sain is returned and sain
 *          is not changed.  Any other saout is an error.
 *      (2) Shell sort, modified from K&R, 2nd edition, p.62.
 *          Simple, but not the fastest possible sort.
 *      (3) Only the string ptrs are exchanged; the strings themselves
 *          are not copied during the sort.
 *      (4) If sortorder is neither of the two values, no strings
 *          are moved.
 */
SARRAY *
sarraySort(SARRAY  *saout,
           SARRAY  *sain,
           l_int32  sortorder)
{
char   **array;
char    *tmp;
l_int32  n, i, j, gap;

    PROCNAME("sarraySort");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);

        /* make saout if necessary; otherwise do in-place */
    if (!saout) {
        if ((saout = sarrayCopy(sain)) == NULL)
            return (SARRAY *)ERROR_PTR("saout not made", procName, NULL);
    }
    else if (sain != saout)
        return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL);
    array = saout->array;  /* operate directly on the array */
    n = sarrayGetCount(saout);

        /* shell sort */
    for (gap = n/2; gap > 0; gap = gap / 2) {
        for (i = gap; i < n; i++) {
            for (j = i - gap; j >= 0; j -= gap) {
                if ((sortorder == L_SORT_INCREASING &&
                     stringCompareLexical(array[j], array[j + gap])) ||
                    (sortorder == L_SORT_DECREASING &&
                     stringCompareLexical(array[j + gap], array[j])))
                {
                    tmp = array[j];
                    array[j] = array[j + gap];
                    array[j + gap] = tmp;
                }
                else {
                        /* The elements below j in this gap-chain are
                         * already in order, so no further swaps can
                         * occur for this i. */
                    break;
                }
            }
        }
    }

    return saout;
}


/*!
 *  stringCompareLexical()
 *
 *      Input:  str1
 *              str2
 *      Return: 1 if str1 > str2 (lexically); 0 otherwise
 *
 *  Notes:
 *      (1) If the lexical values are identical, return a 0, to
 *          indicate that no swapping is required to sort the strings.
 *      (2) The strings are compared char by char over their common
 *          length; if they agree there, the longer string is the
 *          greater one.
 *      (3) Chars are compared as plain 'char' values, so the ordering
 *          of chars outside the 7-bit ascii range depends on whether
 *          'char' is signed on the platform.
 *      (4) If either string is not defined, an error is reported
 *          and 1 is returned.
 */
l_int32
stringCompareLexical(const char *str1,
                     const char *str2)
{
l_int32  i, len1, len2, len;

    PROCNAME("stringCompareLexical");

    if (!str1)
        return ERROR_INT("str1 not defined", procName, 1);
    if (!str2)
        return ERROR_INT("str2 not defined", procName, 1);

    len1 = strlen(str1);
    len2 = strlen(str2);
    len = L_MIN(len1, len2);

        /* The first differing char decides the result */
    for (i = 0; i < len; i++) {
        if (str1[i] == str2[i])
            continue;
        return (str1[i] > str2[i]) ? 1 : 0;
    }

        /* One string is a prefix of the other (or they are equal) */
    return (len1 > len2) ? 1 : 0;
}


/*----------------------------------------------------------------------*
 *                           Serialize for I/O                          *
 *----------------------------------------------------------------------*/
/*!
 *  sarrayRead()
 *
 *      Input:  filename
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) Opens the file with fopenReadStream(), parses it with
 *          sarrayReadStream(), and closes the stream whether or not
 *          the parse succeeded.
 */
SARRAY *
sarrayRead(const char  *filename)
{
FILE    *stream;
SARRAY  *result;

    PROCNAME("sarrayRead");

    if (filename == NULL)
        return (SARRAY *)ERROR_PTR("filename not defined", procName, NULL);

    stream = fopenReadStream(filename);
    if (stream == NULL)
        return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL);

        /* The stream is closed before the parse result is examined */
    result = sarrayReadStream(stream);
    fclose(stream);
    if (result == NULL)
        return (SARRAY *)ERROR_PTR("sa not read", procName, NULL);

    return result;
}


/*!
 *  sarrayReadStream()
 *
 *      Input:  stream
 *      Return: sarray, or null on error
 */
SARRAY *
sarrayReadStream(FILE  *fp)
{
char    *stringbuf, *string;
l_int32  i, j, n, size;
SARRAY  *sa;

    PROCNAME("sarrayReadStream");

    if (!fp)
	return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL);

    if (fscanf(fp, "\nsarray: number of strings = %d\n", &n) != 1)
	return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL);

    if ((sa = sarrayCreate(n)) == NULL)
	return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    
    if ((stringbuf = (char *)CALLOC(BUFFER_SIZE + 1, sizeof(char))) == NULL)
	return (SARRAY *)ERROR_PTR("stringbuf not made", procName, NULL);

    for (i = 0; i < n; i++) {
	fgets(stringbuf, BUFFER_SIZE, fp);
	for (j = 0; j < BUFFER_SIZE; j++) {
	    if (stringbuf[j] == ':')
		break;
	}
	string = stringbuf + j + 3;   /* beginning of stored string */
	size = strlen(string);
	string[size - 1] = '\0';   /* remove \n added by sarrayWriteStream */
	sarrayAddString(sa, string, 1);   /* copy it in */
    }
    fscanf(fp, "\n");

    FREE((void *)stringbuf);
    return sa;
}


/*!
 *  sarrayWrite()
 *
 *      Input:  filename
 *              sarray
 *      Return: 0 if OK; 1 on error
 *
 *  Notes:
 *      (1) The file is opened in "w" mode, so an existing file is
 *          overwritten.
 *      (2) The stream is closed on all paths, including when the
 *          write fails.
 *      (3) A failure reported by fclose() is treated as an error,
 *          because buffered output may only be flushed at that point.
 */
l_int32
sarrayWrite(const char  *filename,
            SARRAY      *sa)
{
FILE    *fp;
l_int32  writefail, closefail;

    PROCNAME("sarrayWrite");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);

    if ((fp = fopen(filename, "w")) == NULL)
        return ERROR_INT("stream not opened", procName, 1);

    writefail = sarrayWriteStream(fp, sa);
    closefail = (fclose(fp) != 0);
    if (writefail)
        return ERROR_INT("sa not written to stream", procName, 1);
    if (closefail)
        return ERROR_INT("stream not closed", procName, 1);

    return 0;
}


/*!
 *  sarrayWriteStream()
 *
 *      Input:  stream
 *              sarray
 *      Return: 0 if OK; 1 on error
 *
 *  Notes:
 *      (1) Writes a blank line, a header line with the number of
 *          strings, one line "  array[i]:  <string>" for each string,
 *          and a final blank line.
 *      (2) The results of the fprintf() calls are not checked.
 */
l_int32
sarrayWriteStream(FILE    *fp,
                  SARRAY  *sa)
{
l_int32  k, count;

    PROCNAME("sarrayWriteStream");

    if (fp == NULL)
        return ERROR_INT("stream not defined", procName, 1);
    if (sa == NULL)
        return ERROR_INT("sa not defined", procName, 1);

    count = sarrayGetCount(sa);
    fprintf(fp, "\nsarray: number of strings = %d\n", count);

    k = 0;
    while (k < count) {
        fprintf(fp, "  array[%d]:  %s\n", k, sa->array[k]);
        k++;
    }

    fprintf(fp, "\n");
    return 0;
}


/*!
 *  sarrayAppend()
 *
 *      Input:  filename
 *              sarray
 *      Return: 0 if OK; 1 on error
 *
 *  Notes:
 *      (1) The file is opened in "a" mode, so the serialized sarray
 *          is added after any existing contents.
 *      (2) The stream is closed on all paths, including when the
 *          write fails.
 *      (3) A failure reported by fclose() is treated as an error,
 *          because buffered output may only be flushed at that point.
 */
l_int32
sarrayAppend(const char  *filename,
             SARRAY      *sa)
{
FILE    *fp;
l_int32  writefail, closefail;

    PROCNAME("sarrayAppend");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);

    if ((fp = fopen(filename, "a")) == NULL)
        return ERROR_INT("stream not opened", procName, 1);

    writefail = sarrayWriteStream(fp, sa);
    closefail = (fclose(fp) != 0);
    if (writefail)
        return ERROR_INT("sa not appended to stream", procName, 1);
    if (closefail)
        return ERROR_INT("stream not closed", procName, 1);

    return 0;
}

