/* Copyright (C) 2005 to 2010 Chris Vine

The library comprised in this file or of which this file is part is
distributed by Chris Vine under the GNU Lesser General Public
License as follows:

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License, version 2.1, for more details.

   You should have received a copy of the GNU Lesser General Public
   License, version 2.1, along with this library (see the file LGPL.TXT
   which came with this source code package in the src/utils sub-directory);
   if not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA, 02111-1307, USA.

However, it is not intended that the object code of a program whose
source code instantiates a template from this file or uses macros or
inline functions (of any length) should by reason only of that
instantiation or use be subject to the restrictions of use in the GNU
Lesser General Public License.  With that in mind, the words "and
macros, inline functions and instantiations of templates (of any
length)" shall be treated as substituted for the words "and small
macros and small inline functions (ten lines or less in length)" in
the fourth paragraph of section 5 of that licence.  This does not
affect any other reason why object code may be subject to the
restrictions in that licence (nor for the avoidance of doubt does it
affect the application of section 2 of that licence to modifications
of the source code in this file).

*/

#ifndef CGU_CONVERT_H
#define CGU_CONVERT_H

#include <string>
#include <iterator>
#include <exception>

#include <glib.h>

#include <c++-gtk-utils/cgu_config.h>

namespace Cgu {

/**
 * @file convert.h
 * @brief This file contains functions for converting between
 * character sets.
 *
 * \#include <c++-gtk-utils/convert.h>
 *
 * This file contains functions for converting between character sets.
 * If you want these functions to work, you will generally have needed
 * to have set the locale in the relevant program with either
 * <em>std::locale::global(std::locale(""))</em> (from the C++
 * standard library) or <em>setlocale(LC_ALL,"")</em> (from the C
 * standard library).
 */ 

/**
 * @namespace Cgu::Utf8
 * @brief This namespace contains utilities relevant to the use of
 * UTF-8 in programs.
 *
 * \#include <c++-gtk-utils/convert.h> (for conversion and validation
 * functions)
 *
 * \#include <c++-gtk-utils/reassembler.h> (for Reassembler class)
 * @sa convert.h reassembler.h
 *
 * This namespace contains utilities relevant to the use of UTF-8 in
 * programs.  If you want these functions to work, you will generally
 * have needed to have set the locale in the relevant program with
 * either <em>std::locale::global(std::locale(""))</em> (from the C++
 * standard library) or <em>setlocale(LC_ALL,"")</em> (from the C standard
 * library).
 */ 

namespace Utf8 {

class ConversionError: public std::exception {
  gchar* message;
public:
  virtual const char* what() const throw() {return message;}
  ConversionError(GError* error) throw() {
    g_strdup_printf("Utf8::ConversionError: %s", error->message);
  }
  ~ConversionError() throw() {g_free(message);}
};

/**
 * Converts text from UTF-8 to the system's Unicode wide character
 * representation, which will be UCS-4/UTF-32 for systems with a wide
 * character size of 4 (almost all unix-like systems), and UTF-16 for
 * systems with a wide character size of 2.
 * @param input Text in valid UTF-8 format.
 * @return The input text converted to UCS-4 or UTF-16.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid UTF-8
 * format or the system does not support wide character Unicode
 * strings.
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::wstring uniwide_from_utf8(const std::string& input);

/**
 * Converts text from the system's Unicode wide character
 * representation, which will be UCS-4/UTF-32 for systems with a wide
 * character size of 4 (almost all unix-like systems) and UTF-16 for
 * systems with a wide character size of 2, to narrow character UTF-8
 * format.
 * @param input Text in valid UCS-4 or UTF-16 format.
 * @return The input text converted to UTF-8.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid UCS-4
 * or UTF-16 format or the system does not support wide character
 * Unicode strings.
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::string uniwide_to_utf8(const std::wstring& input);


/**
 * Converts text from UTF-8 to the system's wide character locale
 * representation.  For this function to work correctly, the system's
 * installed iconv() must support conversion to a generic wchar_t
 * target, but in POSIX whether it does so is implementation defined
 * (GNU's C library implementation does).  For most unix-like systems
 * the wide character representation will be Unicode (UCS-4/UTF-32 or
 * UTF-16), and where that is the case use the uniwide_from_utf8()
 * function instead, which will not rely on the generic target being
 * available.
 * @param input Text in valid UTF-8 format.
 * @return The input text converted to the system's wide character
 * locale representation.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid UTF-8
 * format, or cannot be converted to the system's wide character
 * locale representation (eg because the input characters cannot be
 * represented by that encoding, or the system's installed iconv()
 * function does not support conversion to a generic wchar_t target).
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::wstring wide_from_utf8(const std::string& input);


/**
 * Converts text from the system's wide character locale
 * representation to UTF-8.  For this function to work correctly, the
 * system's installed iconv() must support conversion from a generic
 * wchar_t target, but in POSIX whether it does so is implementation
 * defined (GNU's C library implementation does).  For most unix-like
 * systems the wide character representation will be Unicode
 * (UCS-4/UTF-32 or UTF-16), and where that is the case use the
 * uniwide_to_utf8() function instead, which will not rely on the
 * generic target being available.
 * @param input Text in a valid wide character locale format.
 * @return The input text converted to UTF-8.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in a valid wide
 * character locale format, or cannot be converted to UTF-8 (eg
 * because the system's installed iconv() function does not support
 * conversion from a generic wchar_t target).
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::string wide_to_utf8(const std::wstring& input);

/**
 * Converts text from UTF-8 to the system's filename encoding.
 * @param input Text in valid UTF-8 format.
 * @return The input text converted to filename encoding.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid UTF-8
 * format, or cannot be converted to filename encoding (eg because the
 * input characters cannot be represented by that encoding).
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 * @note glib takes the system's filename encoding from the
 * environmental variables G_FILENAME_ENCODING and G_BROKEN_FILENAMES.
 * If G_BROKEN_FILENAMES is set to 1 and G_FILENAME_ENCODING is not
 * set, it will be assumed that the filename encoding is the same as
 * the locale encoding.  If G_FILENAME_ENCODING is set, then
 * G_BROKEN_FILENAMES is ignored, and filename encoding is taken from
 * the value held by G_FILENAME_ENCODING.
 *
 * Since 0.9.2
 */
std::string filename_from_utf8(const std::string& input);

/**
 * Converts text from the system's filename encoding to UTF-8.
 * @param input Text in valid filename encoding.
 * @return The input text converted to UTF-8.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid
 * filename encoding.
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 * @note glib takes the system's filename encoding from the
 * environmental variables G_FILENAME_ENCODING and G_BROKEN_FILENAMES.
 * If G_BROKEN_FILENAMES is set to 1 and G_FILENAME_ENCODING is not
 * set, it will be assumed that the filename encoding is the same as
 * the locale encoding.  If G_FILENAME_ENCODING is set, then
 * G_BROKEN_FILENAMES is ignored, and filename encoding is taken from
 * the value held by G_FILENAME_ENCODING.
 *
 * Since 0.9.2
 */
std::string filename_to_utf8(const std::string& input); 

/**
 * Converts text from UTF-8 to the system's locale encoding.
 * @param input Text in valid UTF-8 format.
 * @return The input text converted to locale encoding.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid UTF-8
 * format, or cannot be converted to locale encoding (eg because the
 * input characters cannot be represented by that encoding).
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::string locale_from_utf8(const std::string& input);

/**
 * Converts text from the system's locale encoding to UTF-8.
 * @param input Text in valid locale encoding.
 * @return The input text converted to UTF-8.
 * @exception Cgu::Utf8::ConversionError This exception will be thrown
 * if conversion fails because the input string is not in valid locale
 * encoding.
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if memory is exhausted and the system throws in that case.
 *
 * Since 0.9.2
 */
std::string locale_to_utf8(const std::string& input); 

/**
 * Indicates whether the input text comprises valid UTF-8.
 * @param text The text to be tested.
 * @return true if the input text is in valid UTF-8 format, otherwise
 * false.
 * @exception std::bad_alloc This function might throw std::bad_alloc
 * if std::string::data() might throw when memory is exhausted.
 * @note \#include <c++-gtk-utils/convert.h> for this function.
 *
 * Since 0.9.2
 */
inline bool validate(const std::string& text) {
  // the final (null) argument means the position of the first invalid
  // byte is not requested - only the yes/no result is used
  if (g_utf8_validate(text.data(), text.size(), 0)) {
    return true;
  }
  return false;
}

/************** Iterator class **************/

/** 
 * @class Iterator convert.h c++-gtk-utils/convert.h
 * @brief A class which will iterate through a std::string object by
 * reference to unicode characters rather than by bytes.
 * @sa Cgu::Utf8::ReverseIterator
 *
 * The Cgu::Utf8::Iterator class does the same as
 * std::string::const_iterator, except that when iterating through a
 * std::string object using the ++ and -- postfix and prefix
 * operators, it iterates by increments of whole unicode code points
 * rather than by reference to bytes.  In addition, the dereferencing
 * operator returns the whole unicode code point (a UCS-4 gunichar
 * type) rather than a char type.
 *
 * Where, as in practically all unix-like systems, sizeof(wchar_t) ==
 * 4, then the gunichar return value of the dereferencing operator can
 * be converted by a simple static_cast to the wchar_t type.  So far
 * as displaying individual code points is concerned however, it
 * should be noted that because unicode allows combining characters, a
 * unicode code point may not contain the whole representation of a
 * character as displayed.  This effect can be dealt with for all
 * characters capable of representation by Level 1 unicode (ie by
 * precomposed characters) using g_utf8_normalize() before iterating.
 * There will still however be some non-European scripts, in
 * particular some Chinese/Japanese/Korean ideograms, where
 * description of the ideogram requires more than one codepoint to be
 * finally resolved.  For these, printing individual code points
 * sequentially one by one directly to a display (say with std::wcout)
 * may or may not have the desired result, depending on how the
 * display device (eg console) deals with that case.
 *
 * A Cgu::Utf8::Iterator only allows reading from and not writing to
 * the std::string object being iterated through.  This is because in
 * UTF-8 the representation of any one unicode code point will require
 * between 1 and 6 bytes: accordingly modifying a UTF-8 string may
 * change its length (in bytes) even though the number of unicode
 * characters stays the same.  For the same reason, this iterator is a
 * bidirectional iterator but not a random access iterator.
 *
 * The std::string object concerned should contain valid UTF-8 text.
 * If necessary, this should be checked with Cgu::Utf8::validate()
 * first.  In addition, before use, the Cgu::Utf8::Iterator object
 * must be initialized by a std::string::const_iterator or
 * std::string::iterator object pointing to the first byte of a valid
 * UTF-8 character in the string (or by another Cgu::Utf8::Iterator
 * object or by a Cgu::Utf8::ReverseIterator object), and iteration
 * will begin at the point of initialization: therefore, assuming the
 * string contains valid UTF-8 text, passing std::string::begin() to a
 * Cgu::Utf8::Iterator object will always be safe.  Initialization by
 * std::string::end() is also valid if the first iteration is
 * backwards with the -- operator.  This initialization can be done
 * either in the constructor or by assignment.  Comparison operators
 * ==, !=, <, <=, > and >= are provided enabling the position of
 * Cgu::Utf8::Iterator objects to be compared with each other or with
 * std::string::const_iterator and std::string::iterator objects.
 *
 * This is an example:
 * @code
 * using namespace Cgu;
 *
 * std::wstring wide_str(L"ßøǿón");
 * std::string narrow_str(Utf8::uniwide_to_utf8(wide_str));
 *
 * Utf8::Iterator iter;
 * for (iter = narrow_str.begin();
 *      iter != narrow_str.end();
 *      ++iter)
 *   std::wcout << static_cast<wchar_t>(*iter) << std::endl;
 * @endcode
 *
 * This class assumes in using g_utf8_next_char(), g_utf8_prev_char()
 * and g_utf8_get_char() that the std::string object keeps its
 * internal string in contiguous storage.  This is required by the
 * draft C++0x standard, but not formally by C++98/C++03.  However,
 * known implementations of std::string in fact store the string
 * contiguously, and given the new draft standard it is reasonable to
 * assume that that will always be the case even before the latest
 * standard is promulgated.
 */ 

class ReverseIterator;

class Iterator {
public:
  typedef gunichar value_type;
  typedef gunichar reference;  // code points are handed out by value (read only)
  typedef void pointer;        // no pointer access to elements is offered
  typedef std::string::difference_type difference_type;
  typedef std::bidirectional_iterator_tag iterator_category;

private:
  // byte-wise position within the std::string being traversed
  std::string::const_iterator pos;

public:

/**
 * Prefix increment.  Moves this iterator forward from the start of
 * the UTF-8 character it currently represents to the start of the
 * UTF-8 character which follows it.  It will not throw.
 * @return A reference to this iterator at its new position.
 *
 * Since 1.0.1
 */
  Iterator& operator++();

/**
 * Postfix increment.  Moves this iterator forward from the start of
 * the UTF-8 character it currently represents to the start of the
 * UTF-8 character which follows it.  It will not throw provided that
 * copy constructing and assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * @return A copy of this iterator as it stood before the increment.
 *
 * Since 1.0.1
 */
  Iterator operator++(int);

/**
 * Prefix decrement.  Moves this iterator backward from the start of
 * the UTF-8 character it currently represents to the start of the
 * UTF-8 character which precedes it.  It will not throw.
 * @return A reference to this iterator at its new position.
 *
 * Since 1.0.1
 */
  Iterator& operator--();

/**
 * Postfix decrement.  Moves this iterator backward from the start of
 * the UTF-8 character it currently represents to the start of the
 * UTF-8 character which precedes it.  It will not throw provided that
 * copy constructing and assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * @return A copy of this iterator as it stood before the decrement.
 *
 * Since 1.0.1
 */
  Iterator operator--(int);

/**
 * Re-seats this iterator from a std::string::const_iterator object.
 * The argument should refer to the first byte of a UTF-8 character
 * (eg std::string::begin()) or be std::string::end().  It will not
 * throw provided assigning a std::string::const_iterator object does
 * not throw, as it will not in any sane implementation.
 * @param iter The std::string::const_iterator.
 * @return A reference to this Cgu::Utf8::Iterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  Iterator& operator=(const std::string::const_iterator& iter) {
    pos = iter;
    return *this;
  }

/**
 * Re-seats this iterator from a std::string::iterator object.  The
 * argument should refer to the first byte of a UTF-8 character (eg
 * std::string::begin()) or be std::string::end().  It will not throw
 * provided assigning a std::string::const_iterator object does not
 * throw, as it will not in any sane implementation.
 * @param iter The std::string::iterator.
 * @return A reference to this Cgu::Utf8::Iterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  Iterator& operator=(const std::string::iterator& iter) {
    pos = iter;
    return *this;
  }

/**
 * Copy assignment from another Cgu::Utf8::Iterator object.  It will
 * not throw provided assigning a std::string::const_iterator object
 * does not throw, as it will not in any sane implementation.
 * @param iter The iterator.
 * @return A reference to this Cgu::Utf8::Iterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  Iterator& operator=(const Iterator& iter) {
    pos = iter.pos;
    return *this;
  }

/**
 * Assignment from a Cgu::Utf8::ReverseIterator object.  This iterator
 * takes up the same physical position as the reverse iterator (but
 * the logical position will be offset to the following UTF-8
 * character).  It will not throw provided assigning a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 * @param iter The iterator.
 * @return A reference to this Cgu::Utf8::Iterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  Iterator& operator=(const ReverseIterator& iter);

/**
 * The dereference operator.  It will not throw.
 * @return A 32-bit gunichar object holding the whole unicode code
 * point currently represented by this iterator.
 *
 * Since 1.0.1
 */
  Iterator::value_type operator*() const {
    // decode the UTF-8 sequence beginning at the current byte
    const std::string::value_type* current = &(*pos);
    return g_utf8_get_char(current);
  }

/**
 * @return The underlying std::string::const_iterator held by this
 * iterator.  Once this iterator has been correctly initialized, that
 * will refer to the first byte of the UTF-8 character currently
 * represented by this iterator or be std::string::end().  It will not
 * throw provided assigning a std::string::const_iterator object does
 * not throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
  std::string::const_iterator base() const {
    return pos;
  }

/**
 * Constructs this iterator from a std::string::const_iterator object,
 * which should refer to the first byte of a UTF-8 character (eg
 * std::string::begin()) or be std::string::end().  It will not throw
 * provided that copy constructing a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * This constructor is deliberately not marked explicit: as a type
 * conversion constructor it allows the Cgu::Utf8::Iterator comparison
 * operators to compare the position of a Cgu::Utf8::Iterator with
 * std::string::const_iterator objects.
 * @param iter The std::string::const_iterator.
 *
 * Since 1.0.1
 */
  Iterator(const std::string::const_iterator& iter):
    pos(iter) {
  }

/**
 * Constructs this iterator from a std::string::iterator object, which
 * should refer to the first byte of a UTF-8 character (eg
 * std::string::begin()) or be std::string::end().  It will not throw
 * provided that copy constructing a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * This constructor is deliberately not marked explicit: as a type
 * conversion constructor it allows the Cgu::Utf8::Iterator comparison
 * operators to compare the position of a Cgu::Utf8::Iterator with
 * std::string::iterator objects.
 * @param iter The std::string::iterator.
 *
 * Since 1.0.1
 */
  Iterator(const std::string::iterator& iter):
    pos(iter) {
  }

/**
 * Copy constructor.  It will not throw provided that copy
 * constructing a std::string::const_iterator object does not throw,
 * as it will not in any sane implementation.
 * @param iter The iterator.
 *
 * Since 1.0.1
 */
  Iterator(const Iterator& iter):
    pos(iter.pos) {
  }

/**
 * Constructs this iterator from a Cgu::Utf8::ReverseIterator object.
 * This iterator takes up the same physical position as the reverse
 * iterator (but the logical position will be offset to the following
 * UTF-8 character).  It will not throw provided that copy
 * constructing a std::string::const_iterator object does not throw,
 * as it will not in any sane implementation.
 * @param iter The iterator.
 *
 * Since 1.0.1
 */
  explicit Iterator(const ReverseIterator& iter);

/**
 * The default constructor.  It will not throw.  An iterator
 * constructed this way must be assigned to before it is used.
 *
 * Since 1.0.1
 */
  Iterator() {
  }

/* Only has effect if --with-glib-memory-slices-compat or
 * --with-glib-memory-slices-no-compat option picked */
  CGU_GLIB_MEMORY_SLICES_FUNCS
};

// Prefix increment: step forward over one whole UTF-8 character.
inline Iterator& Iterator::operator++() {
  const std::string::value_type* const start = &(*pos);
  // g_utf8_next_char is safe even on the last character of the string:
  // the macro consults the g_utf8_skip look-up table instead of reading
  // the following character, so stepping onto std::string::end() is fine
  const std::string::value_type* const next = g_utf8_next_char(start);
  pos += next - start;
  return *this;
}

// Postfix increment: advance this iterator, returning its prior state.
inline Iterator Iterator::operator++(int) {
  const Iterator before(*this);
  operator++();
  return before;
}
 
// Prefix decrement: step backward over one whole UTF-8 character.
inline Iterator& Iterator::operator--() {
  // pos may be std::string::end(), which must not be dereferenced, so
  // take the address of the preceding byte and step one past it to
  // obtain a pointer equivalent to the current position
  const std::string::value_type* const here = &(*(pos - 1)) + 1;
  const std::string::value_type* const previous = g_utf8_prev_char(here);
  pos -= here - previous;
  return *this;
}

// Postfix decrement: move this iterator back, returning its prior state.
inline Iterator Iterator::operator--(int) {
  const Iterator before(*this);
  operator--();
  return before;
}

/**
 * Equality comparison.  Two iterators compare equal when their
 * underlying std::string::const_iterator objects (as returned by
 * base()) compare equal.  It will not throw provided assigning a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator==(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs == rhs;
}
 
/**
 * Inequality comparison.  Two iterators compare unequal when their
 * underlying std::string::const_iterator objects (as returned by
 * base()) compare unequal.  It will not throw provided assigning a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator!=(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs != rhs;
}
 
/**
 * Less-than comparison, made on the underlying
 * std::string::const_iterator objects (as returned by base()).  It
 * will not throw provided assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator<(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs < rhs;
}
 
/**
 * Less-than-or-equal comparison, made on the underlying
 * std::string::const_iterator objects (as returned by base()).  It
 * will not throw provided assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator<=(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs <= rhs;
}
 
/**
 * Greater-than comparison, made on the underlying
 * std::string::const_iterator objects (as returned by base()).  It
 * will not throw provided assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator>(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs > rhs;
}
 
/**
 * Greater-than-or-equal comparison, made on the underlying
 * std::string::const_iterator objects (as returned by base()).  It
 * will not throw provided assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator>=(const Iterator& iter1, const Iterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs >= rhs;
}
 
/************** ReverseIterator class **************/

/**
 * @class ReverseIterator convert.h c++-gtk-utils/convert.h
 * @brief A class which will iterate in reverse through a std::string
 * object by reference to unicode characters rather than by bytes.
 * @sa Cgu::Utf8::Iterator
 *
 * The Cgu::Utf8::ReverseIterator class does the same as
 * std::string::const_reverse_iterator, except that when iterating
 * through a std::string object using the ++ and -- postfix and prefix
 * operators, it iterates by increments of whole unicode code points
 * rather than by reference to bytes.  In addition, the dereferencing
 * operator returns the whole unicode code point (a UCS-4 gunichar
 * type) rather than a char type.
 *
 * Before use, the Cgu::Utf8::ReverseIterator object must be
 * initialized by a std::string::const_reverse_iterator or
 * std::string::reverse_iterator object representing the first byte of
 * a valid UTF-8 character in the string (or by another
 * Cgu::Utf8::ReverseIterator object or by a Cgu::Utf8::Iterator
 * object): so assuming the string contains valid UTF-8 text, it is
 * always valid to initialise a Cgu::Utf8::ReverseIterator with
 * std::string::rbegin().  Initialization by std::string::rend() is
 * also valid if the first iteration is backwards with the --
 * operator.  This initialization can be done either in the
 * constructor or by assignment.  Comparison operators ==, !=, <, <=,
 * > and >= are provided enabling the position of
 * Cgu::Utf8::ReverseIterator objects to be compared with each other
 * or with std::string::const_reverse_iterator and
 * std::string::reverse_iterator objects.
 *
 * This is an example:
 * @code
 * using namespace Cgu;
 *
 * std::wstring wide_str(L"ßøǿón");
 * std::string narrow_str(Utf8::uniwide_to_utf8(wide_str));
 *
 * Utf8::ReverseIterator iter;
 * for (iter = narrow_str.rbegin();
 *      iter != narrow_str.rend();
 *      ++iter)
 *   std::wcout << static_cast<wchar_t>(*iter) << std::endl;
 * @endcode
 * 
 * For further information on its use, see the Utf8::Iterator
 * documentation.
 */

class ReverseIterator {
public:
  typedef gunichar value_type;
  typedef gunichar reference;  // read only
  typedef void pointer;        // read only
  typedef std::string::difference_type difference_type;
  typedef std::bidirectional_iterator_tag iterator_category;

private:  
  std::string::const_iterator pos;
  // we use cache to make iterating and then dereferencing more efficient
  mutable std::string::const_iterator cache;
public:

/**
 * Increments the iterator in the reverse direction so that it moves
 * from the beginning of the current UTF-8 character to the beginning
 * of the previous UTF-8 character in the std::string object
 * concerned.  It is a prefix operator.  It will not throw provided
 * assigning a std::string::const_iterator object does not throw, as
 * it will not in any sane implementation.
 * @return A reference to the iterator in its new position
 *
 * Since 1.0.1
 */
  ReverseIterator& operator++();

/**
 * Increments the iterator in the reverse direction so that it moves
 * from the beginning of the current UTF-8 character to the beginning
 * of the previous UTF-8 character in the std::string object
 * concerned.  It is a postfix operator.  It will not throw provided
 * that copy constructing and assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * @return A copy of the iterator in its former position
 *
 * Since 1.0.1
 */
  ReverseIterator operator++(int);

/**
 * Decrements the iterator in the reverse direction so that it moves
 * from the beginning of the current UTF-8 character to the beginning
 * of the following UTF-8 character in the std::string object
 * concerned.  It is a prefix operator.  It will not throw provided
 * assigning a std::string::const_iterator object does not throw, as
 * it will not in any sane implementation.
 * @return A reference to the iterator in its new position
 *
 * Since 1.0.1
 */
  ReverseIterator& operator--();

/**
 * Decrements the iterator in the reverse direction so that it moves
 * from the beginning of the current UTF-8 character to the beginning
 * of the following UTF-8 character in the std::string object
 * concerned.  It is a postfix operator.  It will not throw provided
 * that copy constructing and assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * @return A copy of the iterator in its former position
 *
 * Since 1.0.1
 */
  ReverseIterator operator--(int);

/**
 * Assigns a std::string::const_reverse_iterator object to this
 * object.  It should represent the beginning of a UTF-8 character (eg
 * std::string::rbegin()) or comprise std::string::rend().  It will
 * not throw provided assigning a std::string::const_iterator object
 * does not throw, as it will not in any sane implementation.
 * @param iter The const_reverse_iterator.
 * @return A reference to this Cgu::Utf8::ReverseIterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  ReverseIterator& operator=(const std::string::const_reverse_iterator& iter) {pos = iter.base(); cache = pos; return *this;}

/**
 * Assigns a std::string::reverse_iterator object to this object.  It
 * should represent the beginning of a UTF-8 character (eg
 * std::string::rbegin()) or comprise std::string::rend().  It will
 * not throw provided assigning a std::string::const_iterator object
 * does not throw, as it will not in any sane implementation.
 * @param iter The reverse_iterator.
 * @return A reference to this Cgu::Utf8::ReverseIterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  ReverseIterator& operator=(const std::string::reverse_iterator& iter) {pos = iter.base(); cache = pos; return *this;}

/**
 * Assigns a Cgu::Utf8::ReverseIterator object to this object.  It
 * will not throw provided assigning a std::string::const_iterator
 * object does not throw, as it will not in any sane implementation.
 * @param iter The iterator.
 * @return A reference to this Cgu::Utf8::ReverseIterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  ReverseIterator& operator=(const ReverseIterator& iter) {pos = iter.pos; cache = iter.cache; return *this;}

/**
 * Assigns a Cgu::Utf8::Iterator object to this object, so that this
 * iterator adopts the same physical position (but the logical
 * position will be offset to the previous UTF-8 character in the
 * std::string object concerned).  It will not throw provided
 * assigning a std::string::const_iterator object does not throw, as
 * it will not in any sane implementation.
 * @param iter The iterator.
 * @return A reference to this Cgu::Utf8::ReverseIterator object after
 * assignment.
 *
 * Since 1.0.1
 */
  ReverseIterator& operator=(const Iterator& iter) {pos = iter.base(); cache = pos; return *this;}

/**
 * The dereference operator.
 * @return A 32-bit gunichar object containing the whole unicode code
 * point which is currently represented by this iterator.  It will not
 * throw.
 *
 * Since 1.0.1
 */
  ReverseIterator::value_type operator*() const;

/**
 * @return The current underlying std::string::const_iterator kept by
 * this iterator.  Once this iterator has been correctly initialized,
 * that will point to the beginning of the UTF-8 character after the
 * one currently represented by this iterator or to
 * std::string::end().  It will not throw provided assigning a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
  std::string::const_iterator base() const {return pos;}

/**
 * Constructs this iterator and initialises it with a
 * std::string::const_reverse_iterator object.  It should represent
 * the beginning of a UTF-8 character (eg std::string::rbegin()) or
 * comprise std::string::rend().  It will not throw provided that copy
 * constructing a std::string::const_iterator object does not throw,
 * as it will not in any sane implementation.  This is a type
 * conversion constructor (it is not marked explicit) so that it can
 * be used with Cgu::Utf8::ReverseIterator comparison operators to
 * compare the position of Cgu::Utf8::ReverseIterator with
 * std::string::const_reverse_iterator objects.
 * @param iter The const_reverse_iterator.
 *
 * Since 1.0.1
 */
  ReverseIterator(const std::string::const_reverse_iterator& iter): pos(iter.base()), cache(pos) {}

/**
 * Constructs this iterator and initialises it with a
 * std::string::reverse_iterator object.  It should represent the
 * beginning of a UTF-8 character (eg std::string::rbegin()) or
 * comprise std::string::rend().  It will not throw provided that copy
 * constructing a std::string::const_iterator object does not throw,
 * as it will not in any sane implementation.  This is a type
 * conversion constructor (it is not marked explicit) so that it can
 * be used with Cgu::Utf8::ReverseIterator comparison operators to
 * compare the position of Cgu::Utf8::ReverseIterator with
 * std::string::reverse_iterator objects.
 * @param iter The reverse_iterator.
 *
 * Since 1.0.1
 */
  ReverseIterator(const std::string::reverse_iterator& iter): pos(iter.base()), cache(pos) {}

/**
 * Constructs this iterator and initialises it with another
 * Cgu::Utf8::ReverseIterator object.  It will not throw provided that
 * copy constructing a std::string::const_iterator object does not
 * throw, as it will not in any sane implementation.
 * @param iter The iterator.
 *
 * Since 1.0.1
 */
  ReverseIterator(const ReverseIterator& iter): pos(iter.pos), cache(iter.cache) {}

/**
 * Constructs this iterator and initialises it with a
 * Cgu::Utf8::Iterator object, so that this iterator adopts the same
 * physical position (but the logical position will be offset to the
 * previous UTF-8 character in the std::string object concerned).  It
 * will not throw provided that copy constructing a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 * @param iter The iterator.
 *
 * Since 1.0.1
 */
  explicit ReverseIterator(const Iterator& iter): pos(iter.base()), cache(pos) {}

/**
 * The default constructor will not throw.
 *
 * Since 1.0.1
 */
  ReverseIterator() {}

/* Only has effect if --with-glib-memory-slices-compat or
 * --with-glib-memory-slices-no-compat option picked */
  CGU_GLIB_MEMORY_SLICES_FUNCS
};

// Prefix increment: steps the underlying position back to the
// beginning of the previous UTF-8 character and returns this iterator.
inline ReverseIterator& ReverseIterator::operator++() {

  // fast path: a cached position lying before pos (as recorded by
  // operator*() or operator--()) is used directly as the new position
  if (cache < pos) {
    pos = cache;
    return *this;
  }

  // pos might be std::string::end() (that is, this iterator might be at
  // std::string::rbegin()), so pos itself is not dereferenced: the
  // address of the byte before it is taken and then advanced by one
  const std::string::value_type* here = &(*(pos - 1)) + 1;
  pos -= here - g_utf8_prev_char(here);
  return *this;
}

// Postfix increment: advances this iterator with the prefix operator
// and hands back a copy taken before the move.
inline ReverseIterator ReverseIterator::operator++(int) {
  ReverseIterator former(*this);
  operator++();
  return former;
}
 
// Prefix decrement: moves the underlying position forward over the
// UTF-8 character beginning at pos and returns this iterator.
inline ReverseIterator& ReverseIterator::operator--() {
  const std::string::value_type* const start = &(*pos);
  // record the position being left, so that a subsequent operator++()
  // can return to it directly
  cache = pos;
  // g_utf8_next_char is safe here even when pos points to the first
  // character: the macro consults the g_utf8_skip look-up table rather
  // than attempting to read the following character, so we can safely
  // iterate to std::string::rbegin()
  pos += g_utf8_next_char(start) - start;
  return *this;
}

// Postfix decrement: moves this iterator with the prefix operator and
// hands back a copy taken before the move.
inline ReverseIterator ReverseIterator::operator--(int) {
  ReverseIterator former(*this);
  operator--();
  return former;
}

// Dereference: a Cgu::Utf8::Iterator constructed from this object is
// decremented once, the position it then reports is stored in cache
// (for use by a later operator++()), and the code point found at that
// position is returned.
inline ReverseIterator::value_type ReverseIterator::operator*() const {
  Iterator forward(*this);
  --forward;
  cache = forward.base();
  return g_utf8_get_char(&(*cache));
}

/**
 * Equality comparison: two Cgu::Utf8::ReverseIterator objects compare
 * equal when their underlying std::string::const_iterator objects
 * (see Cgu::Utf8::ReverseIterator::base()) compare equal.  The
 * comparison operators will not throw provided copying a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator==(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs == rhs;
}
 
/**
 * Inequality comparison: two Cgu::Utf8::ReverseIterator objects
 * compare unequal when their underlying std::string::const_iterator
 * objects (see Cgu::Utf8::ReverseIterator::base()) compare unequal.
 * The comparison operators will not throw provided copying a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator!=(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  const std::string::const_iterator lhs = iter1.base();
  const std::string::const_iterator rhs = iter2.base();
  return lhs != rhs;
}
 
/**
 * Less-than comparison.  Ordering follows the logical operation
 * (reverse iteration) and not the physical layout of the string, so
 * that for example an iterator at position std::string::rbegin() is
 * less than an iterator at position std::string::rend().  The
 * comparison operators will not throw provided copying a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator<(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  // logically earlier means physically later, hence the swapped operands
  return (iter2.base() < iter1.base());
}
 
/**
 * Less-than-or-equal comparison.  Ordering follows the logical
 * operation (reverse iteration) and not the physical layout of the
 * string, so that for example an iterator at position
 * std::string::rbegin() is less than an iterator at position
 * std::string::rend().  The comparison operators will not throw
 * provided copying a std::string::const_iterator object does not
 * throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator<=(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  // logically earlier means physically later, hence the swapped operands
  return (iter2.base() <= iter1.base());
}
 
/**
 * Greater-than comparison.  Ordering follows the logical operation
 * (reverse iteration) and not the physical layout of the string, so
 * that for example an iterator at position std::string::rbegin() is
 * less than an iterator at position std::string::rend().  The
 * comparison operators will not throw provided copying a
 * std::string::const_iterator object does not throw, as it will not
 * in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator>(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  // logically later means physically earlier, hence the swapped operands
  return (iter2.base() > iter1.base());
}

/**
 * Greater-than-or-equal comparison.  Ordering follows the logical
 * operation (reverse iteration) and not the physical layout of the
 * string, so that for example an iterator at position
 * std::string::rbegin() is less than an iterator at position
 * std::string::rend().  The comparison operators will not throw
 * provided copying a std::string::const_iterator object does not
 * throw, as it will not in any sane implementation.
 *
 * Since 1.0.1
 */
inline bool operator>=(const ReverseIterator& iter1, const ReverseIterator& iter2) {
  // logically later means physically earlier, hence the swapped operands
  return (iter2.base() >= iter1.base());
}
 
/*** Iterator class methods which require ReverseIterator as a complete type ***/

// Assignment from a Cgu::Utf8::ReverseIterator object: this iterator
// takes on the underlying std::string::const_iterator reported by
// iter.base().
inline Iterator& Iterator::operator=(const ReverseIterator& iter) {
  this->pos = iter.base();
  return *this;
}

// Construction from a Cgu::Utf8::ReverseIterator object: pos is
// initialised with the underlying std::string::const_iterator reported
// by iter.base().
inline Iterator::Iterator(const ReverseIterator& iter)
  : pos(iter.base()) {
}

} // namespace Utf8

} // namespace Cgu

#endif
