
/* conv.c */

/* This file provides functions to convert single characters and
 * character strings from general Latin-1 to upper case, lower case and
 * ascii.  It also provides functions to remove middots ('\267').
 *
 */

#include "conv.h"
#include "gen.h"

/* Convert the single Latin-1 character c to upper case.
 *
 * The byte is examined as an unsigned value in 0..0377.  ASCII a-z and
 * the accented small letters 0340-0366 and 0370-0376 are mapped to
 * their capitals by clearing bit 0040.  Two small letters have no
 * capital in Latin-1 and are transliterated to ASCII instead: sharp s
 * (0337) becomes 'S' and y with diaeresis (0377) becomes 'Y'.  Every
 * other character, including the division sign (0367), is returned
 * unchanged.
 *
 * The definition deliberately carries no `inline' specifier.  Under
 * C99 and later a file-scope definition marked with plain `inline' is
 * only an inline definition: it provides no external symbol unless
 * another declaration in the translation unit lacks `inline', so
 * callers could fail to link.  The optimizer remains free to inline
 * calls made from within this file.  */
char uc1   ( const char c ) {
  const int n = (int)c & 0377;
  return (char)(
    n <  'a'  ? n         :
    n <= 'z'  ? n & ~0040 :
    n <= 0336 ? n         :
    n <= 0337 ? 'S'       : // LATIN SMALL LETTER SHARP S
    n <= 0366 ? n & ~0040 :
    n <= 0367 ? n         : // DIVISION SIGN
    n <= 0376 ? n & ~0040 :
                'Y'         // LATIN SMALL LETTER Y WITH DIAERESIS (0377)
  );
}

/* Convert the single Latin-1 character c to lower case.
 *
 * The byte is examined as an unsigned value in 0..0377.  ASCII A-Z and
 * the accented capitals 0300-0326 and 0330-0336 are mapped to their
 * small letters by setting bit 0040.  Every other character, including
 * the multiplication sign (0327), is returned unchanged.
 *
 * The definition deliberately carries no `inline' specifier.  Under
 * C99 and later a file-scope definition marked with plain `inline' is
 * only an inline definition: it provides no external symbol unless
 * another declaration in the translation unit lacks `inline', so
 * callers could fail to link.  The optimizer remains free to inline
 * calls made from within this file.  */
char lc1   ( const char c ) {
  const int n = (int)c & 0377;
  return (char)(
    n <  'A'  ? n         :
    n <= 'Z'  ? n |  0040 :
    n <= 0277 ? n         :
    n <= 0326 ? n |  0040 :
    n <= 0327 ? n         : // MULTIPLICATION SIGN
    n <= 0336 ? n |  0040 :
                n           // 0337-0377 are already small letters
  );
}

/* Transliterate the single Latin-1 character c to a 7-bit ASCII
 * character.
 *
 * The byte is examined as an unsigned value in 0..0377.  Values below
 * 0200 are already ASCII and are returned unchanged; values 0200-0240
 * become a space; every remaining value is mapped one-to-one through
 * the table below.
 *
 * The mapping of the following Latin-1 to ascii transliteration is
 * adapted from that of F. Pinard's and S. Vila's `recode_3.6-6' to fit
 * debram(1)'s needs.  Notice that THORN is transliterated to Z not T
 * for proper sorting.  Notice also that the middot is transliterated as
 * '.' not ' ' or '*'.  (Such a simple one-to-one transliteration as
 * this one should perhaps be added to the standard C library.  The
 * author lacks the time needed to bring this to pass; if you are
 * feeling ambitious, though, you may wish to do it.)
 *
 * The definition deliberately carries no `inline' specifier.  Under
 * C99 and later a file-scope definition marked with plain `inline' is
 * only an inline definition: it provides no external symbol unless
 * another declaration in the translation unit lacks `inline', so
 * callers could fail to link.  */
char unlat1( const char c ) {
  const int n = (int)c & 0377;
  return (char)(
    n <  0200 ? n         :
    n <= 0240 ? ' '       :
    n <= 0241 ? '!'       : // INVERTED EXCLAMATION MARK
    n <= 0242 ? 'c'       : // CENT SIGN
    n <= 0243 ? '#'       : // POUND SIGN
    n <= 0244 ? 'E'       : // CURRENCY SIGN
    n <= 0245 ? 'Y'       : // YEN SIGN
    n <= 0246 ? '|'       : // BROKEN BAR
    n <= 0247 ? 'S'       : // SECTION SIGN
    n <= 0250 ? '"'       : // DIAERESIS
    n <= 0251 ? 'c'       : // COPYRIGHT SIGN
    n <= 0252 ? 'a'       : // FEMININE ORDINAL INDICATOR
    n <= 0253 ? '<'       : // LEFT-POINTING DOUBLE ANGLE QUOT. MARK
    n <= 0254 ? '!'       : // NOT SIGN
    n <= 0255 ? '-'       : // SOFT HYPHEN
    n <= 0256 ? 'R'       : // REGISTERED SIGN
    n <= 0257 ? '-'       : // MACRON
    n <= 0260 ? '^'       : // DEGREE SIGN
    n <= 0261 ? '+'       : // PLUS-MINUS SIGN
    n <= 0263 ? '^'       : // SUPERSCRIPTS TWO AND THREE
    n <= 0264 ? '\''      : // ACUTE ACCENT
    n <= 0265 ? 'u'       : // MICRO SIGN
    n <= 0266 ? 'P'       : // PILCROW SIGN
    n <= 0267 ? '.'       : // MIDDLE DOT
    n <= 0270 ? ','       : // CEDILLA
    n <= 0271 ? '^'       : // SUPERSCRIPT ONE
    n <= 0272 ? 'o'       : // MASCULINE ORDINAL INDICATOR
    n <= 0273 ? '>'       : // RIGHT-POINTING DOUBLE ANGLE QUOT. MARK
    n <= 0276 ? '/'       : // VULGAR FRACTIONS 1/4, 1/2 AND 3/4
    n <= 0277 ? '?'       : // INVERTED QUESTION MARK
    n <= 0306 ? 'A'       :
    n <= 0307 ? 'C'       :
    n <= 0313 ? 'E'       :
    n <= 0317 ? 'I'       :
    n <= 0320 ? 'D'       : // ETH
    n <= 0321 ? 'N'       :
    n <= 0326 ? 'O'       :
    n <= 0327 ? 'x'       : // MULTIPLICATION SIGN
    n <= 0330 ? 'O'       :
    n <= 0334 ? 'U'       :
    n <= 0335 ? 'Y'       :
    n <= 0336 ? 'Z'       : // THORN
    n <= 0337 ? 's'       : // SHARP S
    n <= 0346 ? 'a'       :
    n <= 0347 ? 'c'       :
    n <= 0353 ? 'e'       :
    n <= 0357 ? 'i'       :
    n <= 0360 ? 'd'       : // ETH
    n <= 0361 ? 'n'       :
    n <= 0366 ? 'o'       :
    n <= 0367 ? ':'       : // DIVISION SIGN
    n <= 0370 ? 'o'       :
    n <= 0374 ? 'u'       :
    n <= 0375 ? 'y'       :
    n <= 0376 ? 'z'       : // THORN
    n <= 0377 ? 'y'       :
      n           &  0177   // not reached: n never exceeds 0377
  );
}

/* Replace a single middot ('\267') with a space; return any other
 * character unchanged.
 *
 * The definition deliberately carries no `inline' specifier.  Under
 * C99 and later a file-scope definition marked with plain `inline' is
 * only an inline definition: it provides no external symbol unless
 * another declaration in the translation unit lacks `inline', so
 * callers could fail to link.  */
char undot1( const char c ) {
  return c == '\267' ? ' ' : c;
}

/* Convert the NUL-terminated string s to upper case in place by passing
 * each of its characters through uc1().  Returns s.  */
char *uc   ( char *s ) {
  char *cur = s;
  while ( *cur != '\0' ) {
    *cur = uc1( *cur );
    cur++;
  }
  return s;
}

/* Convert the NUL-terminated string s to lower case in place by passing
 * each of its characters through lc1().  Returns s.  */
char *lc   ( char *s ) {
  char *cur = s;
  while ( *cur != '\0' ) {
    *cur = lc1( *cur );
    cur++;
  }
  return s;
}

/* Transliterate the NUL-terminated string s in place by passing each of
 * its characters through unlat1().  Returns s.  */
char *unlat( char *s ) {
  char *cur = s;
  while ( *cur != '\0' ) {
    *cur = unlat1( *cur );
    cur++;
  }
  return s;
}

/* Replace independent sequences of middots with spaces.  A sequence of
 * one or more middots is independent if it is bounded by spaces and/or
 * by the ends of the string.
 *
 * The NUL-terminated string s is modified in place and s is returned.
 * Middots that touch any other character on either side (for example
 * the one in "a\267b") are left alone.  */
char *undot( char *s ) {
  char *const o = s;
  /* p marks where a candidate run began (the start of the string or
   * just after a space); it is 0 while the scan is inside a word, where
   * no run can begin.  */
  char *p       = s;
  /* The terminator itself must be examined, because the end of the
   * string closes a run just as a space does; hence the loop tests for
   * NUL only after the current character has been handled.  */
  for ( ;; ++s ) {
    if ( p ) {
      /* A space or the end of the string confirms the run [p, s).  */
      if      ( !*s || *s == ' '    ) while ( p < s ) *p++ = ' ';
      /* Any character other than a middot disqualifies the run.  */
      else if (        *s != '\267' ) p = 0  ;
    }
    else   if (        *s == ' '    ) p = s+1;
    if ( !*s ) break;
  }
  return o;
}

