/* Logo Search packages:
 * Sourcecode: uclibc version File versions  Download package
 *
 * wchar.c
 */

/*  Copyright (C) 2002, 2003, 2004     Manuel Novoa III
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!
 *
 *  Besides uClibc, I'm using this code in my libc for elks, which is
 *  a 16-bit environment with a fairly limited compiler.  It would make
 *  things much easier for me if this file isn't modified unnecessarily.
 *  In particular, please put any new or replacement functions somewhere
 *  else, and modify the makefile to use your version instead.
 *  Thanks.  Manuel
 *
 *  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! */


/* May 23, 2002     Initial Notes:
 *
 * I'm still tweaking this stuff, but it passes the tests I've thrown
 * at it, and Erik needs it for the gcc port.  The glibc extension
 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
 * in the glibc source.  I also need to fix the behavior of
 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
 *
 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
 * file on my platform (x86) show about 5-10% faster conversion speed than
 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
 * individual mbrtowc()/wcrtomb() calls.
 *
 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc.  which
 * needs to deal gracefully with whatever is sent to it.  In that mode,
 * it passes Markus Kuhn's UTF-8-test.txt stress test.  I plan to add
 * an arg to force that behavior, so the interface will be changing.
 *
 * I need to fix the error checking for 16-bit wide chars.  This isn't
 * an issue for uClibc, but may be for ELKS.  I'm currently not sure
 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
 *
 * July 1, 2002
 *
 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
 *    locales.
 * Enabled building of a C/POSIX-locale-only version, so full locale support
 *    no longer needs to be enabled.
 *
 * Nov 4, 2002
 *
 * Fixed a bug in _wchar_wcsntoutf8s().  Don't store wcs position if dst is NULL.
 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
 *   order to support %ls in printf.  See comments below for details.
 * Change behaviour of wc<->mb functions when in the C locale.  Now they do
 *   a 1-1 map for the range 0x80-UCHAR_MAX.  This is for backwards compatibility
 *   and consistency with the stds requirements that a printf format string be
 *   a valid multibyte string beginning and ending in its initial shift state.
 *
 * Nov 5, 2002
 *
 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
 *
 * Nov 7, 2002
 *
 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
 *   Added some size/speed optimizations and integrated it into my locale
 *   framework.  Minimally tested at the moment, but the stub C-locale
 *   version (which most people would probably be using) should be fine.
 *
 * Nov 21, 2002
 *
 * Revert the wc<->mb changes from earlier this month involving the C-locale.
 * Add a couple of ugly hacks to support *wprintf.
 * Add a mini iconv() and iconv implementation (requires locale support).
 *
 * Aug 1, 2003
 * Bug fix for mbrtowc.
 *
 * Aug 18, 2003
 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
 *
 * Feb 11, 2004
 * Bug fix: Fix size check for remaining output space in iconv().
 *
 * Manuel
 */

#define _GNU_SOURCE
#define _ISOC99_SOURCE
#include <errno.h>
#include <stddef.h>
#include <limits.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <locale.h>
#include <wchar.h>
#include <bits/uClibc_uwchar.h>

/**********************************************************************/
/* Locale configuration.
 *
 * With __UCLIBC_HAS_LOCALE__, ENCODING reads the encoding field of the
 * current locale data, and the short Cc2wc_ and Cwc2c_ names alias the
 * corresponding __LOCALE_DATA_ constants used by the 8-bit conversion
 * code in this file.
 *
 * Without it, ENCODING is the constant __ctype_encoding_7_bit, it is a
 * build error for 8-bit or UTF-8 locale support to be configured, and the
 * L__wchar_utf8sntowcs and L__wchar_wcsntoutf8s section selectors are
 * undefined so that the UTF-8 worker functions are not compiled. */
#ifdef __UCLIBC_HAS_LOCALE__
#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_iswspace
/* generates one warning */
#warning TODO: Fix Cc2wc* and Cwc2c* defines!
#endif
#endif /* __UCLIBC_MJN3_ONLY__ */

#define ENCODING        ((__UCLIBC_CURLOCALE_DATA).encoding)

#define Cc2wc_IDX_SHIFT       __LOCALE_DATA_Cc2wc_IDX_SHIFT
#define Cc2wc_ROW_LEN         __LOCALE_DATA_Cc2wc_ROW_LEN
#define Cwc2c_DOMAIN_MAX      __LOCALE_DATA_Cwc2c_DOMAIN_MAX
#define Cwc2c_TI_SHIFT        __LOCALE_DATA_Cwc2c_TI_SHIFT
#define Cwc2c_TT_SHIFT        __LOCALE_DATA_Cwc2c_TT_SHIFT
#define Cwc2c_TI_LEN          __LOCALE_DATA_Cwc2c_TI_LEN

#ifndef __CTYPE_HAS_UTF_8_LOCALES
#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif

#else  /* __UCLIBC_HAS_LOCALE__ */

#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_btowc
/* emit only once */
#warning fix preprocessor logic testing locale settings
#endif
#endif

/* No locale support: the encoding is always plain 7-bit. */
#define ENCODING (__ctype_encoding_7_bit)
#ifdef __CTYPE_HAS_8_BIT_LOCALES
#error __CTYPE_HAS_8_BIT_LOCALES is defined!
#endif
#ifdef __CTYPE_HAS_UTF_8_LOCALES
#error __CTYPE_HAS_UTF_8_LOCALES is defined!
#endif
/* Disable the sections that build the UTF-8 worker functions. */
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s

#endif /* __UCLIBC_HAS_LOCALE__ */
/**********************************************************************/

/* UTF_8_MAX_LEN: 6 when WCHAR_MAX exceeds 0xffff, otherwise 3.  The UTF-8
 * converters in this file test it to select their range checks. */
#if WCHAR_MAX > 0xffffUL
#define UTF_8_MAX_LEN 6
#else
#define UTF_8_MAX_LEN 3
#endif

/* KUHN: when defined, the UTF-8 converters in this file additionally
 * reject the surrogate range 0xd800-0xdfff and the values 0xfffe/0xffff. */
#define KUHN 1

/* Implementation-specific work functions. */

/* UTF-8 -> wide char worker: converts at most wn wide chars from at most
 * n bytes starting at *src.  ps holds the partial-sequence state and
 * allow_continuation selects whether an incomplete trailing sequence may
 * be saved in ps. */
extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
                                                 const char **__restrict src, size_t n,
                                                 mbstate_t *ps, int allow_continuation);

/* Wide char -> UTF-8 worker: encodes at most wn wide chars from *src into
 * at most n bytes at s. */
extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
                                                 const wchar_t **__restrict src, size_t wn);

/* glibc extensions. */

/* Multibyte -> wide string conversion reading at most NMC bytes and
 * producing at most len wide chars. */
extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
                                       const char **__restrict src,
                                       size_t NMC, size_t len, mbstate_t *__restrict ps);

/* Wide -> multibyte string conversion reading at most NWC wide chars and
 * producing at most len bytes. */
extern size_t __wcsnrtombs(char *__restrict dst,
                                       const wchar_t **__restrict src,
                                       size_t NWC, size_t len, mbstate_t *__restrict ps);

/**********************************************************************/
#ifdef L_btowc

/* Convert a single byte to a wide character.
 *
 * c  the byte value, or EOF.
 *
 * Returns the wide character corresponding to c, or WEOF when c is EOF
 * or does not form a complete character on its own.
 */
wint_t btowc(int c)
{
#ifdef __CTYPE_HAS_8_BIT_LOCALES

      mbstate_t state;
      unsigned char byte[1];
      wchar_t wide;

      if (c == EOF) {
            return WEOF;
      }

      byte[0] = (unsigned char) c;
      state.__mask = 0;       /* Start from the initial conversion state. */

      /* Results of 0 (nul) and 1 (one byte consumed) are successes;
       * (size_t) -1 and (size_t) -2 compare greater than 1. */
      if (mbrtowc(&wide, byte, 1, &state) > 1) {
            return WEOF;
      }
      return wide;

#else  /*  __CTYPE_HAS_8_BIT_LOCALES */

#ifdef __UCLIBC_HAS_LOCALE__
      assert((ENCODING == __ctype_encoding_7_bit)
               || (ENCODING == __ctype_encoding_utf8));
#endif /* __UCLIBC_HAS_LOCALE__ */

      /* Without 8-bit locale support only 0-0x7f can be valid, both in
       * the C/POSIX locale and in UTF-8 locales.  The unsigned cast also
       * rejects negative values such as EOF. */
      if (((unsigned int) c) >= 0x80) {
            return WEOF;
      }
      return c;

#endif /*  __CTYPE_HAS_8_BIT_LOCALES */
}

#endif
/**********************************************************************/
#ifdef L_wctob

/* Note: ps is completely ignored in all currently supported wc->mb
 * conversions, so wctob simply passes NULL for it to wcrtomb. */

/* Convert a wide character to its single-byte representation.
 *
 * c  the wide character to convert.
 *
 * Returns the byte value when c converts to exactly one byte, otherwise
 * EOF.
 */
int wctob(wint_t c)
{
#ifdef __CTYPE_HAS_8_BIT_LOCALES

      unsigned char out[MB_LEN_MAX];

      if (wcrtomb(out, c, NULL) != 1) {
            return EOF;
      }
      return out[0];

#else  /*  __CTYPE_HAS_8_BIT_LOCALES */

#ifdef __UCLIBC_HAS_LOCALE__
      assert((ENCODING == __ctype_encoding_7_bit)
               || (ENCODING == __ctype_encoding_utf8));
#endif /* __UCLIBC_HAS_LOCALE__ */

      /* Without 8-bit locale support only 0-0x7f can be valid, both in
       * the C/POSIX locale and in UTF-8 locales. */

      /* TODO: need unsigned version of wint_t... */
      if ((c < 0) || (c >= 0x80)) {
            return EOF;
      }
      return c;

#endif /*  __CTYPE_HAS_8_BIT_LOCALES */
}

#endif
/**********************************************************************/
#ifdef L_mbsinit

/* Report whether ps describes an initial conversion state.
 *
 * Returns nonzero when ps is NULL or when its __mask field is zero (no
 * partial character pending); returns 0 otherwise.
 */
int mbsinit(const mbstate_t *ps)
{
      if (ps == NULL) {
            return 1;
      }
      return ps->__mask == 0;
}

#endif
/**********************************************************************/
#ifdef L_mbrlen

/* mbrlen is exported as a weak alias of __mbrlen. */
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
       __attribute__ ((__weak__, __alias__("__mbrlen")));

/* Determine the length in bytes of the next multibyte character.
 *
 * s   bytes to examine.
 * n   maximum number of bytes to examine.
 * ps  conversion state; when NULL, a state object private to this
 *     function is used.
 *
 * Returns whatever mbrtowc(NULL, s, n, state) returns.
 */
size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
{
      static mbstate_t private_state;     /* Zero-initialized via bss. */
      mbstate_t *state = ps;

      if (state == NULL) {
            state = &private_state;
      }
      return mbrtowc(NULL, s, n, state);
}

#endif
/**********************************************************************/
#ifdef L_mbrtowc

/* Convert the next multibyte character in s to a wide character.
 *
 * pwc  where to store the result; may be NULL to discard it.
 * s    bytes to convert; NULL is handled as a call with pwc == NULL on an
 *      empty string with n == 1.
 * n    maximum number of bytes to examine.
 * ps   conversion state; when NULL, a state object private to this
 *      function is used.
 *
 * With s non-NULL and n == 0, returns (size_t) -1 if ps is in the error
 * state (__mask set and __wc == 0xffffU) and (size_t) -2 otherwise.
 * In a UTF-8 locale the result of _wchar_utf8sntowcs is returned, except
 * that a result of 1 is replaced by the number of bytes consumed.  In all
 * other encodings the result of __mbsnrtowcs for a single character is
 * returned.
 */
size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
                     size_t n, mbstate_t *__restrict ps)
{
      static mbstate_t private_state;     /* Zero-initialized via bss. */
      wchar_t scratch[1];
      const char *cursor;
      size_t result;
      char nul_input[1];            /* Automatic, not static: fPIC friendly. */

      if (ps == NULL) {
            ps = &private_state;
      }

      if (s == NULL) {
            /* Behave like a call on "" that discards the result. */
            nul_input[0] = 0;
            pwc = NULL;
            s = nul_input;
            n = 1;
      } else if (n == 0) {
            /* TODO: change error code? */
            if (ps->__mask && (ps->__wc == 0xffffU)) {
                  return (size_t) -1;
            }
            return (size_t) -2;
      }

      cursor = s;

#ifdef __CTYPE_HAS_UTF_8_LOCALES
      /* Handled here because mbsrtowcs doesn't allow incompletes. */
      if (ENCODING == __ctype_encoding_utf8) {
            if (pwc == NULL) {
                  pwc = scratch;
            }
            result = _wchar_utf8sntowcs(pwc, 1, &cursor, n, ps, 1);
            if (result == 1) {
                  return cursor - s;      /* Bytes consumed. */
            }
            return result;    /* 0 for a nul char, or an error code. */
      }
#endif

#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: This adds a trailing nul!
#endif /* __UCLIBC_MJN3_ONLY__ */

      result = __mbsnrtowcs(scratch, &cursor, SIZE_MAX, 1, ps);

      if ((((ssize_t) result) >= 0) && (pwc != NULL)) {
            *pwc = scratch[0];
      }
      return result;
}

#endif
/**********************************************************************/
#ifdef L_wcrtomb

/* Note: We completely ignore ps in all currently supported conversions. */
/* TODO: Check for valid state anyway? */

/* Convert one wide character to its multibyte representation.
 *
 * s   destination buffer; when NULL, a nul wide char is converted into an
 *     internal buffer instead.
 * wc  the wide character to convert (ignored when s is NULL).
 * ps  conversion state, passed through to __wcsnrtombs.
 *
 * Returns the result of __wcsnrtombs for the single character, except
 * that a result of 0 is reported as 1.
 */
size_t wcrtomb(register char *__restrict s, wchar_t wc,
                     mbstate_t *__restrict ps)
{
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Should wcsnrtombs nul-terminate unconditionally?  Check glibc.
#endif /* __UCLIBC_MJN3_ONLY__ */
      char discard[MB_LEN_MAX];
      wchar_t single[1];
      const wchar_t *in;
      size_t produced;

      if (s == NULL) {
            s = discard;
            wc = 0;
      }

      single[0] = wc;
      in = single;

      produced = __wcsnrtombs(s, &in, 1, MB_LEN_MAX, ps);
      if (produced == 0) {
            return 1;
      }
      return produced;
}

#endif
/**********************************************************************/
#ifdef L_mbsrtowcs

/* Convert a multibyte string to a wide-character string.
 *
 * Forwards to __mbsnrtowcs with no limit (SIZE_MAX) on the number of
 * source bytes.  When ps is NULL, a state object private to this function
 * is used.
 */
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                         size_t len, mbstate_t *__restrict ps)
{
      static mbstate_t private_state;     /* Zero-initialized via bss. */
      mbstate_t *state = ps;

      if (state == NULL) {
            state = &private_state;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, state);
}

#endif
/**********************************************************************/
#ifdef L_wcsrtombs

/* Note: We completely ignore ps in all currently supported conversions.

 * TODO: Check for valid state anyway? */

/* Convert a wide-character string to a multibyte string.
 *
 * Forwards to __wcsnrtombs with no limit (SIZE_MAX) on the number of
 * source wide characters; ps is passed through unchanged.
 */
size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                         size_t len, mbstate_t *__restrict ps)
{
      const size_t no_source_limit = SIZE_MAX;

      return __wcsnrtombs(dst, src, no_source_limit, len, ps);
}

#endif
/**********************************************************************/
#ifdef L__wchar_utf8sntowcs

/* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
 * UTF-8-test.txt stress test.
 */
/*  #define DECODER */

#ifdef DECODER
#ifndef KUHN
#define KUHN
#endif
#endif

/* Convert UTF-8 bytes to wide characters.
 *
 * pwc   destination for the wide chars.  If NULL, nothing is stored and
 *       wn is treated as SIZE_MAX.  If equal to (wchar_t *) ps, nothing
 *       is stored but wn is still honoured (length query, see below).
 *       In both of those modes *src is not updated on the DONE path.
 * wn    maximum number of wide chars to produce; 0 returns 0 immediately.
 * src   *src is the input position (must not be NULL).  On the DONE path
 *       with a real destination it is set to the stop position, or to
 *       NULL when a terminating nul was converted (non-DECODER builds).
 * n     maximum number of input bytes to examine.
 * ps    conversion state (must not be NULL).  A nonzero __mask marks a
 *       pending partial character whose accumulated bits are in __wc;
 *       in non-DECODER builds __wc == 0xffffU marks the error state.
 * allow_continuation
 *       nonzero: an incomplete trailing sequence is saved in *ps, *src is
 *       advanced past the consumed bytes and (size_t) -2 is returned.
 *       zero: see the comments at the incomplete-sequence handling below.
 *
 * Returns the number of wide chars converted (a terminating nul is not
 * counted in non-DECODER builds), (size_t) -2 as described above, or, in
 * non-DECODER builds, (size_t) -1 with errno set to EILSEQ for an invalid
 * sequence or when called in the error state.  With DECODER defined,
 * invalid input is replaced by 0xfffd instead of failing.
 */
size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
                                      const char **__restrict src, size_t n,
                                      mbstate_t *ps, int allow_continuation)
{
      register const char *s;
      __uwchar_t mask;
      __uwchar_t wc;
      wchar_t wcbuf[1];
      size_t count;
      int incr;

      s = *src;

      assert(s != NULL);
      assert(ps != NULL);

      incr = 1;
      /* NOTE: The following is an AWFUL HACK!  In order to support %s in
       * wprintf, we need to be able to compute the number of wchars needed
       * for the mbs conversion, not to exceed the precision specified.
       * But if dst is NULL, the return value is the length assuming a
       * sufficiently sized buffer.  So, we allow passing of (wchar_t *) ps
       * as pwc in order to flag that we really want the length, subject
       * to the restricted buffer size and no partial conversions.
       * See mbsnrtowcs() as well. */
      if (!pwc || (pwc == ((wchar_t *)ps))) {
            if (!pwc) {
                  wn = SIZE_MAX;
            }
            /* Counting only: every result overwrites the one-element
             * scratch buffer because incr is 0. */
            pwc = wcbuf;
            incr = 0;
      }

      /* This is really here only to support the glibc extension function
       * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
       * check on the validity of the mbstate. */
      if (!(count = wn)) {
            return 0;
      }

      if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
#ifdef DECODER
            wc = (__uwchar_t) ps->__wc;
            if (n) {
                  goto CONTINUE;
            }
            goto DONE;
#else
            if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
                  /* TODO: change error code here and below? */
                  if (n) {
                        goto CONTINUE;
                  }
                  goto DONE;
            }
            __set_errno(EILSEQ);
            return (size_t) -1;           /* We're in an error state. */
#endif
      }

      do {
            if (!n) {
                  goto DONE;
            }
            --n;
            if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
                  mask = 0x40;
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Fix range for 16 bit wchar_t case.
#endif
                  /* Bytes 0xc0-0xfd are accepted as start bytes here. */
                  if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
                        goto START;
                  }
            BAD:
#ifdef DECODER
                  wc = 0xfffdU;
                  goto COMPLETE;
#else
                  /* Record the error state so later calls fail as well. */
                  ps->__mask = mask;
                  ps->__wc = 0xffffU;
                  __set_errno(EILSEQ);
                  return (size_t) -1;     /* Illegal start byte! */
#endif

            CONTINUE:
                  /* Also entered directly when resuming a saved partial
                   * character, with mask and wc restored from *ps. */
                  while (n) {
                        --n;
                        if ((*s & 0xc0) != 0x80) {
                              goto BAD;
                        }
                        mask <<= 5;
                        wc <<= 6;
                        wc += (*s & 0x3f);      /* keep separate for bcc (smaller code) */
                        ++s;
                  START:
                        /* Clear the length-marker bit just above mask. */
                        wc &= ~(mask << 1);

                        if ((wc & mask) == 0) { /* Character completed. */
                              /* After this adjustment mask is the lower
                               * bound tested by (wc < mask) below: 0x80
                               * for a two-byte sequence, 0x800 for a
                               * three-byte one. */
                              if ((mask >>= 5) == 0x40) {
                                    mask += mask;
                              }
                              /* Check for invalid sequences (longer than necessary)
                               * and invalid chars.  */
                              if ( (wc < mask) /* Sequence not minimal length. */
#ifdef KUHN
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
                                     /* For plane 0, these are the only defined values.*/
                                     || (wc > 0xfffdU)
#else
                                     /* Note that we don't need to worry about exceeding */
                                     /* 31 bits as that is the most that UTF-8 provides. */
                                     || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
#endif
                                     || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
#endif /* KUHN */
                                     ) {
                                    goto BAD;
                              }
                              goto COMPLETE;
                        }
                  }
                  /* Character potentially valid but incomplete. */
                  if (!allow_continuation) {
                        /* If some chars were already converted in this
                         * call, 0 is returned without updating *src or
                         * *ps. */
                        if (count != wn) {
                              return 0;
                        }
                        /* NOTE: The following can fail if you allow and then disallow
                         * continuation!!! */
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
#endif
                        /* Need to back up... */
                        do {
                              --s;
                        } while ((mask >>= 5) >= 0x40);
                        goto DONE;
                  }
                  /* Save the partial character so a later call can
                   * resume at CONTINUE. */
                  ps->__mask = (wchar_t) mask;
                  ps->__wc = (wchar_t) wc;
                  *src = s;
                  return (size_t) -2;
            }
      COMPLETE:
            *pwc = wc;
            pwc += incr;
      }
#ifdef DECODER
      while (--count);
#else
      /* A nul stops the loop before count is decremented, so it is stored
       * but not counted. */
      while (wc && --count);

      if (!wc) {
            s = NULL;
      }
#endif

 DONE:
      /* ps->__wc is irrelevant here. */
      ps->__mask = 0;
      /* pwc still equals wcbuf only in the counting modes (incr == 0). */
      if (pwc != wcbuf) {
            *src = s;
      }

      return wn - count;
}

#endif
/**********************************************************************/
#ifdef L__wchar_wcsntoutf8s

/* Encode wide characters as UTF-8.
 *
 * s    destination buffer.  If NULL, nothing is kept and n is treated as
 *      SIZE_MAX.  If equal to (char *) src, nothing is kept but n is
 *      still honoured (length query, see below).  In both of those modes
 *      *src is not updated.
 * n    maximum number of bytes to produce.
 * src  *src is the input position (must not be NULL).  When storing, it
 *      is set on normal return to the position where conversion stopped,
 *      or to NULL when a terminating nul was reached.
 * wn   maximum number of wide chars to consume.
 *
 * Conversion stops at a nul wide char (written to the output position but
 * not counted), after wn wide chars, when n bytes have been produced, or
 * when the next character's encoding would not fit in the remaining
 * space.
 *
 * Returns the number of bytes produced, or (size_t) -1 with errno set to
 * EILSEQ when a wide char fails the range checks below.
 */
size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
                                      const wchar_t **__restrict src, size_t wn)
{
      register char *p;
      size_t len, t;
      __uwchar_t wc;
      const __uwchar_t *swc;
      int store;
      char buf[MB_LEN_MAX];
      char m;

      store = 1;
      /* NOTE: The following is an AWFUL HACK!  In order to support %ls in
       * printf, we need to be able to compute the number of bytes needed
       * for the mbs conversion, not to exceed the precision specified.
       * But if dst is NULL, the return value is the length assuming a
       * sufficiently sized buffer.  So, we allow passing of (char *) src
       * as dst in order to flag that we really want the length, subject
       * to the restricted buffer size and no partial conversions.
       * See wcsnrtombs() as well. */
      if (!s || (s == ((char *) src))) {
            if (!s) {
                  n = SIZE_MAX;
            }
            /* Counting only: each character is encoded into buf and then
             * overwritten by the next, since s is never advanced. */
          s = buf;
            store = 0;
      }

      t = n;                  /* Bytes of output space remaining. */
      swc = (const __uwchar_t *) *src;

      assert(swc != NULL);

      while (wn && t) {
            wc = *swc;

            /* Single-byte case; for wc >= 0x80 this byte is overwritten
             * below if the sequence fits. */
            *s = wc;
            len = 1;

            if (wc >= 0x80) {
#ifdef KUHN
                  if (
#if UTF_8_MAX_LEN == 3
                        /* For plane 0, these are the only defined values.*/
                        /* Note that we don't need to worry about exceeding */
                        /* 31 bits as that is the most that UTF-8 provides. */
                        (wc > 0xfffdU)
#else
                        /* UTF_8_MAX_LEN == 6 */
                        (wc > 0x7fffffffUL)
                        || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
#endif
                        || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
                        ) {
                        __set_errno(EILSEQ);
                        return (size_t) -1;
                  }
#else  /* KUHN */
#if UTF_8_MAX_LEN != 3
                  if (wc > 0x7fffffffUL) { /* Value too large. */
                        __set_errno(EILSEQ);
                        return (size_t) -1;
                  }
#endif
#endif /* KUHN */

                  /* Compute the encoded length: one byte per 5 bits that
                   * remain after dropping the lowest bit.  This yields 2
                   * for 0x80-0x7ff, 3 for 0x800-0xffff, and so on. */
                  wc >>= 1;
                  p = s;
                  do {
                        ++p;
                  } while (wc >>= 5);
                  wc = *swc;        /* Restore the value consumed above. */
                  if ((len = p - s) > t) { /* Not enough space. */
                        break;
                  }

                  /* Fill the bytes from last to first with 6 payload bits
                   * each, tagged 10xxxxxx.  m gains one leading 1 bit per
                   * byte; (m << 1) is then OR-ed into the first byte as
                   * the length prefix. */
                  m = 0x80;
                  while( p>s ) {
                        m = (m >> 1) | 0x80;
                        *--p = (wc & 0x3f) | 0x80;
                        wc >>= 6;
                  }
                  *s |= (m << 1);
            } else if (wc == 0) {   /* End of string. */
                  swc = NULL;
                  break;
            }

            ++swc;
            --wn;
            t -= len;
            if (store) {
                  s += len;
            }
      }

      if (store) {
            *src = (const wchar_t *) swc;
      }

      return n - t;
}


#endif
/**********************************************************************/
#ifdef L___mbsnrtowcs

/* WARNING: We treat len as SIZE_MAX when dst is NULL! */

/* Convert a multibyte string to wide characters, reading at most NMC
 * bytes and producing at most len wide chars.  mbsnrtowcs is a weak alias
 * of __mbsnrtowcs.
 *
 * dst  destination.  If NULL, nothing is kept and len is treated as
 *      SIZE_MAX.  If equal to (wchar_t *) ps, nothing is kept but len is
 *      still honoured (length query, see below).  In both of those modes
 *      the single-byte paths leave *src untouched.
 * src  *src is the input position.  In the single-byte paths with a real
 *      destination it is set on normal return to the stop position, or
 *      to NULL when a terminating nul was converted.
 * ps   conversion state; when NULL, a state object private to this
 *      function is used.
 *
 * In a UTF-8 locale the work is delegated to _wchar_utf8sntowcs, and a
 * (size_t) -2 result from it is reported as 0.
 *
 * Returns the number of wide chars converted, not counting a terminating
 * nul, or (size_t) -1 with errno set to EILSEQ for a byte that has no
 * mapping in the current encoding.
 */
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                          size_t NMC, size_t len, mbstate_t *__restrict ps)
       __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));

size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                              size_t NMC, size_t len, mbstate_t *__restrict ps)
{
      static mbstate_t mbstate;     /* Rely on bss 0-init. */
      wchar_t wcbuf[1];
      const char *s;
      size_t count;
      int incr;

      if (!ps) {
            ps = &mbstate;
      }

#ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
            size_t r;
            return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
                        != (size_t) -2) ? r : 0;
      }
#endif
      incr = 1;
      /* NOTE: The following is an AWFUL HACK!  In order to support %s in
       * wprintf, we need to be able to compute the number of wchars needed
       * for the mbs conversion, not to exceed the precision specified.
       * But if dst is NULL, the return value is the length assuming a
       * sufficiently sized buffer.  So, we allow passing of ((wchar_t *)ps)
       * as dst in order to flag that we really want the length, subject
       * to the restricted buffer size and no partial conversions.
       * See _wchar_utf8sntowcs() as well. */
      if (!dst || (dst == ((wchar_t *)ps))) {
            if (!dst) {
                  len = SIZE_MAX;
            }
            /* Counting only: results overwrite the one-element scratch
             * buffer because incr is 0. */
            dst = wcbuf;
            incr = 0;
      }

      /* Since all the following encodings are single-byte encodings... */
      if (len > NMC) {
            len = NMC;
      }

      count = len;            /* Wide chars still allowed. */
      s = *src;

#ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
            wchar_t wc;
            while (count) {
                  if ((wc = ((unsigned char)(*s))) >= 0x80) {     /* Non-ASCII... */
                        /* Two-level lookup through the locale's idx8c2wc
                         * and tbl8c2wc tables; a 0 result means the byte
                         * has no mapping. */
                        wc -= 0x80;
                        wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
                                      (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                                       << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                        if (!wc) {
                              /* BAD is defined inside the 7-bit loop
                               * further down. */
                              goto BAD;
                        }
                  }
                  /* The nul is stored but not counted. */
                  if (!(*dst = wc)) {
                        s = NULL;
                        break;
                  }
                  dst += incr;
                  ++s;
                  --count;
            }
            if (dst != wcbuf) {
                  *src = s;
            }
            return len - count;
      }
#endif

#ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
#endif

      while (count) {
            /* The byte is stored before it is validated; a nul ends the
             * conversion and is not counted. */
            if ((*dst = (unsigned char) *s) == 0) {
                  s = NULL;
                  break;
            }
            if (*dst >= 0x80) {
#ifdef __CTYPE_HAS_8_BIT_LOCALES
            BAD:
#endif
                  __set_errno(EILSEQ);
                  return (size_t) -1;
            }
            ++s;
            dst += incr;
            --count;
      }
      if (dst != wcbuf) {
            *src = s;
      }
      return len - count;
}

#endif
/**********************************************************************/
#ifdef L___wcsnrtombs

/* WARNING: We treat len as SIZE_MAX when dst is NULL! */

/* Note: We completely ignore ps in all currently supported conversions.
 * TODO: Check for valid state anyway? */

/* Convert wide characters to a multibyte string, reading at most NWC
 * wide chars and producing at most len bytes.  wcsnrtombs is a weak alias
 * of __wcsnrtombs.
 *
 * dst  destination.  If NULL, nothing is kept and len is treated as
 *      SIZE_MAX.  If equal to (char *) src, nothing is kept but len is
 *      still honoured (length query, see below).  In both of those modes
 *      the single-byte paths leave *src untouched.
 * src  *src is the input position.  In the single-byte paths with a real
 *      destination it is set on normal return to the stop position, or
 *      to NULL when a terminating nul was converted.
 * ps   not referenced by this function.
 *
 * In a UTF-8 locale the work is delegated to _wchar_wcsntoutf8s.
 *
 * Returns the number of bytes produced, not counting a terminating nul,
 * or (size_t) -1 with errno set to EILSEQ.  In the 7-bit path that error
 * is raised for any wide char >= 0x80.  In the 8-bit path, as compiled
 * here with __WCHAR_REPLACEMENT_CHAR defined, unmappable wide chars are
 * replaced by '?' instead of failing.
 */
size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
                          size_t NWC, size_t len, mbstate_t *__restrict ps)
       __attribute__ ((__weak__, __alias__("__wcsnrtombs")));

size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
                              size_t NWC, size_t len, mbstate_t *__restrict ps)
{
      const __uwchar_t *s;
      size_t count;
      int incr;
      char buf[MB_LEN_MAX];

#ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
            return _wchar_wcsntoutf8s(dst, len, src, NWC);
      }
#endif /* __CTYPE_HAS_UTF_8_LOCALES */

      incr = 1;
      /* NOTE: The following is an AWFUL HACK!  In order to support %ls in
       * printf, we need to be able to compute the number of bytes needed
       * for the mbs conversion, not to exceed the precision specified.
       * But if dst is NULL, the return value is the length assuming a
       * sufficiently sized buffer.  So, we allow passing of (char *) src
       * as dst in order to flag that we really want the length, subject
       * to the restricted buffer size and no partial conversions.
       * See _wchar_wcsntoutf8s() as well. */
      if (!dst || (dst == ((char *) src))) {
            if (!dst) {
                  len = SIZE_MAX;
            }
            /* Counting only: results overwrite the start of buf because
             * incr is 0. */
            dst = buf;
            incr = 0;
      }

      /* Since all the following encodings are single-byte encodings... */
      if (len > NWC) {
            len = NWC;
      }

      count = len;            /* Bytes still allowed. */
      s = (const __uwchar_t *) *src;

#ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
            __uwchar_t wc;
            __uwchar_t u;
            while (count) {
                  if ((wc = *s) <= 0x7f) {
                        /* 0-0x7f maps to itself; the nul is stored but
                         * not counted. */
                        if (!(*dst = (unsigned char) wc)) {
                              s = NULL;
                              break;
                        }
                  } else {
                        /* u stays 0 when wc is outside the table domain
                         * or the tables hold no mapping for it. */
                        u = 0;
                        if (wc <= Cwc2c_DOMAIN_MAX) {
                              /* Three-step lookup through the locale's
                               * idx8wc2c and tbl8wc2c tables. */
                              u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
                                                                                    + Cwc2c_TT_SHIFT)];
                              u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
                                                      + ((wc >> Cwc2c_TT_SHIFT)
                                                         & ((1 << Cwc2c_TI_SHIFT)-1))];
                              u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
                                                      + (u << Cwc2c_TT_SHIFT)
                                                      + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
                        }

/* Because the macro is defined right here, the replacement branch is the
 * one compiled and the goto BAD alternative is not. */
#define __WCHAR_REPLACEMENT_CHAR '?'
#ifdef __WCHAR_REPLACEMENT_CHAR
                        *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
#else  /* __WCHAR_REPLACEMENT_CHAR */
                        if (!u) {
                              goto BAD;
                        }
                        *dst = (unsigned char) u;
#endif /* __WCHAR_REPLACEMENT_CHAR */
                  }
                  ++s;
                  dst += incr;
                  --count;
            }
            if (dst != buf) {
                  *src = (const wchar_t *) s;
            }
            return len - count;
      }
#endif /* __CTYPE_HAS_8_BIT_LOCALES */

#ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
#endif

      while (count) {
            if (*s >= 0x80) {
#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
            BAD:
#endif
                  __set_errno(EILSEQ);
                  return (size_t) -1;
            }
            /* The nul is stored but not counted. */
            if ((*dst = (unsigned char) *s) == 0) {
                  s = NULL;
                  break;
            }
            ++s;
            dst += incr;
            --count;
      }
      if (dst != buf) {
            *src = (const wchar_t *) s;
      }
      return len - count;
}

#endif
/**********************************************************************/
#ifdef L_wcswidth

#ifdef __UCLIBC_MJN3_ONLY__
#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
#warning TODO: Update wcwidth to match latest by Kuhn.
#endif

#if defined(__UCLIBC_HAS_LOCALE__) && \
( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )

/* 257-entry table of non-decreasing values running from 0 to 255.
 * NOTE(review): 255 is also the number of entries in new_tbl, so these
 * look like per-block start offsets into new_tbl with a final end
 * sentinel; confirm against the lookup code, which is outside this
 * chunk. */
static const unsigned char new_idx[] = {
      0,    5,    5,    6,   10,   15,   28,   39, 
      48,   48,   71,   94,  113,  128,  139,  154, 
      175,  186,  188,  188,  188,  188,  188,  188, 
      203,  208,  208,  208,  208,  208,  208,  208, 
      208,  219,  219,  219,  222,  222,  222,  222, 
      222,  222,  222,  222,  222,  222,  222,  224, 
      224,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  231,  231,  231, 
      231,  231,  231,  231,  231,  233,  233,  233, 
      233,  233,  233,  233,  234,  234,  234,  234, 
      234,  234,  234,  234,  234,  234,  234,  234, 
      234,  234,  234,  234,  234,  234,  234,  234, 
      234,  234,  234,  234,  234,  234,  234,  234, 
      234,  234,  234,  234,  234,  234,  234,  234, 
      234,  234,  234,  234,  234,  234,  234,  234, 
      236,  236,  236,  236,  236,  236,  236,  236, 
      236,  236,  236,  236,  236,  236,  236,  236, 
      236,  236,  236,  236,  236,  236,  236,  236, 
      236,  236,  236,  236,  236,  236,  236,  236, 
      236,  237,  237,  238,  241,  241,  242,  249, 
      255, 
};

/* Low-byte keys for the width lookup done by wcswidth().
 *
 * Within the slice selected through new_idx[], wcswidth() binary-searches
 * this table with the low byte (wc & 0xff) and settles on the last entry
 * that is <= that byte; the width is then read from the same index of
 * new_wtbl[].  NOTE(review): each entry appears to be the first low byte
 * of a run of characters sharing one width, sorted ascending per page --
 * confirm against the table generator. */
static const unsigned char new_tbl[] = {
      0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50, 
      0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00, 
      0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0, 
      0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70, 
      0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00, 
      0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1, 
      0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d, 
      0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc, 
      0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00, 
      0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49, 
      0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd, 
      0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01, 
      0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d, 
      0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd, 
      0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e, 
      0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce, 
      0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2, 
      0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b, 
      0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd, 
      0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37, 
      0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86, 
      0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00, 
      0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a, 
      0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32, 
      0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6, 
      0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa, 
      0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a, 
      0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80, 
      0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00, 
      0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e, 
      0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70, 
      0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc, 
};

/* Column widths for the width lookup done by wcswidth().
 *
 * Parallel to new_tbl[]: once wcswidth() has located index l in new_tbl[],
 * it adds new_wtbl[l] to the running column count.  The values present
 * are -1, 0, 1 and 2; wcswidth() adds the value unconditionally, so a -1
 * entry must never be selected for a character in 0x0100..0xffff. */
static const signed char new_wtbl[] = {
      0,   -1,    1,   -1,    1,    1,    0,    1, 
      0,    1,    1,    0,    1,    0,    1,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    0,    1,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    0,    1,    0,    1,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    1,    0,    1,    0,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    1, 
      0,    1,    0,    1,    0,    1,    0,    1, 
      0,    1,    2,    0,    1,    0,    1,    0, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    1,    0,    1,    0,    1, 
      1,    0,    1,    0,    1,    0,    1,    0, 
      1,    0,    1,    1,    2,    1,    1,    2, 
      2,    0,    2,    1,    2,    0,    2,    2, 
      1,    1,    2,    1,    1,    2,    1,    0, 
      1,    1,    0,    1,    0,    1,    2,    1, 
      0,    2,    1,    2,    1,    0,    1, 
};

/* Return the number of display columns needed for at most n wide
 * characters of pwcs (stopping at a nul), or -1 if any of them is not
 * printable or not valid in the current locale's encoding.
 *
 * The string is first validated against the current ENCODING:
 *   - 7-bit:  every character must be <= 0x7f;
 *   - 8-bit:  the string must be convertible by __wcsnrtombs();
 *   - UTF-8 (KUHN builds only): 0xfffe, 0xffff and the surrogate range
 *     0xd800..0xdfff are rejected.
 * Widths are then accumulated: Latin-1 printables count 1, characters in
 * 0x0100..0xffff are looked up via new_idx/new_tbl/new_wtbl, and the
 * supplementary planes are handled by the explicit range tests below. */
int wcswidth(const wchar_t *pwcs, size_t n)
{
      int h, l, m, count;
      wchar_t wc;
      unsigned char b;

      if (ENCODING == __ctype_encoding_7_bit) {
            size_t i;

            for (i = 0 ; (i < n) && pwcs[i] ; i++) {
                  /* A 7-bit encoding has no characters above 0x7f.  (The
                   * previous test only rejected values above 0xff, letting
                   * 0xa0..0xff through with width 1.) */
                  if (pwcs[i] != (pwcs[i] & 0x7f)) {
                        return -1;
                  }
            }
      }
#ifdef __CTYPE_HAS_8_BIT_LOCALES
      else if (ENCODING == __ctype_encoding_8_bit) {
            mbstate_t mbstate;

            mbstate.__mask = 0;                 /* Initialize the mbstate. */
            /* Dry-run conversion (NULL destination): only the error result matters. */
            if (__wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
                  return -1;
            }
      }
#endif /* __CTYPE_HAS_8_BIT_LOCALES */
#if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
      /* For stricter handling of allowed unicode values... see comments above. */
      else if (ENCODING == __ctype_encoding_utf8) {
            size_t i;

            for (i = 0 ; (i < n) && pwcs[i] ; i++) {
                  if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
                         || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
                        ) {
                        return -1;
                  }
            }
      }
#endif /* __CTYPE_HAS_UTF_8_LOCALES */

      for (count = 0 ; n && (wc = *pwcs++) ; n--) {
            if (wc <= 0xff) {
                  /* If we're here, wc != 0. */
                  /* C0 controls, DEL and C1 controls are not printable. */
                  if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
                        return -1;
                  }
                  ++count;
                  continue;
            }
            if (((unsigned int) wc) <= 0xffff) {
                  /* BMP: binary search of the page slice [l, h) for the
                   * last new_tbl entry that is <= the low byte. */
                  b = wc & 0xff;
                  h = (wc >> 8);
                  l = new_idx[h];
                  h = new_idx[h+1];
                  while ((m = (l+h) >> 1) != l) {
                        if (b >= new_tbl[m]) {
                              l = m;
                        } else {          /* wc < tbl[m] */
                              h = m;
                        }
                  }
                  count += new_wtbl[l]; /* none should be -1. */
                  continue;
            }

            /* Redo this to minimize average number of compares?*/
            if (wc >= 0x1d167) {
                  if (wc <= 0x1d1ad) {
                        /* Zero-width ranges inside 0x1d167..0x1d1ad. */
                        if ((wc <= 0x1d169
                               || (wc >= 0x1d173
                                     && (wc <= 0x1d182
                                           || (wc >= 0x1d185
                                                 && (wc <= 0x1d18b
                                                       || (wc >= 0x1d1aa))))))
                              ) {
                              continue;
                        }
                  } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) {
                        continue;         /* zero width */
                  } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
                        ++count;          /* need 2.. add one here */
                  }
#if (WCHAR_MAX > 0x7fffffffL)
                  else if (wc > 0x7fffffffL) {
                        return -1;
                  }
#endif /* (WCHAR_MAX > 0x7fffffffL) */
            }

            ++count;
      }

      return count;
}

#else  /*  __UCLIBC_HAS_LOCALE__ */

/* Variant of wcswidth() compiled when the table-driven, locale-aware
 * version above is not selected by the preprocessor.
 *
 * Examines at most n wide characters of pwcs, stopping early at a nul.
 * Each printable character in 0x20..0x7e or 0xa0..0xff counts as one
 * column.  Any control character (below 0x20, or 0x7f..0x9f) and any
 * value above 0xff makes the whole call return -1. */
int wcswidth(const wchar_t *pwcs, size_t n)
{
      const wchar_t *cur = pwcs;
      int columns = 0;

      while (n != 0) {
            wchar_t ch = *cur++;

            if (ch == 0) {
                  break;                  /* end of string */
            }
            if (ch > 0xff) {
                  return -1;              /* outside the single-byte range */
            }
            if (ch < 32) {
                  return -1;              /* C0 control */
            }
            if ((ch >= 0x7f) && (ch < 0xa0)) {
                  return -1;              /* DEL or C1 control */
            }
            ++columns;
            --n;
      }

      return columns;
}

#endif /*  __UCLIBC_HAS_LOCALE__ */

#endif
/**********************************************************************/
#ifdef L_wcwidth

/* Column width of a single wide character.
 *
 * Implemented as wcswidth() on a one-character span, so the result is
 * whatever wcswidth() reports for that character (including -1, and 0
 * for the nul character, where wcswidth() stops immediately). */
int wcwidth(wchar_t wc)
{
      const wchar_t *single = &wc;

      return wcswidth(single, 1);
}

#endif
/**********************************************************************/


/* Conversion descriptor state behind an iconv_t.
 *
 * Allocated and filled in by iconv_open(), released by iconv_close().
 * The members with a trailing 0 hold the values chosen at open time;
 * iconv() copies them back into the working members when it is asked to
 * reset the conversion state (NULL inbuf). */
typedef struct {
      mbstate_t tostate;            /* output-side shift state; only its __mask is cleared here */
      mbstate_t fromstate;          /* input-side state, passed to the UTF-8 decoder */
      int tocodeset;                /* target codeset id from find_codeset() */
      int fromcodeset;              /* source codeset id; bit 0 is toggled when a swapped BOM is read */
      int frombom;                  /* nonzero until the first input character has been checked for a BOM */
      int tobom;                    /* nonzero while a BOM still has to be written to the output */
      int fromcodeset0;             /* fromcodeset as set by iconv_open() */
      int frombom0;                 /* frombom as set by iconv_open() */
      int tobom0;                   /* tobom as set by iconv_open() */
      int skip_invalid_input;       /* To support the iconv -c option: nonzero makes iconv()
                                     * skip illegal input; iconv() sets it to 2 once it has
                                     * actually skipped something. */
} _UC_iconv_t;



#ifdef L_iconv

#include <iconv.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>

#if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
#error unsupported endianness for iconv
#endif

#ifndef __CTYPE_HAS_8_BIT_LOCALES
#error currently iconv requires 8 bit locales
#endif
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#error currently iconv requires UTF-8 locales
#endif


/* Codeset ids that iconv() tests for explicitly.
 *
 * The ids are the second byte of each __iconv_codesets entry.  The
 * UCS/UTF constants are given for the host byte order selected by the
 * #if below. */
enum {
      IC_WCHAR_T = 0xe0,            /* iconv() always uses 4 bytes per character for this id */
      IC_MULTIBYTE = 0xe0,          /* ids >= this are handled as fixed 2- or 4-byte units */
#if __BYTE_ORDER == __BIG_ENDIAN
      IC_UCS_4 =  0xec,
      IC_UTF_32 = 0xe4,
      IC_UCS_2 =  0xe2,
      IC_UTF_16 = 0xea,
#else
      IC_UCS_4 =  0xed,
      IC_UTF_32 = 0xe5,
      IC_UCS_2 =  0xe3,
      IC_UTF_16 = 0xeb,
#endif
      IC_UTF_8 = 2,                 /* decoded/encoded through the _wchar_utf8 helpers */
      IC_ASCII = 1                  /* bytes >= 0x80 are illegal input for this id */
};

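/* For the multibyte (fixed-width) codeset ids, i.e. those >= 0xe0:
 * bit 0 means swap endian
 * bit 1 means 2 bytes per unit
 * bit 2 means 4 bytes per unit
 * bit 4 (0x10) means a byte order mark is expected/emitted
 *       (see iconv_open)
 */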

/* Table of the built-in codeset names known to iconv.
 *
 * Packed sequence of records, each laid out as
 *     <record length> <codeset id> <name> <nul>
 * where <record length> counts every byte of the record, so a reader
 * walks the table with "s += *s" and stops at a zero length byte.  The
 * final record omits its own nul: the string literal's terminator acts
 * both as the name terminator and as the end-of-table marker, which is
 * why its length byte is one less and why it must stay last.
 *
 * Each hex escape is closed with "" so that it cannot swallow the first
 * letters of the name that follows it. */
const unsigned char __iconv_codesets[] =
      "\x0a\xe0""WCHAR_T\x00"       /* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
      "\x08\xec""UCS-4\x00"         /* always BE */
      "\x0a\xec""UCS-4BE\x00"
      "\x0a\xed""UCS-4LE\x00"
      /* Was "\fe4" (form feed + "e4"), which made this record 11 bytes
       * long while declaring 9 and derailed every lookup past it.
       * NOTE(review): the little-endian branch sets bit 0x10 (BOM) on its
       * UTF-32/UTF-16 ids (0xf4/0xfa) while this branch does not --
       * confirm which is intended. */
      "\x09\xe4""UTF-32\x00"        /* platform endian with BOM */
      "\x0b\xe4""UTF-32BE\x00"
      "\x0b\xe5""UTF-32LE\x00"
      "\x08\xe2""UCS-2\x00"         /* always BE */
      "\x0a\xe2""UCS-2BE\x00"
      "\x0a\xe3""UCS-2LE\x00"
      "\x09\xea""UTF-16\x00"        /* platform endian with BOM */
      "\x0b\xea""UTF-16BE\x00"
      "\x0b\xeb""UTF-16LE\x00"
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      "\x08\xed""UCS-4\x00"         /* always BE */
      "\x0a\xed""UCS-4BE\x00"
      "\x0a\xec""UCS-4LE\x00"
      "\x09\xf4""UTF-32\x00"        /* platform endian with BOM */
      "\x0b\xe5""UTF-32BE\x00"
      "\x0b\xe4""UTF-32LE\x00"
      "\x08\xe3""UCS-2\x00"         /* always BE */
      "\x0a\xe3""UCS-2BE\x00"
      "\x0a\xe2""UCS-2LE\x00"
      "\x09\xfa""UTF-16\x00"        /* platform endian with BOM */
      "\x0b\xeb""UTF-16BE\x00"
      "\x0b\xea""UTF-16LE\x00"
#endif
      "\x08\x02""UTF-8\x00"
      "\x0b\x01""US-ASCII\x00"
      "\x07\x01""ASCII";                  /* Must be last! (special case to save a nul) */

/* Map a codeset name to its numeric id, ignoring case.
 *
 * The built-in __iconv_codesets table is consulted first and yields the
 * id byte stored in the matching record.  Failing that, the locale
 * codeset list is scanned; its entries are numbered from 3 upwards in
 * list order (7-bit is 1, UTF-8 is 2, 8-bit codesets are >= 3).
 *
 * Returns 0 when the name is unknown. */
static int find_codeset(const char *name)
{
      const unsigned char *rec;
      const unsigned char *off;
      int id;

      /* Pass 1: packed records of <len> <id> <name>. */
      rec = __iconv_codesets;
      while (*rec != 0) {
            if (strcasecmp((const char *)(rec + 2), name) == 0) {
                  return rec[1];
            }
            rec += *rec;
      }

      /* Pass 2: same scheme as find_locale in locale.c -- the list starts
       * with one offset byte per codeset, each locating a name within
       * the list itself. */
      /* TODO: maybe CODESET_LIST + *s ??? */
      id = 3;
      off = __LOCALE_DATA_CODESET_LIST;
      for (;;) {
            if (strcasecmp((const char *)(__LOCALE_DATA_CODESET_LIST + *off), name) == 0) {
                  return id;
            }
            if (*++off == 0) {
                  break;
            }
            ++id;
      }

      return 0;               /* No matching codeset! */
}

/* Create a conversion descriptor for converting fromcode -> tocode.
 *
 * Both names are resolved with find_codeset().  If either is unknown,
 * errno is set to EINVAL and (iconv_t)(-1) is returned.  (iconv_t)(-1) is
 * also returned when the descriptor cannot be allocated; errno is then
 * left as malloc() set it.
 *
 * Bit 0x10 of a codeset id requests byte-order-mark handling on that
 * side; it is recorded both as the working flag and as the value to
 * restore on reset. */
iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
{
      _UC_iconv_t *desc;
      int to_id;
      int from_id = 0;

      to_id = find_codeset(tocode);
      if (to_id != 0) {
            from_id = find_codeset(fromcode);
      }
      if ((to_id == 0) || (from_id == 0)) {
            __set_errno(EINVAL);
            return (iconv_t)(-1);
      }

      desc = malloc(sizeof(_UC_iconv_t));
      if (desc == NULL) {
            return (iconv_t)(-1);
      }

      desc->tocodeset = to_id;
      desc->tobom = (to_id & 0x10) >> 4;
      desc->tobom0 = desc->tobom;

      desc->fromcodeset = from_id;
      desc->fromcodeset0 = from_id;
      desc->frombom = (from_id & 0x10) >> 4;
      desc->frombom0 = desc->frombom;

      /* Start in the initial shift state, with strict input checking. */
      desc->fromstate.__mask = 0;
      desc->tostate.__mask = 0;
      desc->skip_invalid_input = 0;

      return (iconv_t) desc;
}

/* Release a conversion descriptor.
 *
 * Hands cd to free() and reports success unconditionally; the descriptor
 * must not be used afterwards. */
int weak_function iconv_close(iconv_t cd)
{
      void *state = cd;

      free(state);
      return 0;
}

/* Convert characters from *inbuf to *outbuf using descriptor cd.
 *
 * One character at a time is decoded from the source codeset into wc and
 * then encoded into the target codeset; *inbuf / *inbytesleft and
 * *outbuf / *outbytesleft are advanced after each character.
 *
 * If inbuf or *inbuf is NULL the conversion state is reset to what
 * iconv_open() established and 0 is returned.
 *
 * Returns the number of characters that had to be replaced on output
 * (0xfffd for the Unicode targets, '?' for 8-bit targets), or
 * (size_t)(-1) with errno set to
 *   E2BIG   no room left in the output buffer,
 *   EINVAL  incomplete character at the end of the input,
 *   EILSEQ  illegal input (unless skip_invalid_input is set, in which
 *           case the input is skipped and the flag is raised to 2). */
size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
                                       size_t *__restrict inbytesleft,
                                       char **__restrict outbuf,
                                       size_t *__restrict outbytesleft)
{
      _UC_iconv_t *px = (_UC_iconv_t *) cd;
      size_t nrcount, r;
      wchar_t wc, wc2;
      int inci, inco;         /* bytes consumed from input / produced on output */

      assert(px != (_UC_iconv_t *)(-1));
      assert(sizeof(wchar_t) == 4);

      if (!inbuf || !*inbuf) {      /* Need to reinitialize conversion state. */
            /* Note: For shift-state encodings we possibly need to output the
             * shift sequence to return to initial state! */
            px->tostate.__mask = px->fromstate.__mask = 0;
            px->fromcodeset = px->fromcodeset0;
            px->tobom = px->tobom0;
            px->frombom = px->frombom0;
            return 0;
      }

      nrcount = 0;
      while (*inbytesleft) {
            if (!*outbytesleft) {
            TOO_BIG:
                  __set_errno(E2BIG);
                  return (size_t) -1;
            }

            /* ---- Decode one character from the input into wc. ---- */
            inci = inco = 1;
            if (px->fromcodeset >= IC_MULTIBYTE) {
                  /* Fixed-width input: bits 1/2 of the id give the unit size. */
                  inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
                  if (*inbytesleft < inci) goto INVALID;
                  /* Assemble big-endian, then swap if the id says so. */
                  wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
                        + ((unsigned char)((*inbuf)[1]));
                  if (inci == 4) {
                        wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
                              + ((unsigned char)((*inbuf)[3])) + (wc << 16);
                        if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
                  } else {
                        if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
                        if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
                               && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
                              ) {               /* surrogate */
                              /* High surrogate: keep its 10 payload bits.
                               * (Was "wc =- 0xd800U", i.e. an assignment of
                               * the negated constant.) */
                              wc -= 0xd800U;
                              if (*inbytesleft < 4) goto INVALID;
                              wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
                                    + ((unsigned char)((*inbuf)[3]));
                              /* Swap the low surrogate itself (the result
                               * used to be stored in wc, clobbering the
                               * high half and leaving wc2 unswapped). */
                              if (!(px->fromcodeset & 1)) wc2 = bswap_16(wc2);
                              /* The second unit must lie in 0xdc00..0xdfff.
                               * (The old test was inverted and compared
                               * against 0xe0000, rejecting every valid pair.) */
                              if (((__uwchar_t)(wc2 -= 0xdc00U)) >= (0xe000U - 0xdc00U)) {
                                    goto ILLEGAL;
                              }
                              inci = 4;   /* Change inci here in case skipping illegals. */
                              wc = 0x10000UL + (wc << 10) + wc2;
                        }
                  }

                  if (px->frombom) {
                        /* Only the very first character is checked for a BOM. */
                        px->frombom = 0;
                        if ((wc == 0xfeffU)
                              || (wc == ((inci == 4)
                                             ? (((wchar_t) 0xfffe0000UL))
                                             : ((wchar_t)(0xfffeUL))))
                              ) {
                              if (wc != 0xfeffU) {
                                    px->fromcodeset ^= 1; /* toggle endianness */
                                    wc = 0xfeffU;
                              }
                              /* frombom was cleared just above, so the BOM
                               * is always consumed without being output. */
                              if (!px->frombom) {
                                    goto BOM_SKIP_OUTPUT;
                              }
                              goto GOT_BOM;
                        }
                  }

                  if (px->fromcodeset != IC_WCHAR_T) {
                        if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
                                                             ? 0x7fffffffUL : 0x10ffffUL)
#ifdef KUHN
                              || (((__uwchar_t)(wc - 0xfffeU)) < 2)
                              || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
#endif
                              ) {
                              goto ILLEGAL;
                        }
                  }
            } else if (px->fromcodeset == IC_UTF_8) {
                  const char *p = *inbuf;
                  r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
                  if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
                        if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete */
                              assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
                              if (r == (size_t)(-2)) {
                              INVALID:
                                    __set_errno(EINVAL);
                              } else {
                                    px->fromstate.__mask = 0;
                                    inci = 1;
                              ILLEGAL:
                                    if (px->skip_invalid_input) {
                                          px->skip_invalid_input = 2;   /* flag for iconv utility */
                                          goto BOM_SKIP_OUTPUT;
                                    }
                                    __set_errno(EILSEQ);
                              }
                              return (size_t)(-1);
                        }
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: optimize this.
#endif
                        if (p != NULL) { /* incomplete char case */
                              goto INVALID;
                        }
                        p = *inbuf + 1;   /* nul */
                  }
                  inci = p - *inbuf;
            } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) {     /* Non-ASCII... */
                  if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
                        goto ILLEGAL;
                  } else {                /* some other 8-bit ascii-extension codeset */
                        const __codeset_8_bit_t *c8b
                              = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
                        wc -= 0x80;
                        wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
                                           (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                                            << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                        if (!wc) {
                              goto ILLEGAL;
                        }
                  }
            }


            if (px->tobom) {
                  /* Emit a BOM first; the input character is not consumed
                   * (inci == 0) and is decoded again on the next pass. */
                  inci = 0;
                  wc = 0xfeffU;
      GOT_BOM:
                  px->tobom = 0;
            }

            /* ---- Encode wc into the output. ---- */
            if (px->tocodeset >= IC_MULTIBYTE) {
                  inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
                  if (*outbytesleft < inco) goto TOO_BIG;
                  if (px->tocodeset != IC_WCHAR_T) {
                        if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
                                                             ? 0x7fffffffUL : 0x10ffffUL)
#ifdef KUHN
                              || (((__uwchar_t)(wc - 0xfffeU)) < 2)
                              || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
#endif
                              ) {
                        REPLACE_32:
                              wc = 0xfffd;
                              ++nrcount;
                        }
                  }
                  if (inco == 4) {
                        if (px->tocodeset & 1) wc = bswap_32(wc);
                  } else {
                        if (((__uwchar_t)wc ) > 0xffffU) {
                              /* Needs a surrogate pair; only UTF-16 has them. */
                              if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
                                    goto REPLACE_32;
                              }
                              if (*outbytesleft < (inco = 4)) goto TOO_BIG;
                              wc2 = 0xdc00U + (wc & 0x3ff);
                              wc = 0xd800U + ((wc >> 10) & 0x3ff);
                              if (px->tocodeset & 1) {
                                    wc = bswap_16(wc);
                                    wc2 = bswap_16(wc2);
                              }
                              wc += (wc2 << 16);
                        } else if (px->tocodeset & 1) wc = bswap_16(wc);
                  }
                  /* Store least significant byte first. */
                  (*outbuf)[0] = (char)((unsigned char)(wc));
                  (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
                  if (inco == 4) {
                        (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
                        (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
                  }
            } else if (px->tocodeset == IC_UTF_8) {
                  const wchar_t *pw = &wc;
                  do {
                        r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
                        if (r != (size_t)(-1)) {
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: What happens for a nul?
#endif
                              if (r == 0) {
                                    if (wc != 0) {
                                          goto TOO_BIG;
                                    }
                                    ++r;
                              }
                              break;
                        }
                        /* Not encodable: retry with the replacement character. */
                        wc = 0xfffdU;
                        ++nrcount;
                  } while (1);
                  inco = r;
            } else if (((__uwchar_t)(wc)) < 0x80) {
            CHAR_GOOD:
                        **outbuf = wc;
            } else {
                  if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
                        const __codeset_8_bit_t *c8b
                              = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
                        __uwchar_t u;
                        u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
                        u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
                                     + ((wc >> Cwc2c_TT_SHIFT)
                                          & ((1 << Cwc2c_TI_SHIFT)-1))];
                        wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
                                     + (u << Cwc2c_TT_SHIFT)
                                     + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
                        if (wc) {
                              goto CHAR_GOOD;
                        }
                  }
                  /* No mapping in the target codeset. */
                  **outbuf = '?';
                  ++nrcount;
            }

            *outbuf += inco;
            *outbytesleft -= inco;
      BOM_SKIP_OUTPUT:
            *inbuf += inci;
            *inbytesleft -= inci;
      }
      return nrcount;
}

#endif
/**********************************************************************/
#ifdef L_iconv_main

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <iconv.h>
#include <stdarg.h>
#include <libgen.h>

extern const unsigned char __iconv_codesets[];

#define IBUF BUFSIZ
#define OBUF BUFSIZ

char *progname;               /* argv[0] as given to main(); prefixes error_msg() output */
int hide_errors;              /* set to 1 by the -s option; makes error_msg() exit silently */

/* Report a fatal error and terminate the program.
 *
 * Unless hide_errors is set, writes "<progname>: " followed by the
 * printf-style message to stderr.  In every case the process then exits
 * with EXIT_FAILURE; this function never returns. */
static void error_msg(const char *fmt, ...)
       __attribute__ ((noreturn, format (printf, 1, 2)));

static void error_msg(const char *fmt, ...)
{
      if (hide_errors) {
            exit(EXIT_FAILURE);
      }

      {
            va_list ap;

            fprintf(stderr, "%s: ", progname);
            va_start(ap, fmt);
            vfprintf(stderr, fmt, ap);
            va_end(ap);
      }

      exit(EXIT_FAILURE);
}

/* iconv utility.
 *
 *   iconv [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]
 *   iconv -l
 *
 * Options (index into opts[] in parentheses):
 *   -t (0) target codeset        -f (1) source codeset
 *   -o (2) output file           -c (3) skip invalid input
 *   -s (4) suppress messages     -l (5) list known codesets
 *
 * With no input file, or "-", standard input is read.  Exits with
 * EXIT_FAILURE on any error, and also when -c actually had to skip
 * invalid input. */
int main(int argc, char **argv)
{
      FILE *ifile;
      FILE *ofile = stdout;
      const char *p;
      const char *s;
      static const char opt_chars[] = "tfocsl";
                                    /* 012345 */
      const char *opts[sizeof(opt_chars)]; /* last is infile name */
      iconv_t ic;
      char ibuf[IBUF];
      char obuf[OBUF];
      char *pi;
      char *po;
      size_t ni, no, r, pos;
      int out_full;           /* last iconv() call stopped with E2BIG */
      int skip_state;         /* skip_invalid_input saved before the descriptor is freed */

      hide_errors = 0;

      for (s = opt_chars ; *s ; s++) {
            opts[ s - opt_chars ] = NULL;
      }

      progname = *argv;
      while (--argc) {
            p = *++argv;
            if ((*p != '-') || (*++p == 0)) {
                  break;      /* first non-option argument, or a lone "-" */
            }
            do {
                  if ((s = strchr(opt_chars,*p)) == NULL) {
                  USAGE:
                        s = basename(progname);
                        fprintf(stderr,
                                    "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
                                    "  or\n%s -l\n", s, s);
                        return EXIT_FAILURE;
                  }
                  if ((s - opt_chars) < 3) {
                        /* -t, -f and -o take the next argument; each may
                         * be given only once. */
                        if ((--argc == 0) || opts[s - opt_chars]) {
                              goto USAGE;
                        }
                        opts[s - opt_chars] = *++argv;
                  } else {
                        /* Flag options: any non-NULL value marks them as set. */
                        opts[s - opt_chars] = p;
                  }
            } while (*++p);
      }

      if (opts[5]) {                      /* -l */
            fprintf(stderr, "Recognized codesets:\n");
            for (s = __iconv_codesets ; *s ; s += *s) {
                  fprintf(stderr,"  %s\n", s+2);
            }
            s = __LOCALE_DATA_CODESET_LIST;
            do {
                  fprintf(stderr,"  %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
            } while (*++s);

            return EXIT_SUCCESS;
      }

      if (opts[4]) {                      /* -s */
            hide_errors = 1;
      }

      if (!opts[0] || !opts[1]) {
            goto USAGE;
      }
      if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
            error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
      }
      if (opts[3]) {                      /* -c */
            ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
      }

      if ((s = opts[2]) != NULL) {
            if (!(ofile = fopen(s, "w"))) {
                  error_msg( "couldn't open %s for writing\n", s);
            }
      }

      /* ni is the number of unconverted bytes carried over at the start of
       * ibuf; it persists across input files so that a character split
       * between two reads (or two files) can still be completed. */
      pos = ni = 0;
      do {
            if (!argc || ((**argv == '-') && !((*argv)[1]))) {
                  ifile = stdin;          /* we don't check for duplicates */
            } else if (!(ifile = fopen(*argv, "r"))) {
                  error_msg( "couldn't open %s for reading\n", *argv);
            }

            while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
                  pos += r;
                  ni += r;
                  pi = ibuf;
                  /* Convert until the input is used up.  When iconv() stops
                   * because obuf is full (E2BIG), flush obuf and carry on
                   * with the rest; otherwise input left over at end of file
                   * would be misreported as an incomplete sequence. */
                  do {
                        no = OBUF;
                        po = obuf;
                        out_full = 0;
                        if (iconv(ic, &pi, &ni, &po, &no) == ((size_t)(-1))) {
                              if (errno == E2BIG) {
                                    out_full = 1;
                              } else if (errno != EINVAL) {
                                    error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
                              }
                              /* EINVAL: incomplete character, wait for more input. */
                        }
                        if ((r = OBUF - no) > 0) {
                              if (fwrite(obuf, 1, r, ofile) < r) {
                                    error_msg( "write error\n");
                              }
                        }
                        /* r > 0 guards against spinning if nothing was produced. */
                  } while (out_full && ni && (r > 0));
                  if (ni) {               /* still bytes in buffer! */
                        memmove(ibuf, pi, ni);
                  }
            }

            if (ferror(ifile)) {
                  error_msg( "read error\n");
            }

            ++argv;

            if (ifile != stdin) {
                  fclose(ifile);
            }

      } while (--argc > 0);

      /* Read the flag before iconv_close() frees the descriptor. */
      skip_state = ((_UC_iconv_t *) ic)->skip_invalid_input;
      iconv_close(ic);

      if (fflush(ofile) == EOF) {
            error_msg( "write error\n");
      }

      if (ni) {
            error_msg( "incomplete sequence\n");
      }

      return (skip_state < 2) ? EXIT_SUCCESS : EXIT_FAILURE;
}

#endif
/**********************************************************************/

Generated by  Doxygen 1.6.0   Back to index