File:  [DVB] / dietlibc / lib / mbrtoc32.c
Revision 1.1: download - view: text, annotated - select for diffs
Thu Oct 3 15:39:15 2019 UTC (4 years, 8 months ago) by leitner
Branches: MAIN
CVS tags: HEAD
  add stdalign.h
  add uchar.h and c16rtomb, c32rtomb, mbrtoc16, mbrtoc32
  add static_assert to <assert.h>

#include <uchar.h>
#include <errno.h>
#include "dietmb.h"

/* Read one UTF-8 character from s, but consume at most n bytes.
 * If that is only a partial UTF-8 sequence, return -2.
 * If it is an invalid UTF-8 sequence, set errno to EILSEQ and return -1.
 * If it is 0, then return 0.
 * Write that character to pwc if it is not NULL, return number of bytes
 * consumed from s.
 * We do not need (or use) the state.
 */

size_t mbrtoc32(char32_t *restrict pwc, const char *restrict s, size_t n, mbstate_t *restrict ps) {
  /* Parameters:
   *   pwc  where to store the decoded character; may be NULL
   *   s    input bytes; may be NULL (see below)
   *   n    maximum number of bytes to look at in s
   *   ps   conversion state; never read or written here
   * Returns:
   *   (size_t)-2  the n bytes are only a prefix of a sequence
   *   (size_t)-1  invalid sequence or encoded surrogate; errno=EILSEQ
   *   0           the decoded character is the null character
   *   otherwise   number of bytes consumed from s
   */
  uint32_t x;
  ssize_t r;
  (void)ps;
  /* C11 says a null s behaves like mbrtoc32(NULL,"",1,ps), i.e. it
   * converts the null character, stores nothing and returns 0. */
  if (!s) return 0;
  /* read one character */
  r=__mbrtox(s,n,&x);
  if (r<0) {
    /* only the "invalid" result sets errno; -2 is passed through as is */
    if (r==-1) errno=EILSEQ;
    return (size_t)r;
  }
  /* the surrogate pair ranges are reserved in input */
  /* NOTE(review): values above 0x10ffff are not rejected here; presumably
   * __mbrtox already refuses them -- confirm. */
  if (x>=0xd800 && x<=0xdfff) {
    errno=EILSEQ;
    return (size_t)-1;
  }
  /* Store before the null check: C11 requires the null character to be
   * "the value stored" when 0 is returned. */
  if (pwc) *pwc=x;
  if (x==0) return 0;
  return (size_t)r;
}

#ifdef UNITTEST
#include <assert.h>
#include <string.h>

#undef UNITTEST
#include "lib/mbrtox.c"

/* Self-test for mbrtoc32: decodes 1- to 4-byte sequences and checks that
 * invalid, truncated and surrogate input is reported via the (size_t)-1 /
 * (size_t)-2 return values.  Failures abort through assert(). */
int main(void) {
  /* the error returns are negative numbers converted to size_t */
  const size_t invalid=(size_t)-1;
  const size_t partial=(size_t)-2;
  char32_t x;
  mbstate_t s = { 0 };
  /* first some positive tests */
  assert(mbrtoc32(&x,"\x7a",1,&s)==1 && x==0x7a);
  assert(mbrtoc32(&x,"\xc3\x9f",2,&s)==2 && x==0xdf);
  assert(mbrtoc32(&x,"\xe6\xb0\xb4",3,&s)==3 && x==0x6c34);
  assert(mbrtoc32(&x,"\xf0\x9f\x8d\x8c",4,&s)==4 && x==0x1F34C);

  /* now some negative tests; every -1 return must come with errno==EILSEQ */
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\x8f",1,&s)==invalid && errno==EILSEQ);	// first byte continuation
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\xc3",1,&s)==partial);		// incomplete sequence
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\xe6\xb0",2,&s)==partial);		// incomplete sequence
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\xe6 ",2,&s)==invalid && errno==EILSEQ);	// invalid and incomplete, expect invalid
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\xfe\xff",2,&s)==invalid && errno==EILSEQ);	// BOM not valid in UTF-8
  errno=0; memset(&s,0,sizeof(s)); assert(mbrtoc32(&x,"\xed\xa0\x80",3,&s)==invalid && errno==EILSEQ);	// encoded surrogate U+D800
  return 0;
}
#endif

LinuxTV legacy CVS <linuxtv.org/cvs>