add oversampler

2024-05-24 13:28:31 +02:00
parent e4a4a661a0
commit 989dba5a6b
484 changed files with 313937 additions and 0 deletions
--- a/oversampling/WDL/wdlutf8.h
+++ b/oversampling/WDL/wdlutf8.h
@@ -0,0 +1,317 @@
+/*
+WDL - wdlutf8.h
+Copyright (C) 2005 and later, Cockos Incorporated
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _WDLUTF8_H_
+#define _WDLUTF8_H_
+
+/* todo: handle overlongs?
+ * todo: handle multi-byte (make WideStr support UTF-16)
+ */
+
+#include "wdltypes.h"
+
+#ifndef WDL_WCHAR
+  #ifdef _WIN32
+    #define WDL_WCHAR WCHAR
+  #else
+    // wchar_t is often 4 bytes on macOS/linux, so sizeof(WDL_WCHAR) is platform dependent! beware dragons!
+    #define WDL_WCHAR wchar_t
+  #endif
+#endif
+
+
+// decodes the UTF-8 sequence that starts at rd.
+// returns its size in bytes, sets cOut to the code point.
+// if invalid UTF-8 (stray continuation byte, overlong form, or a lead byte
+// that is not followed by enough continuation bytes), sets cOut to the first
+// character (as unsigned char) and returns 1.
+// well-formed 5 and 6 byte sequences are skipped as a unit: cOut is set to '_'.
+// cOut may be NULL if you only want the size of the character.
+// bytes after rd[0] are only read while the preceding byte was non-zero, so a
+// NUL terminated string is never read past its terminator.
+static int WDL_STATICFUNC_UNUSED wdl_utf8_parsechar(const char *rd, int *cOut) 
+{
+  const unsigned char *p = (const unsigned char *)rd;
+  const unsigned char b0 = *p;
+  unsigned char b1,b2,b3;
+
+  // default result for every "return 1" below: the raw byte itself
+  if (cOut) *cOut = b0;
+
+  if (b0 < 0x80) return 1; // plain ASCII
+
+  // 0x80..0xBF are continuation bytes and can never start a sequence;
+  // 0xC0 and 0xC1 could only start an overlong 2-byte form.
+  if (b0 < 0xC2) return 1;
+
+  if (((b1=p[1])&0xC0) != 0x80) return 1;
+
+  if (b0 < 0xE0)
+  {
+    if (cOut) *cOut = ((b0&0x1F)<<6)|(b1&0x3F);
+    return 2;
+  }
+
+  if (((b2=p[2])&0xC0) != 0x80) return 1;
+
+  if (b0 < 0xF0)
+  {
+    if (!(b0&0xF) && !(b1&0x20)) return 1; // detect overlong (value would be < 0x800)
+
+    if (cOut) *cOut = ((b0&0x0F)<<12)|((b1&0x3F)<<6)|(b2&0x3f);
+    return 3;
+  }
+
+  if (((b3=p[3])&0xC0) != 0x80) return 1;
+
+  if (b0 < 0xF8)
+  {
+    if (!(b0&0x7) && !(b1&0x30)) return 1; // detect overlong (value would be < 0x10000)
+
+    if (cOut) *cOut = ((b0&7)<<18)|((b1&0x3F)<<12)|((b2&0x3F)<<6)|(b3&0x3F);
+    return 4;
+  }
+
+  // UTF-8 does not actually support 5-6 byte sequences as of 2003 (RFC-3629)
+  // skip them and return _
+  if ((p[4]&0xC0) != 0x80) return 1;
+  if (b0 < 0xFC) 
+  {
+    if (cOut) *cOut = '_';
+    return 5;
+  }
+
+  if ((p[5]&0xC0) != 0x80) return 1;
+  if (cOut) *cOut = '_';
+  return 6;
+}
+
+
+// encodes code point c as UTF-8 into dest, which has room for dest_len bytes.
+// does NOT nul terminate.
+// returns the number of bytes written (1 to 4), 0 if dest_len is too small
+// for the encoding, or -1 if c is out of range (negative or >= 0x200000).
+// the range check takes precedence over the space check.
+static int WDL_STATICFUNC_UNUSED wdl_utf8_makechar(int c, char *dest, int dest_len)
+{
+  int nbytes;
+
+  if (c < 0 || c >= 0x200000) return -1; // out of range character
+
+  // encoded length is determined by the magnitude of the code point
+  nbytes = c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4;
+  if (dest_len < nbytes) return 0;
+
+  switch (nbytes)
+  {
+    case 1:
+      dest[0] = (char)c;
+    break;
+    case 2:
+      dest[0] = (char)(0xC0 | (c >> 6));
+      dest[1] = (char)(0x80 | (c & 0x3F));
+    break;
+    case 3:
+      dest[0] = (char)(0xE0 | (c >> 12));
+      dest[1] = (char)(0x80 | ((c >> 6) & 0x3F));
+      dest[2] = (char)(0x80 | (c & 0x3F));
+    break;
+    default:
+      dest[0] = (char)(0xF0 | (c >> 18));
+      dest[1] = (char)(0x80 | ((c >> 12) & 0x3F));
+      dest[2] = (char)(0x80 | ((c >> 6) & 0x3F));
+      dest[3] = (char)(0x80 | (c & 0x3F));
+    break;
+  }
+  return nbytes;
+}
+
+
+// converts the NUL terminated string src to a NUL terminated wide string in dest.
+// destlenbytes is the size of dest in BYTES, not in characters.
+// invalid UTF-8 are now treated as ANSI characters for this function
+// (each offending byte is copied through as its own character).
+// every decoded code point is stored in a single WDL_WCHAR (no UTF-16
+// surrogate pairs are produced, see the todo at the top of this file).
+// output is truncated if dest is too small. src may be NULL (gives an empty string).
+// returns the number of wide characters written, not counting the terminator.
+// returns 0 and leaves dest untouched if dest is NULL or cannot hold even the terminator.
+static int WDL_STATICFUNC_UNUSED WDL_MBtoWideStr(WDL_WCHAR *dest, const char *src, int destlenbytes)
+{
+  WDL_WCHAR *w = dest, *dest_endp;
+
+  // a buffer smaller than one WDL_WCHAR has no room for the terminator:
+  // writing *w=0 below would run past the end of it
+  if (!dest || destlenbytes < (int)sizeof(WDL_WCHAR)) return 0;
+
+  // the last slot is reserved for the terminator (computed only once dest is known to be valid)
+  dest_endp = dest + (size_t)destlenbytes/sizeof(WDL_WCHAR) - 1;
+
+  if (src) while (*src && w < dest_endp)
+  {
+    int c;
+    const int sz = wdl_utf8_parsechar(src,&c);
+    *w++ = (WDL_WCHAR)c;
+    src += sz;
+  }
+  *w = 0;
+  return (int)(w-dest);
+}
+
+
+// like wdl_utf8_makechar, except the result is always nul terminated and errors
+// are reported differently: if c is out of range or does not fit, dest becomes "_"
+// and 1 is returned.
+// negative values for character are treated as 0.
+// destlen is the full size of dest including the terminator; if it is less than 2
+// nothing can be encoded: 0 is returned (dest is set to "" when destlen is exactly 1).
+static int WDL_STATICFUNC_UNUSED WDL_MakeUTFChar(char* dest, int c, int destlen)
+{
+  int len;
+
+  if (destlen < 2)
+  {
+    if (destlen == 1) *dest = 0;
+    return 0;
+  }
+
+  if (c < 0) c = 0;
+
+  // keep one byte back for the terminator
+  len = wdl_utf8_makechar(c, dest, destlen-1);
+  if (len >= 1)
+  {
+    dest[len] = 0;
+    return len;
+  }
+
+  // insufficient space or out of range character
+  dest[0] = '_';
+  dest[1] = 0;
+  return 1;
+}
+
+// converts the NUL terminated wide string src to NUL terminated UTF-8 in dest.
+// destlenbytes is the size of dest in bytes, including room for the terminator.
+// characters that wdl_utf8_makechar rejects as out of range are skipped.
+// conversion stops early, without splitting a character, when dest is full.
+// src may be NULL (gives an empty string).
+// returns the number of bytes written, not counting the terminator.
+// returns 0 and leaves dest untouched if dest is NULL or destlenbytes < 1.
+static int WDL_STATICFUNC_UNUSED WDL_WideToMBStr(char *dest, const WDL_WCHAR *src, int destlenbytes)
+{
+  char *p = dest, *dest_endp;
+
+  if (!dest || destlenbytes < 1) return 0;
+
+  // last byte is reserved for the terminator (computed only once the arguments are
+  // validated, so no pointer arithmetic is done on a NULL dest or a bogus length)
+  dest_endp = dest + destlenbytes - 1;
+
+  if (src) while (*src && p < dest_endp)
+  {
+    const int v = wdl_utf8_makechar(*src++,p,(int)(dest_endp-p));
+    if (v == 0) break; // out of space
+    if (v > 0) p += v;
+    // v < 0: character can not be encoded, drop it and carry on
+  }
+  *p = 0;
+  return (int)(p-dest);
+}
+
+// classifies the NUL terminated string str:
+// returns >0 if UTF-8, -1 if 8-bit chars occur that are not UTF-8, or 0 if ASCII
+// (a NULL str also returns 0)
+static int WDL_STATICFUNC_UNUSED WDL_DetectUTF8(const char *str)
+{
+  const unsigned char *rd = (const unsigned char *)str;
+  int found_multibyte = 0;
+
+  if (!rd) return 0;
+
+  while (*rd)
+  {
+    const unsigned char lead = *rd;
+    int len;
+
+    if (lead < 0x80)
+    {
+      rd++; // ASCII
+      continue;
+    }
+
+    // only 0xC2..0xF7 are accepted as the first byte of a multi-byte sequence
+    if (lead < 0xC2 || lead > 0xF7) return -1;
+
+    len = wdl_utf8_parsechar((const char *)rd,NULL);
+    if (len < 2) return -1; // wdl_utf8_parsechar returns length=1 if it couldn't parse UTF-8 properly
+
+    rd += len;
+    found_multibyte = 1;
+  }
+  return found_multibyte;
+}
+
+
+// returns the byte offset in str of the character with index charpos.
+// stops at the end of the string, so the result never exceeds the string length.
+// a charpos of 0 or less gives 0.
+static int WDL_STATICFUNC_UNUSED WDL_utf8_charpos_to_bytepos(const char *str, int charpos)
+{
+  int offs = 0, i;
+  for (i = 0; i < charpos && str[offs]; i++)
+  {
+    offs += wdl_utf8_parsechar(str+offs,NULL);
+  }
+  return offs;
+}
+// returns the number of characters that start before byte offset bytepos in str.
+// counting stops at the end of the string if that comes first.
+static int WDL_STATICFUNC_UNUSED WDL_utf8_bytepos_to_charpos(const char *str, int bytepos)
+{
+  int nchars = 0, offs = 0;
+  for (; offs < bytepos && str[offs]; nchars++)
+  {
+    offs += wdl_utf8_parsechar(str+offs,NULL);
+  }
+  return nchars;
+}
+
+#define WDL_utf8_get_charlen(rd) WDL_utf8_bytepos_to_charpos((rd), 0x7fffffff) // character count of the NUL terminated string rd (byte limit set to INT_MAX so only the terminator stops the count)
+
+// converts the single character at p to upper case (upper == 1) or lower case
+// (upper == -1), in place.
+// handles ASCII letters and the 2-byte UTF-8 sequences with lead byte 0xc3, 0xc4
+// or 0xc5 (u+0c0 to u+17f). only p[0] and p[1] are ever written, so the encoded
+// length never changes. anything else, including a lead byte that is not followed
+// by a continuation byte, is left untouched.
+static void WDL_STATICFUNC_UNUSED wdl_utf8_set_char_case(char *p, int upper) // upper 1 or -1 only
+{
+  const unsigned char c1 = (unsigned char)*p;
+  WDL_ASSERT(upper == 1 || upper == -1);
+  if (c1 >= 'a' && c1 <= 'z')
+  {
+    if (upper>0) *p += 'A'-'a';
+  }
+  else if (c1 >= 'A' && c1 <= 'Z')
+  {
+    if (upper<0) *p -= 'A'-'a';
+  }
+  else if (c1 >= 0x80)
+  {
+    // second byte as an offset into the continuation range 0x80..0xbf.
+    // a value of 0x40 or more means p[1] is not a continuation byte (it may
+    // even be the NUL terminator), in which case nothing below may modify it.
+    const unsigned char cc = (unsigned char)p[1] - 0x80;
+    switch (c1)
+    {
+      case 0xc3: // u+0c0 to u+0ff as 0..0x3f
+        // bit 0x20 selects lowercase, for all values except:
+        //   u+0d7 and u+0f7 (multiplication and division signs), and
+        //   u+0df and u+0ff (sharp s and y with diaeresis), which are both
+        //   lowercase letters and therefore not a case pair of each other
+        if (cc < 0x40 && (cc&~0x20) != 0x17 && (cc&~0x20) != 0x1f)
+        {
+          if (upper>0) p[1] &= ~0x20;
+          else p[1] |= 0x20;
+        }
+      break;
+      case 0xc4: // u+100 to u+13f
+        if (cc <= 0x37)
+        {
+          // u+100 to u+137 low bit is lowercase
+          if (upper>0) p[1] &= ~1;
+          else p[1] |= 1;
+        }
+        // u+138 is not cased
+        else if (cc >= 0x39 && cc < 0x3f)
+        {
+          // u+139 to u+13e, odd is uppercase
+          if ((cc & 1) != (upper>0)) p[1] -= upper;
+        }
+        else if (cc == 0x3f && upper<0) // u+13f convert to u+140 (pair straddles the lead byte change)
+        {
+          p[0]++;
+          p[1] -= 0x3f;
+        }
+      break;
+      case 0xc5: // u+140 to u+17f
+        // u+149 and u+178 and u+17f are not cased
+        if (cc == 0 && upper>0) // u+140 -> u+13f
+        {
+          p[0]--;
+          p[1] |= 0x3f;
+        }
+        else if (cc >= 0xa && cc <= 0x37) // u+14a to u+177 low bit is lowercase
+        {
+          if (upper>0) p[1] &= ~1;
+          else p[1] |= 1;
+        }
+        else if ((cc > 0 && cc <= 8) || (cc >= 0x39 && cc <= 0x3e))
+        {
+          // u+141 to u+148 and u+179 to u+17e have odd=uppercase
+          if ((cc & 1) != (upper>0)) p[1] -= upper;
+        }
+      break;
+      default: // other lead bytes are not handled
+      break;
+    }
+  }
+}
+
+
+#endif