sync code with last improvements from OpenBSD

2023-08-28 05:57:34 +00:00 · 2023-08-28 05:57:34 +00:00 · 88965415ff
commit 88965415ff
26235 changed files with 29195616 additions and 0 deletions
--- a/dist/fontconfig/fc-case/fc-case.py
+++ b/dist/fontconfig/fc-case/fc-case.py
@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+#
+# fontconfig/fc-case/fc-case.py
+#
+# Copyright © 2004 Keith Packard
+# Copyright © 2019 Tim-Philipp Müller
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of the author(s) not be used in
+# advertising or publicity pertaining to distribution of the software without
+# specific, written prior permission.  The authors make no
+# representations about the suitability of this software for any purpose.  It
+# is provided "as is" without express or implied warranty.
+#
+# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+from enum import Enum
+import argparse
+import string
+import sys
+
+class CaseFoldClass(Enum):
+    COMMON = 1
+    FULL = 2
+    SIMPLE = 3
+    TURKIC = 4
+
+class CaseFoldMethod(Enum):
+    RANGE = 0
+    EVEN_ODD = 1
+    FULL = 2
+
+caseFoldClassMap = {
+  'C' : CaseFoldClass.COMMON,
+  'F' : CaseFoldClass.FULL,
+  'S' : CaseFoldClass.SIMPLE,
+  'T' : CaseFoldClass.TURKIC
+}
+
+folds = []
+
+def ucs4_to_utf8(ucs4):
+    utf8_rep = []
+    
+    if ucs4 < 0x80:
+        utf8_rep.append(ucs4)
+        bits = -6
+    elif ucs4 < 0x800:
+        utf8_rep.append(((ucs4 >> 6) & 0x1F) | 0xC0)
+        bits = 0
+    elif ucs4 < 0x10000:
+        utf8_rep.append(((ucs4 >> 12) & 0x0F) | 0xE0)
+        bits = 6
+    elif ucs4 < 0x200000:
+        utf8_rep.append(((ucs4 >> 18) & 0x07) | 0xF0)
+        bits = 12
+    elif ucs4 < 0x4000000:
+        utf8_rep.append(((ucs4 >> 24) & 0x03) | 0xF8)
+        bits = 18
+    elif ucs4 < 0x80000000:
+        utf8_rep.append(((ucs4 >> 30) & 0x01) | 0xFC)
+        bits = 24
+    else:
+        return [];
+
+    while bits >= 0:
+        utf8_rep.append(((ucs4 >> bits) & 0x3F) | 0x80)
+        bits-= 6
+
+    return utf8_rep
+
+def utf8_size(ucs4):
+    return len(ucs4_to_utf8(ucs4))
+
+case_fold_method_name_map = {
+    CaseFoldMethod.RANGE: 'FC_CASE_FOLD_RANGE,',
+    CaseFoldMethod.EVEN_ODD: 'FC_CASE_FOLD_EVEN_ODD,',
+    CaseFoldMethod.FULL: 'FC_CASE_FOLD_FULL,',
+}
+
+if __name__=='__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('case_folding_file')
+    parser.add_argument('--template', dest='template_file', default=None)
+    parser.add_argument('--output', dest='output_file', default=None)
+
+    args = parser.parse_args()
+
+    minFoldChar = None
+    maxFoldChar = None
+    fold = None
+
+    foldChars = []
+    maxFoldChars = 0
+
+    maxExpand = 0
+
+    # Read the standard Unicode CaseFolding.txt file
+    with open(args.case_folding_file, 'r', encoding='utf-8') as casefile:
+        for cnt, line in enumerate(casefile):
+            if not line or not line[0] in string.hexdigits:
+                continue
+
+            # print('Line {}: {}'.format(cnt, line.strip()))
+
+            tokens = line.split('; ')
+
+            if len(tokens) < 3:
+                print('Not enough tokens in line {}'.format(cnt), file=sys.stderr)
+                sys.exit(1)
+
+            # Get upper case value
+            upper = int(tokens.pop(0), 16)
+
+            # Get class
+            cfclass = caseFoldClassMap[tokens.pop(0)]
+
+            # Get list of result characters
+            lower = list(map(lambda s: int(s,16), tokens.pop(0).split()))
+
+            # print('\t----> {:04X} {} {}'.format(upper, cfclass, lower))
+
+            if not minFoldChar:
+                minFoldChar = upper
+
+            maxFoldChar = upper;
+
+            if cfclass in [CaseFoldClass.COMMON, CaseFoldClass.FULL]:
+                if len(lower) == 1:
+                    # foldExtends
+                    if fold and fold['method'] == CaseFoldMethod.RANGE:
+                        foldExtends = (lower[0] - upper) == fold['offset'] and upper == fold['upper'] + fold['count']
+                    elif fold and fold['method'] == CaseFoldMethod.EVEN_ODD:
+                        foldExtends = (lower[0] - upper) == 1 and upper == (fold['upper'] + fold['count'] + 1)
+                    else:
+                        foldExtends = False
+
+                    if foldExtends:
+                        # This modifies the last fold item in the array too
+                        fold['count'] = upper - fold['upper'] + 1;
+                    else:
+                        fold = {}
+                        fold['upper'] = upper
+                        fold['offset'] = lower[0] - upper;
+                        if fold['offset'] == 1:
+                            fold['method'] = CaseFoldMethod.EVEN_ODD
+                        else:
+                            fold['method'] = CaseFoldMethod.RANGE
+                        fold['count'] = 1
+                        folds.append(fold)
+                    expand = utf8_size (lower[0]) - utf8_size(upper)
+                else:
+                    fold = {}
+                    fold['upper'] = upper
+                    fold['method'] = CaseFoldMethod.FULL
+                    fold['offset'] = len(foldChars)
+
+                    # add chars
+                    for c in lower:
+                        utf8_rep = ucs4_to_utf8(c)
+                        # print('{} -> {}'.format(c,utf8_rep))
+                        for utf8_char in utf8_rep:
+                            foldChars.append(utf8_char)
+
+                    fold['count'] = len(foldChars) - fold['offset']
+                    folds.append(fold)
+
+                    if fold['count'] > maxFoldChars:
+                        maxFoldChars = fold['count']
+
+                    expand = fold['count'] - utf8_size(upper)
+                    if expand > maxExpand:
+                        maxExpand = expand
+
+    # Open output file
+    if args.output_file:
+        sys.stdout = open(args.output_file, 'w', encoding='utf-8')
+
+    # Read the template file
+    if args.template_file:
+        tmpl_file = open(args.template_file, 'r', encoding='utf-8')
+    else:
+        tmpl_file = sys.stdin
+    
+    # Scan the input until the marker is found
+    # FIXME: this is a bit silly really, might just as well harcode
+    #        the license header in the script and drop the template
+    for line in tmpl_file:
+        if line.strip() == '@@@':
+            break
+        print(line, end='')
+    
+    # Dump these tables
+    print('#define FC_NUM_CASE_FOLD\t{}'.format(len(folds)))
+    print('#define FC_NUM_CASE_FOLD_CHARS\t{}'.format(len(foldChars)))
+    print('#define FC_MAX_CASE_FOLD_CHARS\t{}'.format(maxFoldChars))
+    print('#define FC_MAX_CASE_FOLD_EXPAND\t{}'.format(maxExpand))
+    print('#define FC_MIN_FOLD_CHAR\t0x{:08x}'.format(minFoldChar))
+    print('#define FC_MAX_FOLD_CHAR\t0x{:08x}'.format(maxFoldChar))
+    print('')
+
+    # Dump out ranges
+    print('static const FcCaseFold    fcCaseFold[FC_NUM_CASE_FOLD] = {')
+    for f in folds:
+         short_offset = f['offset']
+         if short_offset < -32367:
+             short_offset += 65536
+         if short_offset > 32368:
+             short_offset -= 65536
+         print('    {} 0x{:08x}, {:22s} 0x{:04x}, {:6d} {},'.format('{',
+               f['upper'], case_fold_method_name_map[f['method']],
+               f['count'], short_offset, '}'))
+    print('};\n')
+
+    # Dump out "other" values
+    print('static const FcChar8\tfcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {')
+    for n, c in enumerate(foldChars):
+        if n == len(foldChars) - 1:
+            end = ''
+        elif n % 16 == 15:
+            end = ',\n'
+        else:
+            end = ','
+        print('0x{:02x}'.format(c), end=end)
+    print('\n};')
+
+    # And flush out the rest of the input file
+    for line in tmpl_file:
+        print(line, end='')
+    
+    sys.stdout.flush()