ports/inputmethods/libkkc-data/patches/patch-sortlm_py

--- tools/sortlm.py.orig	Mon Jul 29 09:53:28 2013
+++ tools/sortlm.py	Thu Sep 23 10:02:37 2021
@@ -1,7 +1,7 @@
 #!/usr/bin/python

-# Copyright (C) 2011-2013 Daiki Ueno <ueno@gnu.org>
-# Copyright (C) 2011-2013 Red Hat, Inc.
+# Copyright (C) 2011-2014 Daiki Ueno <ueno@gnu.org>
+# Copyright (C) 2011-2014 Red Hat, Inc.

 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -40,10 +40,10 @@ class SortedGenerator(object):
         self.__min_cost = 0.0

     def read(self):
-        print "reading N-grams"
+        print("reading N-grams")
         self.__read_tries()
         self.__read_ngrams()
-        print "min cost = %lf" % self.__min_cost
+        print("min cost = %lf" % self.__min_cost)

     def __read_tries(self):
         while True:
@@ -58,7 +58,7 @@ class SortedGenerator(object):
             line = self.__infile.readline()
             if line == "":
                 break
-            line = line.strip()
+            line = line.strip('\n')
             if line == "":
                 break
             match = self.__ngram_line_regex.match(line)
@@ -89,7 +89,7 @@ class SortedGenerator(object):
                 line = self.__infile.readline()
                 if line == "":
                     break
-                line = line.strip()
+                line = line.strip('\n')
                 if line == "":
                     break
                 match = self.__ngram_line_regex.match(line)
@@ -125,14 +125,11 @@ class SortedGenerator(object):
         def quantize(cost, min_cost):
             return max(0, min(65535, int(cost * 65535 / min_cost)))

-        def cmp_header(a, b):
-            return cmp(a[0], b[0])
-
-        print "writing 1-gram file"
+        print("writing 1-gram file")
         unigram_offsets = {}
         unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
         offset = 0
-        for ids, value in sorted(self.__ngram_entries[0].iteritems()):
+        for ids, value in sorted(self.__ngram_entries[0].items()):
             unigram_offsets[ids[0]] = offset
             s = struct.pack("=HHH",
                             quantize(value[0], self.__min_cost),
@@ -143,13 +140,13 @@ class SortedGenerator(object):
             offset += 1
         unigram_file.close()

-        print "writing 2-gram file"
+        print("writing 2-gram file")
         bigram_offsets = {}
         bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
         keys = self.__ngram_entries[1].keys()
         items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
         offset = 0
-        for header, ids in sorted(items, cmp=cmp_header):
+        for header, ids in sorted(items, key=lambda x: x[0]):
             value = self.__ngram_entries[1][ids]
             bigram_offsets[ids] = offset
             s = struct.pack("=HH",
@@ -160,11 +157,11 @@ class SortedGenerator(object):
         bigram_file.close()

         if len(self.__ngram_entries[2]) > 0:
-            print "writing 3-gram file"
+            print("writing 3-gram file")
             trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
             keys = self.__ngram_entries[2].keys()
             items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
-            for header, ids in sorted(items, cmp=cmp_header):
+            for header, ids in sorted(items, key=lambda x: x[0]):
                 value = self.__ngram_entries[2][ids]
                 s = struct.pack("=H",
                                 quantize(value[0], self.__min_cost))