SecBSD's official ports repository
This commit is contained in:
commit
2c0afcbbf3
64331 changed files with 5339189 additions and 0 deletions
90
inputmethods/libkkc-data/patches/patch-sortlm_py
Normal file
90
inputmethods/libkkc-data/patches/patch-sortlm_py
Normal file
|
@ -0,0 +1,90 @@
|
|||
--- tools/sortlm.py.orig Mon Jul 29 09:53:28 2013
|
||||
+++ tools/sortlm.py Thu Sep 23 10:02:37 2021
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
-# Copyright (C) 2011-2013 Daiki Ueno <ueno@gnu.org>
|
||||
-# Copyright (C) 2011-2013 Red Hat, Inc.
|
||||
+# Copyright (C) 2011-2014 Daiki Ueno <ueno@gnu.org>
|
||||
+# Copyright (C) 2011-2014 Red Hat, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -40,10 +40,10 @@ class SortedGenerator(object):
|
||||
self.__min_cost = 0.0
|
||||
|
||||
def read(self):
|
||||
- print "reading N-grams"
|
||||
+ print("reading N-grams")
|
||||
self.__read_tries()
|
||||
self.__read_ngrams()
|
||||
- print "min cost = %lf" % self.__min_cost
|
||||
+ print("min cost = %lf" % self.__min_cost)
|
||||
|
||||
def __read_tries(self):
|
||||
while True:
|
||||
@@ -58,7 +58,7 @@ class SortedGenerator(object):
|
||||
line = self.__infile.readline()
|
||||
if line == "":
|
||||
break
|
||||
- line = line.strip()
|
||||
+ line = line.strip('\n')
|
||||
if line == "":
|
||||
break
|
||||
match = self.__ngram_line_regex.match(line)
|
||||
@@ -89,7 +89,7 @@ class SortedGenerator(object):
|
||||
line = self.__infile.readline()
|
||||
if line == "":
|
||||
break
|
||||
- line = line.strip()
|
||||
+ line = line.strip('\n')
|
||||
if line == "":
|
||||
break
|
||||
match = self.__ngram_line_regex.match(line)
|
||||
@@ -125,14 +125,11 @@ class SortedGenerator(object):
|
||||
def quantize(cost, min_cost):
|
||||
return max(0, min(65535, int(cost * 65535 / min_cost)))
|
||||
|
||||
- def cmp_header(a, b):
|
||||
- return cmp(a[0], b[0])
|
||||
-
|
||||
- print "writing 1-gram file"
|
||||
+ print("writing 1-gram file")
|
||||
unigram_offsets = {}
|
||||
unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
|
||||
offset = 0
|
||||
- for ids, value in sorted(self.__ngram_entries[0].iteritems()):
|
||||
+ for ids, value in sorted(self.__ngram_entries[0].items()):
|
||||
unigram_offsets[ids[0]] = offset
|
||||
s = struct.pack("=HHH",
|
||||
quantize(value[0], self.__min_cost),
|
||||
@@ -143,13 +140,13 @@ class SortedGenerator(object):
|
||||
offset += 1
|
||||
unigram_file.close()
|
||||
|
||||
- print "writing 2-gram file"
|
||||
+ print("writing 2-gram file")
|
||||
bigram_offsets = {}
|
||||
bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
|
||||
keys = self.__ngram_entries[1].keys()
|
||||
items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
|
||||
offset = 0
|
||||
- for header, ids in sorted(items, cmp=cmp_header):
|
||||
+ for header, ids in sorted(items, key=lambda x: x[0]):
|
||||
value = self.__ngram_entries[1][ids]
|
||||
bigram_offsets[ids] = offset
|
||||
s = struct.pack("=HH",
|
||||
@@ -160,11 +157,11 @@ class SortedGenerator(object):
|
||||
bigram_file.close()
|
||||
|
||||
if len(self.__ngram_entries[2]) > 0:
|
||||
- print "writing 3-gram file"
|
||||
+ print("writing 3-gram file")
|
||||
trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
|
||||
keys = self.__ngram_entries[2].keys()
|
||||
items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
|
||||
- for header, ids in sorted(items, cmp=cmp_header):
|
||||
+ for header, ids in sorted(items, key=lambda x: x[0]):
|
||||
value = self.__ngram_entries[2][ids]
|
||||
s = struct.pack("=H",
|
||||
quantize(value[0], self.__min_cost))
|
Loading…
Add table
Add a link
Reference in a new issue