Sliding window predictor, alpha version.
import collections as co
import sys
class Oracle:
def __init__ (self, source):
with open (source, "r") as f:
text = f.read ()
self.text = text
def length (self):
return len (self.text)
def guess (self, index, char):
c = self.text [index]
return True if c == char else c
class Acolyte:
def __init__ (self, oracle):
self.oracle = oracle
self.index = 0
self.score = 0
def length (self):
return self.oracle.length ()
def guess (self, char):
index = self.index
prophecy = self.oracle.guess (index, char)
if prophecy is True:
self.score += 1
self.index += 1
return prophecy
def status (self):
length = self.oracle.length ()
index = self.index
score = self.score
return "{}={} {}={} {}={:.6f} {}={} {}={:.6f}".format (
"length", length,
"index", index,
"cover", index / length,
"score", score,
"ratio", score / index if index else 0)
class Temple:
def __init__ (self, argv):
pass
def preach (self, acolyte):
pass
class TempleOfSpace (Temple):
def preach (self, acolyte):
guess = acolyte.guess
for k in range (acolyte.length ()):
guess (' ')
class TempleOfSlidingWindow (Temple):
def __init__ (self, argv):
self.window = int (argv [0])
def preach (self, acolyte):
window = self.window
length = acolyte.length ()
if length < window: return
guess = acolyte.guess
predict = {}
start = ' '
key = ''.join (
map (lambda charguess: charguess [0] if charguess [1] is True else charguess [1],
map (lambda k: (start, guess (start)),
range (window))))
for k in range (window, length):
counts = predict.get (key)
if counts is None:
gu = guess (start)
ch = start if gu is True else gu
predict [key] = co.Counter ([ch])
key = key [1:] + ch
else:
pred = counts.most_common (1) [0] [0]
gu = guess (pred)
ch = pred if gu is True else gu
counts [ch] += 1
key = key [1:] + ch
print ("{}={}".format (
"predict", len (predict)))
def work_temple (src, rest):
temple = TempleOfSlidingWindow (rest)
# temple = TempleOfSpace (rest)
oracle = Oracle (src)
acolyte = Acolyte (oracle)
temple.preach (acolyte)
print (acolyte.status ())
def main_temple (argv):
src = argv [0]
rest = argv [1:]
work_temple (src, rest)
def main ():
globals () ["main_" + sys.argv [1]] (sys.argv [2:])
if __name__ == "__main__": main ()
Predictor runs for various window sizes:
$ python3 -m flake.predictext temple o20u.md 2
predict=3096
length=1223834 index=1223834 cover=1.000000 score=546526 ratio=0.446569
$ python3 -m flake.predictext temple o20u.md 3
predict=17927
length=1223834 index=1223834 cover=1.000000 score=712032 ratio=0.581804
$ python3 -m flake.predictext temple o20u.md 4
predict=54483
length=1223834 index=1223834 cover=1.000000 score=778871 ratio=0.636419
$ python3 -m flake.predictext temple o20u.md 5
predict=115721
length=1223834 index=1223834 cover=1.000000 score=784534 ratio=0.641046
$ python3 -m flake.predictext temple o20u.md 6
predict=195485
length=1223834 index=1223834 cover=1.000000 score=761032 ratio=0.621843
$ python3 -m flake.predictext temple o20u.md 7
predict=286261
length=1223834 index=1223834 cover=1.000000 score=723739 ratio=0.591370
best window size: 5
score=784534 ratio=0.641046