[ prog / sol / mona ]

prog


[challenge] Try to rewrite SICP [urgent]

11 2022-08-16 20:54

Sliding window predictor, alpha version.

import collections as co
import sys


class Oracle:
   def __init__ (self, source):
      with open (source, "r") as f:
         text = f.read ()

      self.text = text

   def length (self):
      return len (self.text)

   def guess (self, index, char):
      c = self.text [index]
      return True if c == char else c


class Acolyte:
   def __init__ (self, oracle):
      self.oracle = oracle
      self.index  = 0
      self.score  = 0

   def length (self):
      return self.oracle.length ()

   def guess (self, char):
      index    = self.index
      prophecy = self.oracle.guess (index, char)

      if prophecy is True:
         self.score += 1

      self.index += 1
      return prophecy

   def status (self):
      length = self.oracle.length ()
      index  = self.index
      score  = self.score

      return "{}={} {}={} {}={:.6f} {}={} {}={:.6f}".format (
         "length", length,
         "index",  index,
         "cover",  index / length,
         "score",  score,
         "ratio",  score / index if index else 0)


class Temple:
   def __init__ (self, argv):
      pass

   def preach (self, acolyte):
      pass


class TempleOfSpace (Temple):
   def preach (self, acolyte):
      guess = acolyte.guess

      for k in range (acolyte.length ()):
         guess (' ')


class TempleOfSlidingWindow (Temple):
   def __init__ (self, argv):
      self.window = int (argv [0])

   def preach (self, acolyte):
      window  = self.window
      length  = acolyte.length ()
      if length < window: return
      guess   = acolyte.guess
      predict = {}

      start = ' '
      key   = ''.join (
         map (lambda charguess: charguess [0] if charguess [1] is True else charguess [1],
         map (lambda k: (start, guess (start)),
         range (window))))

      for k in range (window, length):
         counts = predict.get (key)

         if counts is None:
            gu  = guess (start)
            ch  = start if gu is True else gu
            predict [key] = co.Counter ([ch])
            key = key [1:] + ch
         else:
            pred = counts.most_common (1) [0] [0]
            gu   = guess (pred)
            ch   = pred if gu is True else gu
            counts [ch] += 1
            key = key [1:] + ch

      print ("{}={}".format (
         "predict", len (predict)))


def work_temple (src, rest):
   temple  = TempleOfSlidingWindow (rest)
#  temple  = TempleOfSpace (rest)
   oracle  = Oracle (src)
   acolyte = Acolyte (oracle)
   temple.preach (acolyte)
   print (acolyte.status ())

def main_temple (argv):
   src  = argv [0]
   rest = argv [1:]
   work_temple (src, rest)


def main ():
   globals () ["main_" + sys.argv [1]] (sys.argv [2:])

if __name__ == "__main__": main ()

Predictor runs for various window sizes:

$ python3 -m flake.predictext temple o20u.md 2
predict=3096
length=1223834 index=1223834 cover=1.000000 score=546526 ratio=0.446569
$ python3 -m flake.predictext temple o20u.md 3
predict=17927
length=1223834 index=1223834 cover=1.000000 score=712032 ratio=0.581804
$ python3 -m flake.predictext temple o20u.md 4
predict=54483
length=1223834 index=1223834 cover=1.000000 score=778871 ratio=0.636419
$ python3 -m flake.predictext temple o20u.md 5
predict=115721
length=1223834 index=1223834 cover=1.000000 score=784534 ratio=0.641046
$ python3 -m flake.predictext temple o20u.md 6
predict=195485
length=1223834 index=1223834 cover=1.000000 score=761032 ratio=0.621843
$ python3 -m flake.predictext temple o20u.md 7
predict=286261
length=1223834 index=1223834 cover=1.000000 score=723739 ratio=0.591370

best window size: 5
score=784534 ratio=0.641046

14


VIP:

do not edit these