Here is the repository for the Python generative text workshop:
Here's the markov.py module:
# Copyright 2005 Brendan Howell
# All rights reserved under the GPL: see http://www.fsf.org
# markov.py
# implements a markov class to generate pseudo text based on
# training from input.  based on shaney.py by Greg McFarlane
# and Joe Strout.
# search on "Mark V. Shaney" and "Markov Chains" for more info.

import sys
import random

try:
    import cPickle
except ImportError:
    # Python 3 has no separate cPickle module; keep the old name bound so
    # any other code referring to ``cPickle`` continues to work.
    import pickle as cPickle


class Markov:
    """Second-order Markov chain text generator.

    Attributes:
        dict: maps a ``(word_before, word)`` pair to the list of words seen
            immediately after that pair in the training text (repeats kept,
            so ``random.choice`` is frequency weighted).
        freq: maps every learned word to the number of times it was seen.
        ends: pairs whose second word ends with ``'#'``; ``'#'`` is the
            sentence terminator, so these pairs mark sentence boundaries.
            Generation both starts from and stops at one of these pairs.
    """

    # Hard limit on the words produced for a single sentence.  Without it a
    # corpus in which no boundary pair can be reached again (for example
    # "a b# c d e") makes the generation loop spin forever.
    MAX_WORDS = 1000

    def __init__(self):
        """Create an empty, untrained model."""
        self.dict = {}
        self.freq = {}
        self.ends = []

    def learn_text(self, learntext):
        """Add the whitespace-separated words of *learntext* to the model.

        Each call starts with an empty two-word history, so no transition is
        recorded across the boundary between two separate calls.
        """
        prev2 = ''
        prev1 = ''
        for word in learntext.split():
            # split() never yields empty strings, so this is true exactly
            # when two words of history are available.
            if prev1 != '' and prev2 != '':
                key = (prev2, prev1)
                if key in self.dict:
                    self.dict[key].append(word)
                else:
                    self.dict[key] = [word]
                    # Recorded only the first time the pair is seen, so
                    # every boundary pair appears once in ``ends``.
                    if prev1[-1:] == '#':
                        self.ends.append(key)
            self.freq[word] = self.freq.get(word, 0) + 1
            prev2, prev1 = prev1, word

    def learn_file(self, infile):
        """Read the file at path *infile* and learn its whole content."""
        with open(infile) as handle:
            text = handle.read()
        self.learn_text(text)

    def load_bin(self, binfile):
        """Return the object pickled in *binfile*.

        The current instance is not modified; callers use the return value.

        SECURITY: unpickling can execute arbitrary code.  Only load files
        that come from a trusted source.
        """
        with open(binfile, 'rb') as handle:
            return cPickle.load(handle)

    def save_bin(self, outfile):
        """Pickle this model into the file at path *outfile*."""
        with open(outfile, 'wb') as handle:
            cPickle.dump(self, handle)

    def _walk(self, inwords=()):
        """Return the words of one generated sentence as a list.

        The walk starts from a random boundary pair and follows learned
        transitions until it lands on a boundary pair again.  When the
        current pair has no known follower the walk jumps to a random
        boundary pair and carries on with the same sentence.

        Args:
            inwords: context words.  Whenever some of them are valid
                followers of the current pair, the next word is drawn from
                those instead of from all followers.

        Raises:
            IndexError: if no sentence boundary has been learned yet.
        """
        if not self.ends:
            raise IndexError(
                "cannot generate text: no sentence boundary learned yet")
        end_keys = set(self.ends)  # O(1) membership test inside the loop
        words = []
        key = ()  # never a learned pair, so the first pass picks a start
        while len(words) < self.MAX_WORDS:
            followers = self.dict.get(key)
            if followers is None:
                key = random.choice(self.ends)
                continue
            context = [w for w in inwords if w in followers]
            word = random.choice(context or followers)
            words.append(word)
            key = (key[1], word)
            if key in end_keys:
                break
        return words

    def print_sen(self):
        """Print one generated sentence, words separated by spaces."""
        print(" ".join(self._walk()))

    def get_joke(self):
        """Return one generated sentence followed by a newline.

        Every kept word is followed by a space; stand-alone ``"#"``
        terminator tokens are left out of the result.
        """
        kept = [word + " " for word in self._walk() if word != "#"]
        return "".join(kept) + "\n"

    def print_sens(self, n):
        """Print *n* generated sentences, one per line."""
        for _ in range(n):
            self.print_sen()

    def respond(self, intext):
        """Print a short reply steered by the words of *intext*."""
        print(self.responds(intext))

    def responds(self, intext):
        """Return a short reply steered by the words of *intext*.

        The number of sentences is ``3 * betavariate(1, 3)`` truncated to an
        integer, so the reply may be empty.
        """
        # betavariate() returns a float and range() only accepts integers.
        n = int(3 * random.betavariate(1, 3))
        return self.sens_context(n, intext)

    def sen_context(self, intext):
        """Return one sentence steered by *intext*.

        Every word, including terminator tokens, is followed by a space.
        """
        return "".join(word + ' ' for word in self._walk(intext.split()))

    def sens_context(self, n, intext):
        """Return *n* context-steered sentences, each followed by a space."""
        return "".join(self.sen_context(intext) + ' ' for _ in range(n))

    def print_sen_context(self, intext):
        """Print one sentence steered by the words of *intext*."""
        print(" ".join(self._walk(intext.split())))

    def print_sens_context(self, n, intext):
        """Print *n* sentences steered by the words of *intext*."""
        for _ in range(n):
            self.print_sen_context(intext)