#!/usr/bin/python # User, google "1 in" to find other configurable odds, for mutations etc # How many words in the source text get mutated prior to being learned can # be changed this way it's around 1 in 60 words and 1 in 100 lines by default # I'm not sure if this hangs up sometimes or what. I'm trying to figure it # out. Just ctrl-c it if you don't see any new lines on stderr after a while. # But I should have that squashed soon if not already... # Command-line args # -showkills # -showbreeds # To alert to kills and breeds of word pairs in the output document itself # Hey try reading the output in Ace High Text To Speech for windows or # anything that reads the misspelled words doesnt spell them # there are only like 10 or so breeds or kills per 500 lines of input # sometimes so make a big corpus but ace high text to speech might not # load more than 10,000 or 20,000 lines or something. not sure. this # GA is weird I'll try to tune it to get more of both which is actually # really hard. it runs on the output of the markov chainer not the # lines of input really but there are more lines of markov output than # there are lines of input so I guess I really dont know for now # let it learn words in some table so it can brute force fuzz words until they match another one it knows and # like 1000 times or something, i don't know, and switch words to similar real words some of the time # yay i can put it in a function not drop it in the body i guess i'll just call it where i would have added it # sure # ok also just add random links to and from lucky words to get them said at # random points # just add/remove letters reverse some words yeah do that reverse em hurry # do syllables swaps # check these each when you do em # maybe i should protect the word ufo # maybe set word pairs default scores lower so they have to hide a while import random, sys, re, os if sys.argv.__contains__("-showbreeds"): showbreeds = 1 else: showbreeds = 0 if sys.argv.__contains__("-showkills"): showkills = 1 else: showkills = 0 if sys.argv.__contains__("-h") or sys.argv.__contains__("--help"): print "Usage:\n" print "cat text.txt | ./markov-attract-repel.py [-showkills] [-showbreeds] > output.txt" sys.exit() #random.seed(100) lvowels = 'aeiouy' uvowels = 'AEIOUY' lconso = 'bcdfghjklmnpqrstvwxyz' uconso = lconso.upper() # these are used a couple places skipwords = ['of','that','this','by','and','or','to','the','a','on','them'] # dont mutate these safewordpairs = [('.','.'),('.','\n'),('?','\n'),('!','\n')] # dont score and att/rep these # ^ these are safe from getting deleted or bred columnsForSoftWrapCheck = 50 wordpairstartenergy = 1000.0 startAttrep = 300.0 # attract/repel measure, based on time since occurrence attrepHit = 200.0 # attrep cost per word pair occurrence postbreedenergy = 1.0 wordscorethreshold = 1500 # basically how many words til occurring again is # a good idea. use something like 1000 or less # if you're using a really small corpus # stuff to tune in the gattrep function too, look there for gain and stuff textodds = 7 # much to mutate, 1 in this, per letter, when breed a line table = {} scores = {} attrep = {} # wow the markov cloud would be kind of ugly def removeFromMarkov((word1,word2)): #return word1 = word1.strip() word2 = word2.strip() # word2 = word2.replace("PERIOD", "\.") # WHAT IS THIS # word2 = word2.replace("Q-MARK", "?") # word2 = word2.replace("E-MARK", "!") # word1 = word1.replace("PERIOD", "\.") # word1 = word1.replace("Q-MARK", "?") # word1 = word1.replace("E-MARK", "!") if word1 == "": word1 = "\n" if word2 == "": word2 = "\n" if table.has_key((word1,word2)): sys.stderr.write("\nkill "+word1+", "+word2+"\n") if showkills == 1: print "\nkill "+word1+" "+word2+"\n" table.pop((word1,word2)) # this is gonna be pretty hard havetoclean = 1 for key in table.keys(): if not table.has_key(key): # print repr(key) # print "THATS IMPOSSIBLE" # sys.exit() # this should never happen but does # so we'll skip when it does wtf continue somedict = table[key] newdict = {} for word in somedict.keys(): if word == word2 and key[1] == word1: # print "trim: ("+key[0]+", "+key[1]+") of -> "+word2 pass else: newdict.update({word:somedict[word]}) # ew if there were no other suggestions than our killed word pair, # we need to provide some random ones # lets do three # if newdict == {}: # for n in range(3): # element = table[random.choice(table.keys())] # word = random.choice(element.keys()) # if newdict.has_key(word): # newdict.update({word:newdict[word]+1}) # christ # else: # newdict.update({word:1}) # table[key] = newdict # no instead we'll step back through the orphan chain and cut if off # where it joins another possibility table[key] = newdict if newdict == {}: rec_marko_kill(key[0], key[1]) #for key in table.keys(): # adict = table[key] # the key is a dictionary # for keeyy in adict.keys(): # keeyy = key[adict] # if keeyy == word1 or keeyy == word2: # # print "bad done" # adict.pop(keeyy) # table[key] = adict # table[key] = k # print key # print table[key] # print "HEY WTF" # sys.exit() def rec_marko_kill(word1,word2,recursions=0): sys.stdout.flush() # print "trim: pop ("+word1+", "+word2+"). no links?" table.pop((word1,word2)) go_on = 1 while go_on == 1: for key in table.keys(): if not table.has_key(key): continue somedict = table[key] newdict = {} for word in somedict.keys(): if word.strip().lower() == word2.strip().lower() and key[1].strip().lower() == word1.lower().strip(): # print "trim: ("+key[0]+", "+key[1]+"), of defunct -> "+word pass else: newdict.update({word:somedict[word]}) if newdict == {}: table[key] = {} # this is redundant but something is wrong right now recursions += 1 sys.stderr.write("trim: recursions: "+str(recursions)+", ") rec_marko_kill(key[0],key[1],recursions) else: table[key] = newdict go_on = 0 def gattrep_word_pairs((word1,word2)): # make one for single words too but # then do i remove all pairs in the # cloud whose keys are pairs not words # or what do i do wow # print "sdf gatt called" global skipwords word1 = word1.strip() word2 = word2.strip() for x in attrep.keys(): attrep[x] += 1 if scores[x] < 0: scores[x] += 0.005 else: scores[x] -= 0.01 if attrep.has_key((word1,word2)): # if hit scores[(word1,word2)] += attrep[(word1,word2)] attrep[(word1,word2)] -= attrepHit # wow a lot if attrep[(word1,word2)] < -10000: attrep[(word1,word2)] = -10000.0 # hmm if scores[(word1,word2)] < -10000: scores[(word1,word2)] = -10000.0 if scores[(word1,word2)] > 10000: scores[(word1,word2)] = 10000.0 else: scores.update({(word1,word2):wordpairstartenergy}) attrep.update({(word1,word2):startAttrep}) # if random.choice(range(1000)): # print attrep # no no def mutatelines(lines): # wow why all lines not line or word, wow global lvowels global uvowels global lconso global uconso global skipwords newarray = [] # hahaha global textodds for line in lines: newline = "" for word in line.split(" "): # this was a loop on the words in the line before ctr = 0 if word.strip() == "": continue # should not happen if skipwords.count(word.strip().lower()) == 0: # so we wont use this either #if (1): # just go for letter in word: # used to say letter in word fuckingWord = list(word) word += " " if random.choice(range(textodds)) == 1: pang = random.choice(range(30)) # in like joe if pang < 13: if letter.lower() == letter and lconso.__contains__(letter): fuckingWord[ctr] = random.choice(lconso) if letter.upper() == letter and uconso.__contains__(letter): fuckingWord[ctr] = random.choice(uconso) if letter.lower() == letter and lvowels.__contains__(letter): fuckingWord[ctr] = random.choice(lvowels) if letter.upper() == letter and uvowels.__contains__(letter): fuckingWord[ctr] = random.choice(uvowels) elif pang < 28: if random.choice(range(2)) == 0: if letter.lower() == letter and lconso.__contains__(letter): fuckingWord[ctr] = (lconso[lconso.index(letter)-1]) if letter.upper() == letter and uconso.__contains__(letter): fuckingWord[ctr] = (uconso[uconso.index(letter)-1]) if letter.lower() == letter and lvowels.__contains__(letter): fuckingWord[ctr] = (lvowels[lvowels.index(letter)-1]) if letter.upper() == letter and uvowels.__contains__(letter): fuckingWord[ctr] = (uvowels[uvowels.index(letter)-1]) else: if letter.lower() == letter and lconso.__contains__(letter): fuckingWord[ctr] = (lconso[(lconso.index(letter)+1)%len(lconso)]) if letter.upper() == letter and uconso.__contains__(letter): fuckingWord[ctr] = (uconso[(uconso.index(letter)+1)%len(uconso)]) if letter.lower() == letter and lvowels.__contains__(letter): fuckingWord[ctr] = (lvowels[(lvowels.index(letter)+1)%len(lvowels)]) if letter.upper() == letter and uvowels.__contains__(letter): fuckingWord[ctr] = (uvowels[(uvowels.index(letter)+1)%len(uvowels)]) else: fuckingWord[ctr] = random.choice(list('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) ctr += 1 word = "".join(fuckingWord) newline += word+" " newarray.append(newline.strip()) return newarray def dotext(txt, sameline=0): burytoken = "\xfe\xfd" esc = "`" if txt.__contains__(esc): # bury `` txt = txt.replace(esc+esc,burytoken) txt = txt.replace(esc+"1", "\x1b[0;34m") # lo blue txt = txt.replace(esc+"2", "\x1b[0;32m") # lo green txt = txt.replace(esc+"3", "\x1b[0;36m") # lo cyan txt = txt.replace(esc+"4", "\x1b[0;31m") # lo red txt = txt.replace(esc+"5", "\x1b[0;35m") # lo magenta txt = txt.replace(esc+"6", "\x1b[0;33m") # lo yellow txt = txt.replace(esc+"7", "\x1b[0;37m") # lo white txt = txt.replace(esc+"8", "\x1b[1;30m") # hi black txt = txt.replace(esc+"9", "\x1b[1;34m") # hi blue txt = txt.replace(esc+"0", "\x1b[1;32m") # hi green txt = txt.replace(esc+"!", "\x1b[1;36m") # hi cyan txt = txt.replace(esc+"@", "\x1b[1;31m") # hi red txt = txt.replace(esc+"#", "\x1b[1;35m") # hi magenta txt = txt.replace(esc+"$", "\x1b[1;33m") # hi yellow txt = txt.replace(esc+"%", "\x1b[1;37m") # hi white txt = txt.replace(esc+"+", "\x1b[1m") # hi * txt = txt.replace(esc+"-", "\x1b[0m") # lo * # disinter and decode `` txt = txt.replace(burytoken,esc) #return txt if sameline == 0: print txt else: print txt, def dotext_rawmode(txt, sameline=0): burytoken = "\xfe\xfd" esc = "`" if txt.__contains__(esc): # bury `` txt = txt.replace(esc+esc,burytoken) txt = txt.replace(esc+"1", "\x1b[0;34m") # lo blue txt = txt.replace(esc+"2", "\x1b[0;32m") # lo green txt = txt.replace(esc+"3", "\x1b[0;36m") # lo cyan txt = txt.replace(esc+"4", "\x1b[0;31m") # lo red txt = txt.replace(esc+"5", "\x1b[0;35m") # lo magenta txt = txt.replace(esc+"6", "\x1b[0;33m") # lo yellow txt = txt.replace(esc+"7", "\x1b[0;37m") # lo white txt = txt.replace(esc+"8", "\x1b[1;30m") # hi black txt = txt.replace(esc+"9", "\x1b[1;34m") # hi blue txt = txt.replace(esc+"0", "\x1b[1;32m") # hi green txt = txt.replace(esc+"!", "\x1b[1;36m") # hi cyan txt = txt.replace(esc+"@", "\x1b[1;31m") # hi red txt = txt.replace(esc+"#", "\x1b[1;35m") # hi magenta txt = txt.replace(esc+"$", "\x1b[1;33m") # hi yellow txt = txt.replace(esc+"%", "\x1b[1;37m") # hi white txt = txt.replace(esc+"+", "\x1b[1m") # hi * txt = txt.replace(esc+"-", "\x1b[0m") # lo * # disinter and decode `` txt = txt.replace(burytoken,esc) txt = txt.replace("\n","\r\n") #return txt if sameline == 0: print txt+"\r\n", else: print txt, def markov(newlines,outputs=0,donotaddperiods=0,w1="",w2="",sentenceseed="",subjectseed=""): # print "asd" stopword = "\n" # Since we split on whitespace, this can never be a word stopsentence = (".", "!", "?", "\n") # Cause a "new sentence" if found at the end of a word sentencesep = "\n" #String used to seperate sentences # GENERATE TABLE if w1=="": w1 = stopword if w2=="": w2 = stopword #table = {} global table oldlen = len(table) totalwords = 0 #for line in loglines: for line in newlines.split("\n"): if line.strip() == "": continue line = line.strip() line = line.replace("\n", "") #if type(line) == type(1): # #main.say (nick, prepend+"Aborting -- end of log hit") # break # if there's an integer in a loglines[] record then it has not been written to yet, so abort if donotaddperiods == 1: period = "" else: period = "." #print "debug 666 >>> "+line try: line = re.compile(r"\s+").sub(" ", line).split(" ",3)[3]+period except: print "wow, line = "+line # sys.exit() while line.count("..") > 0: line = line.replace("..",".") while line.count("!!") > 0: line = line.replace("!!","!") while line.count("??") > 0: line = line.replace("??","?") while line.count("?!") > 0: line = line.replace("?!","?") while line.count("!?") > 0: line = line.replace("!?","!") # print "cooked line: "+line firstword = 1 for word in line.split(): if (word.endswith(":") or word.endswith("\x02:\x02")) and firstword == 1: firstword = 0 continue firstword = 0 totalwords += 1 if word[-1] in stopsentence: if table.setdefault( (w1, w2), {} ).has_key(word[0:-1]): if table.setdefault( (w1, w2), {} )[word[0:-1]] >= 1000 and word[0:-1] == stopword: # print "markovdebug> extra stopwords trimmed [1]" table.setdefault( (w1, w2), {} )[word[0:-1]] = 1000 pass else: table.setdefault( (w1, w2), {} )[word[0:-1]] += 1 else: table.setdefault( (w1, w2), {} ).update({word[0:-1]:1}) w1, w2 = w2, word[0:-1] word = word[-1] if table.setdefault( (w1, w2), {} ).has_key(word): if table.setdefault( (w1, w2), {} )[word] >= 1000 and word == stopword: # print "markovdebug>extra stopwords trimmed to 1000 [2]" table.setdefault( (w1, w2), {} )[word] = 1000 pass else: table.setdefault( (w1, w2), {} )[word] += 1 else: table.setdefault( (w1, w2), {} ).update({word:1}) w1, w2 = w2, word # Mark the end of the file # should i do this at all <<< ??? if table.setdefault( (w1, w2), {} ).has_key(stopword): if table.setdefault( (w1, w2), {} )[stopword] >= 1000: # we're trimmin these now just to be careful like # print "markovdebug>extra stopwords trimmed to 1000 [3]" table.setdefault( (w1, w2), {} )[stopword] = 1000 pass else: table.setdefault( (w1, w2), {} )[stopword] += 1 else: table.setdefault( (w1, w2), {} ).update({stopword:1}) # dotext("`5[`8markov'd `#"+str(totalwords)+" `8words.`5]",sameline=1) # dotext("`#[`8markov cloud grows from `%"+str(oldlen)+"`8 to `%"+str(len(table))+" `8nodes`5.`#]",sameline=1) print if outputs == 0: return "" # GENERATE SENTENCE OUTPUT maxsentences = 14 # if sentenceseed != "": # dotext("`#[`8seeking 1.seed `5"+sentenceseed+"`#]",sameline=1) # if subjectseed != "": # dotext("`#[`8seeking 2.seed `5"+subjectseed+"`#]",sameline=1) sys.stdout.flush() # new tack w1 = stopword w2 = stopword sentencecount = 0 sentence = [] output = "" seedtries = 2500 # not a good way to do this seedctr = 0 if subjectseed != "" and sentenceseed != "" and table.has_key((sentenceseed,subjectseed)): w1 = sentenceseed w2 = subjectseed elif subjectseed != "" and sentenceseed != "" and table.has_key((subjectseed,sentenceseed)): w2 = sentenceseed w1 = subjectseed elif sentenceseed != "" and table.has_key((stopword,sentenceseed)): w2 = sentenceseed w1 = stopword elif sentenceseed != "" and table.has_key((sentenceseed,stopword)): w1 = sentenceseed w2 = stopword elif subjectseed != "" and table.has_key((subjectseed,stopword)): w1 = subjectseed w2 = stopword elif subjectseed != "" and table.has_key((stopword,subjectseed)): w2 = subjectseed w1 = stopword sentenceseed = "" subjectseed = "" oldsentenceseed = "" while sentencecount < maxsentences: if output.count(" ") > 30: # beetris protection break expansion = [] #print table[(w1, w2)] #print "!!!" #wtf = 0 #while wtf == 0: #try: for mykey in table[(w1, w2)]: for mytemp in range(table[(w1, w2)][mykey]): if mykey=="\n" and mytemp > 10: pass else: expansion.append(mykey) wtf = 1 # except: # print "couldn't get expansion for "+w1+", "+w2 # print "?", # print str(len(expansion)) sys.stdout.flush() try: newword = random.choice(expansion) # i should exhaustively search this table instead except: print "couldn't get expansion for "+w1+", "+w2+" from "+repr(expansion) print "table[('"+repr(w1)+"','"+repr(w2)+"')] = "+repr(table[(w1,w2)]) sys.stdout.flush() os.system("killall -9 markov-attract-repel.py") #print expansion # dont you want to know # ok why are there like 1,000,000 \n's for some. i should cap those. yeah you can have like ten. how about on addition tho if sentenceseed != "" and newword.lower() != sentenceseed.lower() and sentence == [] and seedctr < seedtries: seedctr += 1 continue if seedctr >= seedtries and sentenceseed != "": sentenceseed = "" #subjectseed = "" # dotext("`#[`8seed not found in `5"+str(seedtries)+" `8tries, last candidate word `1'`5"+(newword.replace("\n"," "))+"`1'`#]",sameline=1) # dotext("`#[`8giving up on sentence seed`#]",sameline=1) sentenceseed = "" #return output # bad sentence seeds also get you an empty line [once upon a time] sys.stdout.flush() if newword.lower() == sentenceseed.lower() and sentence == [] and sentenceseed != "": # dotext("`#[`8matched sentence seed `5"+sentenceseed+" `8to word `5"+newword+"`8!`#]") oldsentenceseed = sentenceseed sentenceseed = "" if newword == stopword: return output # some kind of pathological end condition if (newword in stopsentence): add = 0 if subjectseed == "": add = 1 else: #print "!#!#$" # do this next line with a regex instead ok if (" "+(" ".join(sentence).lower())+" ").count(" "+(subjectseed.lower())+" ") > 0: # dotext("`#[`8matched subject seed `5"+subjectseed+" `8to sentence `5"+(" ".join(sentence))+"`8!`#]") #seed = "" add = 1 else: seedctr += 1 #print ".", add = 0 if seedctr >= seedtries: # dotext("`#[`8seed not found in `5"+str(seedtries)+" `8tries, last candidate sentence `1'`5"+(" ".join(sentence))+"`1'`#]",sameline=1) seedctr = 0 sentencecount += 1 # bad subject seeds will get you an empty list if add == 1: output += "%s%s%s" % (" ".join(sentence), newword, sentencesep) sentencecount += 1 sentence = [] seedctr = 0 if oldsentenceseed != "": sentenceseed = oldsentenceseed else: sentence.append(newword) w1, w2 = w2, newword return output # ok body starts FH = sys.stdin l = " " lll = "" # though a = 0 ls = FH.readlines() wow = 0 wtf = 0 longl = "" #print ls[0:20] #sys.exit() ls.append("-----flamootitsoverreaditoutorexit-----") for l in ls: sys.stderr.write(l+"\n") if len(l) >= columnsForSoftWrapCheck and wtf == 0 and l != "-----flamootitsoverreaditoutorexit-----": longl = l.replace("\n","") wtf = 1 continue if wtf == 1 or l == "-----flamootitsoverreaditoutorexit-----": longl = longl + " " + l.replace("\n", "").replace("-----flamootitsoverreaditoutorexit-----","") if len(l) < columnsForSoftWrapCheck or l == "" or l == "\n": l = longl.strip() wtf = 0 else: continue #print "l = "+l; os.system("sleep 1") #if l.__contains__("Anne"): print "l = "+l # if wtf == 2: # l = longl + l # wtf = 0 # print "SDF" l = l.strip() while (l.replace(" ", " ") != l): l = l.replace(" ", " ") l = l.replace(" !","! ") l = l.replace(" .",". ") l = l.replace(" ?","? ") l = l.replace(" :",": ") newdamn = [] last_word = "" for word in l.split(" "): if word == "" or word == "\n": continue if not skipwords.__contains__(word.lower().strip()): # tword = word # tword = tword.replace(".", " PERIOD ") # tword = tword.replace("?", " Q-MARK ") # tword = tword.replace("!", " E-MARK ") # tword = (tword+" x").split(" ")[0] # tlast_word = last_word # tlast_word = tlast_word.replace(".", " PERIOD ") # tlast_word = tlast_word.replace("?", " Q-MARK ") # tlast_word = tlast_word.replace("!", " E-MARK ") # tlast_word = (tlast_word+" x").split(" ")[0] # 0 in 100 words with scores > -3 get hooked randomly into the # markov table: # print "sdf "+repr(scores)[0:76] # print "sdf ("+tlast_word+", "+tword+")" # a = {(tlast_word,tword):111} # print "sdf "+repr(a) # for key in scores.keys(): # a = key[0] # b = key[1] # if a == tword and b == tlast_word: # print "sdf does too much" # if random.choice(range(3)) == 0 and scores.has_key((tlast_word,tword)) \ # and scores[(tlast_word,tword)] > -3: # should work # hook up a word randomly to markov 1% of the time # wowAllWords = [] word = list(word) lctr = 0 if random.choice(range(70)) == 1 and len(word) > 0: # 1 in 70 words get letters transposed sys.stderr.write("*") for letter in word: if random.choice(range(15)) == 1: # 1 in 7 letters tidx = random.choice(range(len(word))) tltr = word[tidx] word[lctr] = tltr word[tidx] = letter # letter swap (might not be pronounceable) lctr += 1 if random.choice(range(80)) == 1: # 1 in 80 get letter inc/dec/randomized sys.stderr.write(">") letctr = 0 for letter in word: p = random.choice(range(10)) # 3 in 10 sort of if p == 0: if letter.lower() == letter and lconso.__contains__(letter): word[letctr] = (lconso[lconso.index(letter)-1]) if letter.upper() == letter and uconso.__contains__(letter): word[letctr] = (uconso[uconso.index(letter)-1]) if letter.lower() == letter and lvowels.__contains__(letter): word[letctr] = (lvowels[lvowels.index(letter)-1]) if letter.upper() == letter and uvowels.__contains__(letter): word[letctr] = (uvowels[uvowels.index(letter)-1]) elif p == 1: if letter.lower() == letter and lconso.__contains__(letter): word[letctr] = (lconso[(lconso.index(letter)+1)%len(lconso)]) if letter.upper() == letter and uconso.__contains__(letter): word[letctr] = (uconso[(uconso.index(letter)+1)%len(uconso)]) if letter.lower() == letter and lvowels.__contains__(letter): word[letctr] = (lvowels[(lvowels.index(letter)+1)%len(lvowels)]) if letter.upper() == letter and uvowels.__contains__(letter): word[letctr] = (uvowels[(uvowels.index(letter)+1)%len(uvowels)]) elif p == 2: word[letctr] = random.choice(list('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) letctr += 1 word = "".join(word) last_word = word newdamn.append(" "+word) if random.choice(range(100)) == 1 and len(newdamn) > 1: # 1 in 100 lines, transpose 2 words hah sys.stderr.write("<") tidx = random.choice(range(len(newdamn))) tidx2 = random.choice(range(len(newdamn))) tltr = newdamn[tidx] newdamn[tidx] = newdamn[tidx2] newdamn[tidx2] = tltr l = " ".join(newdamn).strip() while (l.replace(" ", " ") != l): l = l.replace(" ", " ") l = l.replace(" !","! ") l = l.replace(" .",". ") l = l.replace(" ?","? ") l = l.replace(" :",": ") if l.strip().strip("\n") == "": continue if l == "" or l == "\n": continue # print "A> "+l lastlll = lll passes = 0 dumbctr = 0 #lastlll = lastlll.replace(".", " . ") #lastlll = lastlll.replace("?", " ? ") #lastlll = lastlll.replace("!", " ! ") bw1 = "" bw2 = "" for word in lastlll.split(" ")+["\n"]: # oops. well if word == "\n" or word == "": continue # print lll # print dumbctr nextword = (lastlll.split(" ")+["\n"])[dumbctr+1] if not (skipwords.__contains__(word.strip()) or skipwords.__contains__(nextword.lower())): gattrep_word_pairs((word,nextword)) if scores.has_key((word,nextword)): # if scores[(word,nextword)] > 15: # print word+", "+nextword+" good score: "+str(scores[(word,nextword)]) # if scores[(word,nextword)] < 5: # print word+", "+nextword+" bad score: "+str(scores[(word,nextword)]) if scores[(word,nextword)] < 0.1 and not safewordpairs.__contains__((word.strip(),nextword.strip())): #print # print "fuck \""+word+","+nextword+"\"\n" # too spammy removeFromMarkov((word,nextword)) # need to write this function # and figure out a good way scores.pop((word,nextword)) attrep.pop((word,nextword)) elif scores[(word,nextword)] > wordscorethreshold and not safewordpairs.__contains__((word.strip(),nextword.strip())): #print "adsf should pass" passes = 2 # this will be bred below by setting this and if bw1 == "": bw1 = word if bw2 == "": bw2 = nextword # sys.stderr.write("breed "+bw1+" "+bw2+" "+str(scores[(bw1,bw2)])) scores[(word,nextword)] = postbreedenergy attrep[(word,nextword)] = startAttrep # ok we just breed the word not how the whole line # oh maybe i'll switch to that but this way it is next to other words pass else: passes = 1 dumbctr += 1 # print "adsf fsd "+l if l != "": fuckyou = 0 #print list(l) #sys.exit() while fuckyou == 0: # try: # print "B> "+l # os.system("sleep 1") lll= markov("a a a "+l,outputs=1,w1="",w2="",subjectseed=random.choice(l.split(" ")),sentenceseed=random.choice(l.split(" "))) fuckyou = 1 # except: # print "why wont it say l: "+l # l += " " # l = " "+l # if len(l) > 1000: l += "!"; print "}}}}}"+l; #sys.exit() # if len(l) > 10000: # print "line dropped" # break # protection against this loop just hanging. the markov chainer # threw some next wobbly i can't track where on CERTAIN strings # it crashes but if i just pad them with spaces it doesnt so # WTF I DONT KNOW. lll = lll.replace(" !","! ") lll = lll.replace(" .",". ") lll = lll.replace(" ?","? ") lll = lll.replace(" :",": ") lll = lll.replace("-\\","-") # ace-high text reader for win bug print " "+lll # sys.stderr.write(" "+lll+"\n") #if l != "" and passes > 0: # wow nothing happens here anymore i sort of screwed up # we're changing the markov clouds training so i'm not # going to just censor lines with bad scoring words no mo # pass if passes == 2: # sys.stderr.write("sdf") # i will reward bred words by mutating their whole line they might not # get a mutant copy in the table but they will appear next to other words # well I will mix up the order of words in the sentence too ttttt = lll.split(" ") random.shuffle(ttttt) lllll = " ".join(ttttt) mut=(mutatelines([lllll])[0]).replace("\n"," ").strip() # print "breed> "+bw1+" "+bw2 # bred waskilled = 0 try: sys.stderr.write("\nbreed "+bw1+" "+bw2+" "+str(scores[(bw1,bw2)])) except: waskilled = 1 # should basically never happen have to kill and breed a # pair in the same line. you'll still get a mutant copy # of the line but the pair that was killed won't get re- # wired to the markov table randomly, here, below. maybe # you can do that part anyway, still, when this happens. # i'll try to find out if waskilled == 0: if showbreeds == 1: print "\nbreed "+bw1+" "+bw2+"\n" for a in range(random.choice(range(3))+2): # this is the old unfair breed i did to random pairs now # i only do it to pairs that hit the score threshold # basically it wires them into the markov cloud a couple # random ways, since breeding only used to mutate the # line they were in and learn that if len(table.keys()) > 3: # for key in table.keys(): # if len(key[0]) > 1: # wowAllWords.append(key[0]) #print "test" tuple1 = random.choice(table.keys()) tuple2 = random.choice(table.keys()) tuple3 = random.choice(table.keys()) tuple4 = random.choice(table.keys()) # wowAllWords = list(set(wowAllWords)) gangidx = random.choice(range(25)) if gangidx < 13: gang = " " elif gangidx < 17: gang = "? " elif gangidx < 23: gang = "! " else: gang = ", " #print tuple1 #print tuple2 #print tuple3 #print tuple4 fakeString = tuple1[0]+" "+tuple1[1]+" "+tuple2[0]+" "+tuple2[1]+" "+bw1+" "+bw2+gang+tuple3[0]+" "+tuple3[1]+" "+tuple4[0]+" "+tuple4[1] fakeString = fakeString.replace("\n", " ") #print fakeString #sys.exit() if fakeString.strip().strip("\n") != "": bam = markov("x x x "+fakeString,outputs=1,w1="",w2="",subjectseed=tuple1[0],sentenceseed=tuple4[0]) sys.stderr.write(".") #print "test bam "+fakeString bam = bam.replace(" !","! ") bam = bam.replace(" .",". ") bam = bam.replace(" ?","? ") bam = bam.replace(" :",": ") bam = bam.replace("-\\","-") # ace-high text reader for win bug # sys.stderr.write(" "+bam+"\n") print " "+bam sys.stderr.write("\n") # print scores[(bw1,bw2)] bw1 = "" bw2 = "" fuckyou = 0 while fuckyou == 0 and mut.strip().strip("\n") != "": # try: pen = markov("a a a "+mut,outputs=1,w1="",w2="",subjectseed=random.choice(lllll.split(" ")),sentenceseed=random.choice(lllll.split(" "))) fuckyou = 1 # except: # print "why wont it say mut: "+mut # mut += " " # mut = " "+mut # if len(mut) > 1000: mut += "!"; print ">>>>"+mut; #sys.exit() # if len(mut) > 10000: # print "line dropped" # break # protection against this loop just hanging. the markov chainer # threw some next wobbly i can't track where on CERTAIN strings # it crashes but if i just pad them with spaces it doesnt so # WTF I DONT KNOW. #print "MUTANT ITS OUTPUT: "+pen pen = pen.replace(" !","! ") pen = pen.replace(" .",". ") pen = pen.replace(" ?","? ") pen = pen.replace(" :",": ") pen = pen.replace("-\\","-") # ace-high text reader for win bug print " "+pen # sys.stderr.write(" "+pen+"\n") #print "WHY THE FUCK IS IT QUITTING\n" #print table.keys() #print table[('user:/:/bin/false', '.')]