Text::Travesty (Chapter 17) The Text::Travesty module implements "travesty," a Markov chain algorithm that analyzes a text document and generates a new document that preserves all the word-pair (tuple) frequencies of the original. The result is an incomprehensible document that has an eerie similarity to the writing style of the original. 0 package Text::Travesty; 1 use strict; 2 use Text::Wrap qw(fill); 3 use IO::File; 4 sub new { 5 my $pack = shift; 6 return bless { 7 words => [], 8 lookup => {}, 9 num => {}, 10 a => ' ', p=> ' ', n=>' ', 11 },$pack; 12 } 13 sub add { 14 my $self = shift; 15 my $string = shift; 16 my ($words,$lookup,$num,$a,$p,$n) = 17 @{$self}{qw(words lookup num a p n)}; 18 for my $w (split /\s+/,$string) { 19 ($a,$p) = ($p,$n); 20 unless (defined($n = $num->{$w})) { 21 push @{$words},$w; 22 $n = pack 'S',$#$words; 23 $num->{$w} = $n; 24 } 25 $lookup->{"$a$p"} .= $n; 26 } 27 @{$self}{'a','p','n'} = ($a,$p,$n); 28 } 29 sub analyze_file { 30 my $self = shift; 31 my $file = shift; 32 unless (defined (fileno $file)) { 33 $file = IO::File->new($file) croak("Couldn't open $file: $!\n"); 34 } 35 $self->add($_) while defined ($_ = <$file>); 36 } 37 sub generate { 38 my $self = shift; 39 my $word_count = shift 1000; 40 my ($words,$lookup,$a,$p) = @{$self}{qw(words lookup a p)}; 41 my ($n,$foo,$result); 42 while ($word_count--) { 43 $n = $lookup->{"$a$p"}; 44 ($foo,$n) = each(%$lookup) if $n eq ' '; 45 $n = substr($n,int(rand(length($n))) & 0177776,2); 46 ($a,$p) = ($p,$n); 47 my $w = unpack('S',$n); 48 $w = $words->[$w]; 49 $result .= $w; 50 $result .= $w =~ /\.$/ && rand() < .1 ? "\n\n" : ' '; 51 } 52 @{$self}{qw(a p)} = ($a,$p); 53 return $result; 54 } 55 sub words { 56 return @{shift->{words}}; 57 } 58 sub pretty_text { 59 my $self = shift; 60 my $text = $self->generate(@_); 61 return fill("\t",' ',$text) . "\n"; 62 } 63 sub reset { 64 my $self= shift; 65 @{$self}{qw(lookup num)} = ({},{}); 66 $self->{words} = []; 67 delete $self->{a}; 68 delete $self->{p}; 69 } 70 1; 71 =head1 NAME 72 Text::Travesty - Turn text into a travesty 73 =head1 SYNOPSIS 74 use Text::Travesty 75 my $travesty = Text::Travesty->new; 76 $travesty->analyze_file('for_whom_the_bell_tolls.txt'); 77 print $travesty->generate(1000); 78 =head1 DESCRIPTION 79 This module uses the travesty algorithm to construct a Markov chain of 80 human-readable text and spew out stylistically similar (but entirely 81 meaningless) text that has the same word-frequency characteristics. 82 =head1 CONSTRUCTOR 83 =over 4 84 =item $travesty = Text::Travesty->new 85 The new() method constructs a new Text::Travesty object with empty 86 frequency tables. You will typically call add() or analyze_file() one 87 or more times to add text to the frequency tables. 88 =back 89 =head1 OBJECT METHODS 90 =over 4 91 =item $travesty->add($text); 92 This method splits the provided text into words and adds them to the 93 internal frequency tables. You will typically call add() multiple 94 times during the analysis of a longer text. 95 The definition of "words" is a bit unusual, because it includes 96 punctuation and other characters (not whitespace). The 97 pseudopunctuation makes the generated travesties more fun. 98 =item $travesty->analyze_file($file) 99 This method adds the entire contents of the indicated file to the 100 frequency tables. C<$file> may be an opened filehandle, in which case 101 analyze_file() reads its contents through to EOF, or a file path, in 102 which case the method opens it for reading. 103 =item $text = $travesty->generate([$count]) 104 The generate() method spews back a travesty of the input text 105 based on a Markov model built from the word-frequency tables. 106 C<$count>, if provided, gives the length of the text to generate in 107 words. If not provided, the count defaults to 1000. 108 =item $text = $travesty->pretty_text([$count]) 109 This method is similar to generate() except that the returned text is 110 formatted into wrapped paragraphs. 111 =item @words = $travesty->words 112 This method returns a list of all the unique words in the frequency 113 tables. Punctuation and capitalization count for uniqueness. 114 =item $travesty->reset 115 Reset the travesty object, clearing out its frequency tables and 116 readying it to accept a new text to analyze. 117 =back 118 =head1 SEE ALSO 119 L<Text::Wrap>, L<IO::File>, L<perl> 120 =head1 AUTHOR 121 Lincoln Stein <lstein@cshl.org> 122 =head1 COPYRIGHT 123 Copyright (c) 2000 Lincoln Stein. All rights reserved. This program is 124 free software; you can redistribute it and/or modify it under the same 125 terms as Perl itself. 126 =cut |