
If you’re a fan of Shakespeare, you’ve probably marveled at the elegance of his language and verse. His works have captivated audiences for centuries and remain deeply influential in modern English literature. So what if you could generate your own Shakespearean-style text using artificial intelligence?
In this post, I’ll show you how to build an AI model in Python that can generate text and stories in a Shakespearean style.
Step 1: Install dependencies
First we need to install our dependencies:
pip install tensorflow
pip install tensorflow_datasets
pip install numpy
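If you want to confirm everything installed correctly, a quick optional sanity check is to print the library versions:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
# Print versions to confirm the installs worked
print(tf.__version__)
print(tfds.__version__)
print(np.__version__)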
Step 2: Create a Python file and import dependencies
Create a Python file called ‘model_train.py’, then import the dependencies:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
Step 3: Tokenize the data
We’ll use the Tiny Shakespeare dataset from TensorFlow Datasets. Since the dataset is relatively small, we’ll tokenize by character (e.g. ‘T’) rather than by word. This means uppercase and lowercase letters are treated as distinct characters, and we assign an index only to each unique character we encounter, which keeps the vocabulary small.
# Load the Tiny Shakespeare dataset
dataset_name = "tiny_shakespeare"
data, info = tfds.load(dataset_name, with_info=True)
# Extract text data
text = ""
for example in data['train']:  # No (input, label) pairs, just a single text feature
    text += example['text'].numpy().decode('utf-8') + "\n"
# Unique characters in the dataset
vocab = sorted(set(text))
char2idx = {char: idx for idx, char in enumerate(vocab)}
idx2char = np.array(vocab)
# Convert text to numerical representation
text_as_int = np.array([char2idx[c] for c in text])
# Print dataset stats
print(f"Total characters: {len(text)}")
print(f"Unique characters: {len(vocab)}")
Step 4: Prepare the data for training
We need to structure the dataset into sequences of fixed length (100 characters per sequence) and map them into input-target pairs: for each sequence, the input is every character except the last, and the target is the same sequence shifted one character ahead.
# Sequence length for training
seq_length = 100 # Number of characters per training sample
batch_size = 64
buffer_size = 10000
# Create input-output sequences
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)
# Function to split sequences into input (X) and target (y)
def split_input_target(seq):
    input_text = seq[:-1]  # All but the last character
    target_text = seq[1:]  # All but the first character
    return input_text, target_text
dataset = sequences.map(split_input_target)
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
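If you want to verify the one-character shift, you can decode the first pair in a batch (an optional check, not needed for training):
# Peek at one batch to confirm the target is the input shifted by one character
for input_example, target_example in dataset.take(1):
    print("Input :", ''.join(idx2char[input_example[0].numpy()])[:50])
    print("Target:", ''.join(idx2char[target_example[0].numpy()])[:50])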
Step 5: Define and train the model
We’ll define the model: an embedding layer, followed by an LSTM (Long Short-Term Memory) layer, and a dense output layer that predicts the next character in the sequence. Because the LSTM is stateful, the model is built with a fixed batch size of 64, the same batch size used for the dataset.
# Define model parameters
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024
# Build the model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
    tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
])
# Compile the model
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
# Training epochs
epochs = 20
# Train the model
history = model.fit(dataset, epochs=epochs)
# Save the trained model
model.save("shakespeare_model2.keras")
Step 6: Generate Shakespearean text
Now let’s create a second script, ‘model_load.py’, that loads the trained model and generates text. It starts by rebuilding the same character mappings from the dataset, so the indices match those used during training. Create the file, add the code below, and run it.
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
# Load the Tiny Shakespeare dataset
dataset_name = "tiny_shakespeare"
data, info = tfds.load(dataset_name, with_info=True)
# Extract text data
text = ""
for example in data['train']:  # No (input, label) pairs, just a single text feature
    text += example['text'].numpy().decode('utf-8') + "\n"
# Unique characters in the dataset
vocab = sorted(set(text))
char2idx = {char: idx for idx, char in enumerate(vocab)}
idx2char = np.array(vocab)
# Convert text to numerical representation
text_as_int = np.array([char2idx[c] for c in text])
# Print dataset stats
print(f"Total characters: {len(text)}")
print(f"Unique characters: {len(vocab)}")
# Load the trained model saved by model_train.py
model = tf.keras.models.load_model("shakespeare_model.keras")
def generate_text(model, start_string, num_generate=500):
    # Convert the start string to character indices
    input_ids = [char2idx[s] for s in start_string]
    # The stateful model was built with batch size 64, so expand dims and
    # tile the input into a batch of 64 identical sequences
    input_eval = tf.expand_dims(input_ids, 0)  # shape: (1, len(start_string))
    input_eval = tf.tile(input_eval, [64, 1])  # shape: (64, len(start_string))
    text_generated = []
    # Reset the LSTM states by calling reset_states on the stateful layer directly
    model.layers[1].reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)  # shape: (64, seq_len, vocab_size)
        # Select predictions for the first sample in the batch and only the last time step
        predictions = predictions[0, -1, :]  # shape: (vocab_size,)
        # Sample the next character from the predicted distribution
        predicted_id = tf.random.categorical(tf.expand_dims(predictions, 0), num_samples=1)[0, 0].numpy()
        # Prepare input for next step: tile predicted character to match batch size 64
        input_eval = tf.tile(tf.expand_dims([predicted_id], 0), [64, 1])
        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)
print(generate_text(model, start_string="T", num_generate=500))
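One common tweak worth experimenting with is temperature sampling: dividing the logits by a temperature before sampling controls how adventurous the output is. This isn’t part of the script above; it’s a minimal sketch you could swap in for the tf.random.categorical line inside generate_text:
def sample_with_temperature(logits, temperature=0.8):
    # temperature < 1.0 gives safer, more repetitive text;
    # temperature > 1.0 gives more varied (and more error-prone) text
    scaled = logits / temperature
    return tf.random.categorical(tf.expand_dims(scaled, 0), num_samples=1)[0, 0].numpy()
With temperature set to 1.0 this reproduces the original sampling behavior exactly.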