Writing Lyrics in the Style of Xu Wei (许巍) with TensorFlow

Generating lyrics with TensorFlow

To be honest, this is mostly adapted from another article.

char-rnn predicts the next character from the characters seen so far. For example, given the first four letters of hello, i.e. hell, we can predict that the next character is very likely o. Because it works purely at the character level, there is no word- or sentence-level feature extraction, which makes it relatively simple and easy to learn.
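To make that concrete, here is a tiny sketch (plain Python, illustrative only, not from the original post) of how character-level training pairs are built: the target is just the input shifted by one character, which is also what DataGenerator.next_batch() in the code below does over windows of the lyrics file.

    text = 'hello'
    vocab = sorted(set(text))                    # ['e', 'h', 'l', 'o']
    char2id = {c: i for i, c in enumerate(vocab)}

    x = [char2id[c] for c in text[:-1]]          # 'hell' -> [1, 0, 2, 2]
    y = [char2id[c] for c in text[1:]]           # 'ello' -> [0, 2, 2, 3]
    # the model is trained so that, at each position, seeing x[t] predicts y[t]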

Usage instructions and the data are on GitHub: CharRNN-LRC

Requirements

  • TensorFlow 1.8.0
  • numpy
  • Python 3.x

How It Works

Take Chinese as an example: characters in a sequence are not statistically independent. Consider the 10-character sequence 如果不爱就不要再伤害. If we know the first character 如, the next one is quite likely 果; knowing the first two characters 如果, the third is more likely to be 不; and so on, until knowing the first nine characters 如果不爱就不要再伤 makes the last one very likely to be 害. The figure below shows this intuitively.

(Figure: an RNN unrolled over the character sequence)
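Put a bit more formally (my framing of the same idea, not spelled out in the original post), the network models the probability of each character conditioned on everything before it, so the probability of a whole line factorizes as

    P(x_1, \dots, x_T) = \prod_{t=1}^{T} P(x_t \mid x_1, \dots, x_{t-1})

with the RNN's hidden state acting as a summary of the prefix x_1, ..., x_{t-1} at each step.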
Overall, this is a sequence-to-sequence style model. The training data is the lyrics of 60-odd Xu Wei songs; the training data and code are in the CharRNN-LRC repo linked above.

The phrase used in the code to warm up the LSTM is 永生永世的爱恋. You can of course swap in something else, as long as every character of it appears in the training txt file.
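As a hypothetical safeguard (not part of the original code), you could check a custom warm-up phrase before sampling, since DataGenerator.char2id() raises a KeyError for any character that never occurs in the training text:

    # fail early if the warm-up phrase uses out-of-vocabulary characters
    def check_prime(prime, data):
        missing = [c for c in prime if c not in data.char2id_dict]
        if missing:
            raise ValueError('characters not in training text: {}'.format(''.join(missing)))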

Code

The reason for reworking the code is that TensorFlow has moved on to 1.8 while the original post used 1.2, and some APIs have changed since then, so a few small tweaks were needed to get it running again.

import os
import sys
import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.contrib import legacy_seq2seq as seq2seq


class HParam():

    batch_size = 32
    n_epoch = 100
    learning_rate = 0.01
    decay_steps = 1000
    decay_rate = 0.9
    grad_clip = 5

    state_size = 100
    num_layers = 3
    seq_length = 20
    log_dir = './logs'
    metadata = 'metadata.tsv'
    gen_num = 500  # how many chars to generate


class DataGenerator():

    def __init__(self, datafiles, args):
        self.seq_length = args.seq_length
        self.batch_size = args.batch_size
        with open(datafiles, encoding='utf-8') as f:
            self.data = f.read()

        self.total_len = len(self.data)  # total data length
        self.words = list(set(self.data))
        self.words.sort()
        # vocabulary
        self.vocab_size = len(self.words)  # vocabulary size
        print('Vocabulary Size: ', self.vocab_size)
        self.char2id_dict = {w: i for i, w in enumerate(self.words)}
        self.id2char_dict = {i: w for i, w in enumerate(self.words)}

        # pointer position to generate current batch
        self._pointer = 0

        # save metadata file
        self.save_metadata(args.metadata)

    def char2id(self, c):
        return self.char2id_dict[c]

    def id2char(self, id):
        return self.id2char_dict[id]

    def save_metadata(self, file):
        with open(file, 'w') as f:
            f.write('id\tchar\n')
            for i in range(self.vocab_size):
                c = self.id2char(i)
                f.write('{}\t{}\n'.format(i, c))

    def next_batch(self):
        x_batches = []
        y_batches = []
        for i in range(self.batch_size):
            if self._pointer + self.seq_length + 1 >= self.total_len:
                self._pointer = 0
            bx = self.data[self._pointer: self._pointer + self.seq_length]
            by = self.data[self._pointer +
                           1: self._pointer + self.seq_length + 1]
            self._pointer += self.seq_length  # update pointer position

            # convert to ids
            bx = [self.char2id(c) for c in bx]
            by = [self.char2id(c) for c in by]
            x_batches.append(bx)
            y_batches.append(by)

        return x_batches, y_batches


class Model():
    """
    The core recurrent neural network model.
    """

    def __init__(self, args, data, infer=False):
        if infer:
            args.batch_size = 1
            args.seq_length = 1
        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope('model'):
            # one fresh LSTM cell per layer; in recent TF 1.x releases,
            # reusing a single cell object across layers is rejected
            self.cell = tf.contrib.rnn.MultiRNNCell(
                [tf.contrib.rnn.BasicLSTMCell(args.state_size)
                 for _ in range(args.num_layers)])
            self.initial_state = self.cell.zero_state(
                args.batch_size, tf.float32)
            with tf.variable_scope('rnnlm'):
                w = tf.get_variable(
                    'softmax_w', [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device("/cpu:0"):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            output = tf.reshape(outputs, [-1, args.state_size])

            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            loss = seq2seq.sequence_loss_by_example(
                [self.logits],
                [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / args.batch_size
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.cost, tvars)
            for g in grads:
                tf.summary.histogram(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()


def train(data, model, args):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(args.log_dir, sess.graph)

        # Add embedding tensorboard visualization. Need tensorflow version
        # >= 0.12.0RC0
        config = projector.ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = 'rnnlm/embedding:0'
        embed.metadata_path = args.metadata
        projector.visualize_embeddings(writer, config)

        max_iter = args.n_epoch * \
            (data.total_len // args.seq_length) // args.batch_size
        for i in range(max_iter):
            learning_rate = args.learning_rate * \
                (args.decay_rate ** (i // args.decay_steps))
            x_batch, y_batch = data.next_batch()
            feed_dict = {model.input_data: x_batch,
                         model.target_data: y_batch, model.lr: learning_rate}
            train_loss, summary, _, _ = sess.run(
                [model.cost, model.merged_op, model.last_state, model.train_op],
                feed_dict)

            if i % 10 == 0:
                writer.add_summary(summary, global_step=i)
                print('Step:{}/{}, training_loss:{:4f}'.format(
                    i, max_iter, train_loss))
            if i % 2000 == 0 or (i + 1) == max_iter:
                saver.save(sess, os.path.join(
                    args.log_dir, 'lyrics_model.ckpt'), global_step=i)


def sample(data, model, args):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(args.log_dir)
        print(ckpt)
        saver.restore(sess, ckpt)

        # initial phrase to warm RNN
        prime = u'永生永世的爱恋'
        state = sess.run(model.cell.zero_state(1, tf.float32))

        for word in prime[:-1]:
            x = np.zeros((1, 1))
            x[0, 0] = data.char2id(word)
            feed = {model.input_data: x, model.initial_state: state}
            state = sess.run(model.last_state, feed)

        word = prime[-1]
        lyrics = prime
        for i in range(args.gen_num):
            x = np.zeros([1, 1])
            x[0, 0] = data.char2id(word)
            feed_dict = {model.input_data: x, model.initial_state: state}
            probs, state = sess.run([model.probs, model.last_state], feed_dict)
            p = probs[0]
            word = data.id2char(np.argmax(p))
            print(word, end='')
            sys.stdout.flush()
            time.sleep(0.05)
            lyrics += word
        return lyrics


def main(infer):

    args = HParam()
    data = DataGenerator('xuwei.txt', args)
    model = Model(args, data, infer=infer)

    run_fn = sample if infer else train

    run_fn(data, model, args)


if __name__ == '__main__':
    msg = """
    Usage:
    Training:
        python3 gen.py 0
    Sampling:
        python3 gen.py 1
    """
    if len(sys.argv) == 2:
        infer = int(sys.argv[-1])
        print('--Sampling--' if infer else '--Training--')
        main(infer)
    else:
        print(msg)
        sys.exit(1)
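Since training writes summaries and the embedding projector config to ./logs, you can optionally watch the loss curve and browse the character embeddings with TensorBoard while (or after) training:

    tensorboard --logdir ./logs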

Results

总是那么遥远沉重
不变的轮回之中
生命变得虚无飘渺
曾经遥远漫长的幻想
为何它始终不能实
那一种力量
依然在我心中流淌
在我怀里是温暖的家
就让我的心里永远一次被唤醒
我不知道转变的天风里
你爱蓝色夜晚漫天的星光
是否爱将这世界上一定在这短暂的一切
是因为这一切的地方
我的心却那么遥远沉重

不变的轮回之中
生命变得虚无飘渺
曾经遥远漫长的幻想
为何它始终不能实
那一种力量
依然在我心中流淌
在我怀里是温暖的家
就让我的心里永远一次被唤醒
我不知道转变的天风里
你爱蓝色夜晚漫天的星光
是否爱将这世界上一定在这短暂的一切
是因为这一切的地方
我的心却那么遥远沉重

The first few lines are the original lyrics of 《永恒》: because the dataset is so small, the model has partly fallen back on recitation. Judging by the later lines, the result is still not bad (the lines that stray from the original song can count as its own creations).

Xu Wei's lyrics are mostly short, and compared with Jay Chou's they are much more consistent in style, so even though the corpus is far smaller, the generated results turn out better.

Wang Feng's lyrics would probably work well too. A recurring problem after training on this kind of corpus, though, is that the model tends to loop a few lines endlessly, most likely because the amount of training text is still too small.
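One thing worth trying (my own suggestion, not part of the original post): sample() above always takes np.argmax over the softmax output, and greedy decoding like that is prone to exactly these repetition loops. Sampling from the distribution instead, optionally with a temperature, usually breaks them up. A minimal sketch under that assumption:

    import numpy as np

    def pick_char(p, temperature=0.8):
        # sample a character id from the softmax output instead of taking argmax
        p = np.log(p + 1e-10) / temperature      # sharpen/flatten the distribution
        p = np.exp(p)
        p = p / np.sum(p)                        # renormalize to a valid distribution
        return np.random.choice(len(p), p=p)

    # in the generation loop of sample(), replace
    #     word = data.id2char(np.argmax(p))
    # with
    #     word = data.id2char(pick_char(p))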