A robot writes Tang poetry: model training
# coding=utf-8
"""author: lei    function: a robot that writes Tang poetry"""
import tensorflow as tf
import numpy as np
import collections
import re
Data extraction
def extract_data(file_path):
    """Read the poetry file and return a list of cleaned poems."""
    poems = []
    with open(file_path, "r", encoding="utf8") as f:
        # Read the whole file and split it into individual poems on the "◇" separator
        content_list = f.read().strip().split("◇")
        for content in content_list:
            # Remove newlines, tabs and full-width spaces
            content = content.replace("\n", "").replace("\t", "").replace("\u3000", "")
            # Split the title from the body on the "]" separator
            content = content.split("]")
            if len(content) == 2:
                # Keep only the body of the poem
                values = content[1]
                # Skip poems that contain brackets, question marks or volume markers ("卷")
                if "(" in values or "?" in values or "卷" in values:
                    continue
                # Mark the start and end of each poem with "G" and "F"
                values = "G" + values + "F"
                poems.append(values)
    # Keep only poems shorter than 200 characters
    poems = [poem for poem in poems if len(poem) < 200]
    return poems
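To see what the parser expects, here is a minimal sketch. The toy corpus below is invented to match the separators the code looks for ("◇" between poems, "]" after the title); it is not the real format of tang_all.txt.

import tempfile

# Hypothetical two-poem corpus
toy_corpus = "◇[静夜思]床前明月光，疑是地上霜。◇[春晓]春眠不觉晓，处处闻啼鸟。"
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False, encoding="utf8") as tmp:
    tmp.write(toy_corpus)

print(extract_data(tmp.name))
# ['G床前明月光，疑是地上霜。F', 'G春眠不觉晓，处处闻啼鸟。F']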
def word_to_vec(poems):
    """Build word-to-id and id-to-word dictionaries; id 0 is reserved for padding."""
    pading_list = [""]
    # Count character frequencies over all poems
    counter = collections.Counter("".join(poems))
    # Sort by descending frequency, then by character
    counter_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*counter_pairs))
    pading_list.extend(words)
    word_to_id = dict(zip(pading_list, range(len(pading_list))))
    id_to_word = dict(zip(range(len(pading_list)), pading_list))
    return word_to_id, id_to_word
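A quick sanity check on two made-up poems: frequent characters get small ids, and the two dictionaries are inverses of each other.

word_to_id, id_to_word = word_to_vec(["G床前明月光F", "G明月几时有F"])
print(word_to_id["明"])                     # a small id, since "明" appears twice
print(id_to_word[word_to_id["G"]] == "G")   # True: the two mappings round-trip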
def padding_data(data):
    """Pad (with id 0) or truncate every feature/label sequence to a fixed length of 150."""
    line_list = []
    for line in data:
        if len(line) > 150:
            line_list.append(np.array(line[:150]))
        else:
            line_list.append(np.hstack((np.array(line), np.zeros(150, dtype=np.int64)))[:150])
    return line_list
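For example, with toy id sequences a short one is right-padded with zeros and a long one is cut to 150 entries:

short, long_ = [5, 8, 3], list(range(200))
padded = padding_data([short, long_])
print(len(padded[0]), len(padded[1]))   # 150 150
print(padded[0][:5])                    # [5 8 3 0 0]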
def data_main(file_path):
    """Turn the raw poem file into padded feature/label id sequences."""
    poems = extract_data(file_path)
    word_to_id, id_to_word = word_to_vec(poems)
    # print(len(poems))       # 39094 poems
    # print(len(word_to_id))  # 6701 characters + 1 padding token
    poem_list = []
    for poem in poems:
        temp = []
        for word in poem:
            temp.append(word_to_id[word])
        poem_list.append(temp)
    # Next-character prediction: the label sequence is the feature sequence shifted by one
    features = [i[:-1] for i in poem_list]
    labels = [i[1:] for i in poem_list]
    features = np.array(padding_data(features))
    labels = np.array(padding_data(labels))
    return features, labels, word_to_id, id_to_word
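The feature/label shift is the whole training signal: at every position the model must predict the character that comes next. A minimal sketch with toy ids (not real data):

poem_ids = [2, 17, 44, 9, 1]     # e.g. "G ... F" encoded as ids
features_row = poem_ids[:-1]     # [2, 17, 44, 9]
labels_row = poem_ids[1:]        # [17, 44, 9, 1]
# At step t the input is features_row[t] and the target is labels_row[t],
# i.e. labels_row[t] == poem_ids[t + 1].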
Building the model
class PoemRobot(object):
    def __init__(self, file_path, save_path, training):
        features, labels, word_to_id, id_to_word = data_main(file_path)
        self.batch_size = 10
        self.n_steps = 150
        self.n_layers = 3
        self.word_size = 6702
        self.num_neutrals = 200        # dimensionality of the word vectors
        self.learning_rate = 0.01
        self.training = training
        self.keep_prob = 0.5
        self.poem_nums = 39094
        self.epoch_size = self.poem_nums // self.batch_size - 1   # 3908 batches per pass
        self.frequences = 10           # number of passes over the data
        tf.compat.v1.reset_default_graph()
        with tf.compat.v1.name_scope("data"):
            x = tf.compat.v1.placeholder(tf.int32, [self.batch_size, self.n_steps])
            y_true = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.n_steps, self.word_size])

        with tf.compat.v1.name_scope("embedding"):
            embedding = tf.compat.v1.Variable(tf.compat.v1.random_uniform([self.word_size, self.num_neutrals], -1.0, 1.0))
            inputs = tf.compat.v1.nn.embedding_lookup(embedding, x)
            if self.training is True:
                inputs = tf.compat.v1.nn.dropout(inputs, keep_prob=self.keep_prob)
        # Stacked LSTM
        with tf.compat.v1.variable_scope("lstm_model"):
            def make_cell():
                cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(num_units=self.num_neutrals)
                if self.training is True:
                    # Apply dropout to the cell outputs during training
                    cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
                return cell

            # Each layer needs its own cell instance
            multi_cells = tf.compat.v1.nn.rnn_cell.MultiRNNCell([make_cell() for _ in range(self.n_layers)])
            initial_state = multi_cells.zero_state(self.batch_size, dtype=tf.float32)
            outputs, states = tf.compat.v1.nn.dynamic_rnn(multi_cells, inputs, initial_state=initial_state, dtype=tf.float32)
            # Flatten to (batch_size * n_steps, num_neutrals) = (1500, 200)
            outputs = tf.reshape(outputs, [-1, self.num_neutrals])
        # Fully connected output layer
        with tf.compat.v1.variable_scope("correct"):
            weight = tf.compat.v1.Variable(tf.compat.v1.truncated_normal([self.num_neutrals, self.word_size], stddev=0.1))
            bias = tf.compat.v1.Variable(tf.compat.v1.truncated_normal([self.word_size], stddev=0.1))
            # logits has shape (batch_size * n_steps, word_size) = (1500, 6702)
            logits = tf.compat.v1.nn.xw_plus_b(outputs, weight, bias)
        # Loss and prediction
        with tf.compat.v1.name_scope("loss"):
            y_true_2 = tf.reshape(y_true, [-1, self.word_size])
            # Id of the predicted character at the last time step (used during generation)
            y_predict = tf.argmax(logits, 1)[-1]
            loss = tf.reduce_mean(tf.compat.v1.nn.softmax_cross_entropy_with_logits(labels=y_true_2, logits=logits))
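        # Note: a common alternative (not used in this article) is to feed the integer
        # label ids directly, which avoids the one-hot targets built in the training loop:
        #   y_ids = tf.compat.v1.placeholder(tf.int32, [self.batch_size, self.n_steps])
        #   loss = tf.reduce_mean(tf.compat.v1.nn.sparse_softmax_cross_entropy_with_logits(
        #       labels=tf.reshape(y_ids, [-1]), logits=logits))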
        with tf.compat.v1.name_scope("train_step"):
            train_op = tf.compat.v1.train.AdamOptimizer(self.learning_rate).minimize(loss)

        with tf.compat.v1.name_scope("acc"):
            equal = tf.equal(tf.argmax(y_true_2, 1), tf.argmax(logits, 1))
            accuracy = tf.reduce_mean(tf.cast(equal, tf.float32))

        init_op = tf.compat.v1.global_variables_initializer()
        saver = tf.compat.v1.train.Saver()
        if self.training is True:
            # Training branch
            with tf.compat.v1.Session() as sess:
                sess.run(init_op)
                new_acc = 0
                t = 0
                for frequence in range(self.frequences):
                    for epoch in range(self.epoch_size):
                        t += 1
                        x_batch = features[epoch * self.batch_size: (epoch + 1) * self.batch_size]
                        y_batch = labels[epoch * self.batch_size: (epoch + 1) * self.batch_size]
                        # One-hot encode the label ids in NumPy (the original built a tf.one_hot op
                        # and called .eval() inside the loop, which grows the graph at every step)
                        y_batch = (np.arange(self.word_size) == y_batch[..., None]).astype(np.float32)
                        sess.run(train_op, feed_dict={x: x_batch, y_true: y_batch})
                        if epoch % 50 == 0:
                            acc_val = sess.run(accuracy, feed_dict={x: x_batch, y_true: y_batch})
                            print("frequence: {}, epoch: {}, acc: {}".format(frequence, epoch, acc_val))
                            if acc_val > new_acc and frequence > 7:
                                new_acc = acc_val
                                saver.save(sess, save_path, global_step=t)
                                print("Accuracy improved, model saved!")
                            else:
                                saver.save(sess, save_path, global_step=t)
                                print("Model saved!")
        else:
            # Generation branch
            # data = input("Enter a seed character: ")
            data = " "                     # seed character for the poem
            data_num = 0
            if data in word_to_id.keys():
                data_num = word_to_id[data]
            else:
                print("The character is not in the vocabulary, please try another one!")
                exit(0)
            init_op = tf.compat.v1.global_variables_initializer()
            input_data = tf.compat.v1.placeholder(tf.int32, [1, None])
            with tf.compat.v1.Session() as sess:
                sess.run(init_op)
                # Restore the most recent checkpoint saved during training
                saver.restore(sess, tf.compat.v1.train.latest_checkpoint(save_path))
                x = np.array([list(map(word_to_id.get, "G"))])
                predict = sess.run(y_predict, feed_dict={input_data: x})
                if data:
                    word = data
                else:
                    word = id_to_word[predict]
                poem = ""
                while word != "F":
                    poem += word
                    x = np.zeros((1, 1), dtype=np.int64)
                    x[0, 0] = word_to_id[word]
                    predict = sess.run(y_predict, feed_dict={input_data: x})
                    word = id_to_word[predict]
                print("Generated poem:", poem)
if __name__ == "__main__":
    file_path = "/home/aistudio/poem/data/tang_all.txt"
    save_path = "/home/aistudio/poem/model/"
    # file_path = "./data/tang_all.txt"
    poem = PoemRobot(file_path, save_path, False)
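For readers on TensorFlow 2, roughly the same architecture (character embedding, three stacked LSTM layers with dropout, a per-step softmax over the vocabulary) can be sketched with the Keras API. This is an alternative the article does not use; the layer sizes simply mirror the hyperparameters above.

import tensorflow as tf

word_size, num_neutrals, n_layers = 6702, 200, 3

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(word_size, num_neutrals),
    *[tf.keras.layers.LSTM(num_neutrals, return_sequences=True, dropout=0.5)
      for _ in range(n_layers)],
    tf.keras.layers.Dense(word_size),   # per-step logits over the vocabulary
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
# model.fit(features, labels, batch_size=10, epochs=10) would then train on the
# integer id sequences produced by data_main, without any manual one-hot encoding.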