I am completely new to TensorFlow. I was working on a project and got this error message:

2018-05-13 20:50:57.669722: F T:\src\github\tensorflow\tensorflow/core/framework/tensor.h:630] Check failed: NDIMS == new_sizes.size() (2 vs. 1)

and PyCharm says:

Process finished with exit code -1073740791 (0xC0000409)

I have no idea what that means. I am running Windows and Python 3.6. Here is my code:
import tensorflow as tf
import gym
import numpy as np

env = gym.make("MountainCar-v0").env

n_inputs = 2
n_hidden = 3
n_output = 3
initializer = tf.contrib.layers.variance_scaling_initializer()
learning_rate = 0.1

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)
logits = tf.layers.dense(hidden, n_output, kernel_initializer=initializer)
outputs = tf.nn.softmax(logits)
index, action = tf.nn.top_k(logits, 1)
y = tf.to_float(action)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(cross_entropy)
gradients = [grad for grad, variable in grads_and_vars]
gradient_placeholders = []
grads_and_vars_feed = []
for grad, variable in grads_and_vars:
    gradient_placeholder = tf.placeholder(tf.float32, shape=grad.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))
training_op = optimizer.apply_gradients(grads_and_vars_feed)
# initialize variables and saver
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# discount the rewards of the individual steps
def discount_rewards(rewards, discount_rate):
    discounted_rewards = np.empty(len(rewards))
    cumulative_rewards = 0
    for step in reversed(range(len(rewards))):
        cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
        discounted_rewards[step] = cumulative_rewards
    return discounted_rewards

def discount_and_normalize_rewards(all_rewards, discount_rate):
    all_discounted_rewards = [discount_rewards(rewards, discount_rate) for rewards in all_rewards]
    # concatenate all rewards into one array
    flat_rewards = np.concatenate(all_discounted_rewards)
    reward_mean = flat_rewards.mean()
    reward_std = flat_rewards.std()
    return [(discounted - reward_mean) / reward_std for discounted in all_discounted_rewards]
n_iterations = 25
n_max_steps = 10000
n_games_per_update = 10
save_iteration = 10
discount_rate = 0.95

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        all_rewards = []
        my_rewards = []
        all_gradients = []
        for game in range(n_games_per_update):
            current_rewards = []
            current_gradients = []
            # env.render()
            obs = env.reset()
            for step in range(n_max_steps):
                action_val, gradient_val = sess.run([action, gradients], feed_dict={X: obs.reshape(1, n_inputs)})
                obs, reward, done, info = env.step(action_val)
                current_rewards.append(reward)
                current_gradients.append(gradient_val)
                if done:
                    break
            my_rewards.append(sum(current_rewards))
            print(iteration, ": ", sum(current_rewards))
            all_rewards.append(current_rewards)
            all_gradients.append(current_gradients)
        all_rewards = discount_and_normalize_rewards(all_rewards, discount_rate)
        feed_dict = {}
        for var_index, grad_placeholder in enumerate(gradient_placeholders):
            mean_gradients = np.mean([reward * all_gradients[game_index][step][var_index]
                                      for game_index, rewards in enumerate(all_rewards)
                                      for step, reward in enumerate(rewards)], axis=0)
            feed_dict[grad_placeholder] = mean_gradients
        sess.run(training_op, feed_dict=feed_dict)
        if iteration % save_iteration == 0:
            saver.save(sess, "./my_policy_net_pg.ckpt")
        print("Average: ", sum(my_rewards) / len(my_rewards))
        print("Maximum: ", max(my_rewards))
These lines seem to contain multiple bugs:

index, action = tf.nn.top_k(logits, 1)
y = tf.to_float(action)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)

To start with, tf.nn.top_k() returns the values first and the indices second. Therefore your variable action will hold the index, not the variable index. y then becomes that index (cast to float), and you pass it as labels to tf.nn.softmax_cross_entropy_with_logits_v2().
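You can verify the return order with a minimal, self-contained snippet (assuming TensorFlow 1.x, as in your code):

import tensorflow as tf

logits = tf.constant([[0.1, 2.0, 0.3]])
values, indices = tf.nn.top_k(logits, 1)
with tf.Session() as sess:
    v, i = sess.run([values, indices])
    print(v)  # [[2.]] -- the largest logit (the values)
    print(i)  # [[1]]  -- its position (the indices)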
There are two major problems with that. First, you should pass the labels as one-hot vectors, not as indices. I suspect that is why you are getting the error: you are passing a one-dimensional tensor where the op expects a two-dimensional one, which matches the "(2 vs. 1)" in your log.
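If you want to turn an index tensor into one-hot labels of the shape the loss expects, tf.one_hot does that. A sketch using the names from your code (though, as explained next, this alone will not make the network learn):

action_idx = tf.squeeze(action, axis=1)      # shape [batch] instead of [batch, 1]
y = tf.one_hot(action_idx, depth=n_output)   # shape [batch, n_output], float32
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)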
The second problem is theoretical (and unrelated to your error, but I thought I would point it out): since logits are your predictions and you derive y from them, you are essentially comparing your logits to themselves. There will be no learning. You need to feed in actual targets and base the learning on those.
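In a policy-gradient setup like yours, the usual way around this (for example in the Hands-On Machine Learning CartPole example your code appears to follow) is to sample the action from the softmax distribution instead of taking the argmax, treat the sampled action as the target, and let the reward-weighted gradient averaging you already have decide whether to reinforce it. A rough sketch, reusing your variable names:

outputs = tf.nn.softmax(logits)                             # action probabilities
action = tf.multinomial(logits, num_samples=1)              # sampled action, shape [1, 1]
y = tf.one_hot(tf.squeeze(action, axis=1), depth=n_output)  # target, shape [1, n_output]
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)

In the rollout loop you would then call env.step(int(action_val[0][0])) so that gym receives a plain integer instead of a 2-D array.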
Just a comment: it is usually helpful to post the whole error traceback, not just the last line. As it is, I can only guess where the error occurs; I cannot be sure.