I have a multi-layer perceptron for a multi-output regression problem that predicts 14 continuous values. Here is the code:
import random
import tensorflow as tf

# Parameters
learning_rate = 0.001
training_epochs = 1000
batch_size = 500

# Network Parameters
n_hidden_1 = 32
n_hidden_2 = 200
n_hidden_3 = 200
n_hidden_4 = 256
n_input = 14
n_classes = 14

# tf Graph input
x = tf.placeholder("float", [None, n_input], name="x")
y = tf.placeholder("float", [None, n_classes])
ix = tf.placeholder(tf.int32, shape=(), name="ix")  # batch index fed at run time

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}

# Create model: four hidden layers with ReLU, linear output layer for regression
def multilayer_perceptron(x):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    return out_layer

# Construct model
pred = multilayer_perceptron(x)
cost = tf.reduce_mean(tf.square(pred - y))  # mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Random batch generation: slice the i-th batch out of the training arrays;
# the batch index ix is fed in at run time
total_len = X_train.shape[0]
INDEXES = list(range(total_len // batch_size))
random.shuffle(INDEXES)
x_batch_data = tf.slice(X_train, [batch_size * ix, 0], [batch_size, -1])
y_batch_data = tf.slice(Y_train, [batch_size * ix, 0], [batch_size, -1])

# Run the graph in the session
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)  # initialize all variables before training
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len / batch_size)
        for i in INDEXES:
            x_batch, y_batch = sess.run([x_batch_data, y_batch_data],
                                        feed_dict={ix: i})
            c, _, p = sess.run([cost, optimizer, pred],
                               feed_dict={x: x_batch, y: y_batch})
            print('pred: {}'.format(p))
            avg_cost += c / total_batch
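(As an aside, the same batching could be done with plain NumPy indexing instead of tf.slice, which avoids the extra sess.run call per batch; this sketch assumes X_train and Y_train are NumPy arrays:)

# Alternative batching with plain NumPy indexing (assumes X_train and
# Y_train are NumPy arrays); no extra sess.run is needed to build a batch
for i in INDEXES:
    start = batch_size * i
    x_batch = X_train[start:start + batch_size]
    y_batch = Y_train[start:start + batch_size]
    c, _, p = sess.run([cost, optimizer, pred],
                       feed_dict={x: x_batch, y: y_batch})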
OUTPUT:
x_batch_data:
[[ 1.77560000e+04  4.00000000e+00  4.00000000e+00 ...,  1.00000000e+00  5.61000000e+02  1.00000000e+00]
 [ 1.34310000e+04  4.00000000e+00  4.00000000e+00 ...,  1.00000000e+00  5.61000000e+02  1.00000000e+00]
 [ 2.98800000e+03  1.00000000e+00  0.00000000e+00 ...,  0.00000000e+00  0.00000000e+00  1.00000000e+00]]

y_batch_data:
[[ 4.19700000e-01  1.04298450e+02  1.50000000e+02 ...,  2.75250000e-01  1.02000000e-01  7.28565000e+00]
 [ 5.59600000e-01  1.39064600e+02  2.00000000e+02 ...,  3.67000000e-01  1.36000000e-01  9.71420000e+00]
 [ 2.79800000e-01  6.95323000e+01  1.00000000e+02 ...,  1.83500000e-01  6.80000000e-02  4.85710000e+00]]

Prediction:
[[ 0.85085869  90.53585815  130.17015076 ...,  0.62335277  0.26637274  5.52062225]
 [ 0.85085869  90.53585815  130.17015076 ...,  0.62335277  0.26637274  5.52062225]
 [ 0.85085869  90.53585815  130.17015076 ...,  0.62335277  0.26637274  5.52062225]]
The predicted values are always the same despite different input values. Can someone point out what could be the reason behind this?
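For debugging, one quick check would be whether the hidden ReLU units still produce non-zero activations for different inputs. The snippet below is a hypothetical sketch; it assumes multilayer_perceptron is refactored so that layer_1 through layer_4 are accessible as graph tensors:

import numpy as np

# Hypothetical diagnostic: fraction of active (non-zero) ReLU units per layer.
# If the fractions are near zero, the output collapses to the output-layer
# bias, which would give identical predictions for every input.
acts = sess.run([layer_1, layer_2, layer_3, layer_4], feed_dict={x: x_batch})
for k, a in enumerate(acts, start=1):
    print('layer_%d active fraction: %.3f' % (k, np.mean(a > 0)))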
P.S. Similar question referred to: "tensorflow deep neural network for regression always predict same results in one batch"
Approaches tried:
1. Gradually reduced the learning rate from 0.1 to 0.0001
2. Tried other optimizer algorithms (see the sketch after this list)
3. Changed the network architecture (number of hidden nodes, number of layers, and activation functions; also illustrated below)
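For reference, the variations in (2) and (3) looked roughly like the sketch below; RMSProp and tanh here are illustrative stand-ins for the alternatives tried, not an exhaustive list:

# Illustrative variant for (2): swap Adam for RMSProp
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Illustrative variant for (3): same layer sizes, tanh instead of ReLU
def multilayer_perceptron_tanh(x):
    layer_1 = tf.nn.tanh(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    layer_2 = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    layer_3 = tf.nn.tanh(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    layer_4 = tf.nn.tanh(tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']))
    return tf.matmul(layer_4, weights['out']) + biases['out']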
Any help is appreciated.