2019-07-23

深度学习

10 分钟读完 (大约 1548 个字)

基于TensorFlow实现鸢尾花分类

问题描述

本次使用TensorFlow对鸢尾花进行分类，鸢尾花分为三种：Iris Setosa(山鸢尾)、Iris Versicolour (变色鸢尾)以及Iris Virginica(维吉尼亚鸢尾)。

我们通过4个特征值(单位：cm)来进行区分，分别为：sepal length(萼片长度)、sepal width(萼片宽度)、petal length(花瓣长度)、petal width(花瓣宽度)。

获取数据

从网上下载数据：

1
2
3

# header=None makes pandas treat the first line as data rather than column names
iris_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
df = pd.read_csv(iris_url, header=None)
print(df)

一共有150条数据，每一条分别为4个特征加上标签。

数据处理

# Load the data; header=None makes pandas treat the first line as a sample,
# not as column names
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                     header=None)
# Features: every row, first four columns. Row 0 is a real sample here, so
# slicing from row 1 would silently drop it.
features = df.iloc[:, [0, 1, 2, 3]].values
# Labels: every row, fifth column
labels = df.iloc[:, 4].values
# Standardize the features: fit the scaler, then transform with it
scaler = preprocessing.StandardScaler().fit(features)
features_standard = scaler.transform(features)

标准化是指将数据按比例缩放，使之落入一个小的特定区间。这里使用的StandardScaler会对每个特征分别减去均值、再除以标准差，使其均值为0、方差为1。

接下来转换一下标签：

山鸢尾->0
变色鸢尾->1
维吉尼亚鸢尾->2

    # Convert species names to integer class ids: setosa -> 0, versicolor -> 1,
    # and every other value -> 2
    species_to_id = {"Iris-setosa": 0, "Iris-versicolor": 1}
    labels1 = [species_to_id.get(species, 2) for species in labels]
    # Split into training and test data at a 0.67 : 0.33 ratio
    features_train, features_test, labels_train, labels_test = train_test_split(
        features_standard, labels1, test_size=0.33)

搭建网络

开始构建神经网络，本次网络的构建为：4个结点的输入层、2个每层10个结点的隐藏层以及3个结点的输出层。（注：下面分步示例中的network函数使用了w1、w2、w3共3个隐藏层并采用tanh激活；文末完整代码使用的是2个隐藏层并采用relu激活。）

网络的权重以及偏差

# Weights: one tf.Variable per layer, initialised with tf.random_normal
weights = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('w1', [4, 10]),
        ('w2', [10, 10]),
        ('w3', [10, 10]),
        ('out', [10, 3]),
    )
}
# Biases: one row vector per layer, initialised the same way
biases = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('b1', [1, 10]),
        ('b2', [1, 10]),
        ('b3', [1, 10]),
        ('b4', [1, 3]),
    )
}

搭建网络

# TensorFlow placeholder used to feed external data into the graph:
# float32, any number of rows, 4 columns
xs = tf.placeholder(dtype=tf.float32, shape=[None, 4])

def network(x, weights, biases):
    """Build the forward pass and return the softmax of the output layer.

    Three tanh hidden layers (w1/b1, w2/b2, w3/b3) are followed by an
    output layer (out/b4) whose result is passed through softmax.

    Args:
        x: input tensor that is matrix-multiplied with weights['w1'].
        weights: mapping with the keys 'w1', 'w2', 'w3' and 'out'.
        biases: mapping with the keys 'b1', 'b2', 'b3' and 'b4'.

    Returns:
        The softmax of the output layer's pre-activation.
    """
    activation = x
    # Hidden layers, in order: affine transform followed by tanh
    for w_key, b_key in (('w1', 'b1'), ('w2', 'b2'), ('w3', 'b3')):
        pre_activation = tf.add(tf.matmul(activation, weights[w_key]), biases[b_key])
        activation = tf.nn.tanh(pre_activation)
    # Output layer: the last layer uses the softmax activation
    z_out = tf.add(tf.matmul(activation, weights['out']), biases['b4'])
    return tf.nn.softmax(z_out)

这里说下，softmax激活函数用于多分类，它能将多个神经元的输出映射到(0,1)区间内，且各输出之和为1，这样就可以看成概率来理解，从而实现多分类。表达式为：

$$\mathrm{softmax}(z_i) = \frac{e^{z_i}}{\sum_{j} e^{z_j}}$$

如下面一个神经网络的输出：

z1 = w11 * o1 + w21 * o2 + w31 * o3 + w41 * o4 + b1

z2 = w12 * o1 + w22 * o2 + w32 * o3 + w42 * o4 + b2

z3 = w13 * o1 + w23 * o2 + w33 * o3 + w43 * o4 + b3

ps: o1 代表第一个神经元的值

那么经过softmax激活可得：

$$a_i = \frac{e^{z_i}}{e^{z_1} + e^{z_2} + e^{z_3}}, \quad i = 1, 2, 3$$

损失函数以及优化器

1
2
3

out = network(xs, weights, biases)
# network() already returns softmax probabilities, but
# sparse_softmax_cross_entropy_with_logits applies softmax internally and
# expects unscaled logits. Feeding it log-probabilities makes the internal
# softmax reproduce `out` (softmax(log p) == p), so the loss is the true
# cross-entropy. The clip keeps log() away from zero.
log_probs = tf.log(tf.clip_by_value(out, 1e-10, 1.0))
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels_train, logits=log_probs))
# Plain gradient descent with a learning rate of 0.1
op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

损失函数使用的是交叉熵，它是信息论中的一个概念，原本用来估算平均编码长度。给定两个概率分布 $y$ 和 $\hat{y}$，通过 $\hat{y}$ 来表示 $y$ 的交叉熵为：

$$H(y, \hat{y}) = -\sum_{i} y_i \log \hat{y}_i$$

交叉熵刻画的是两个概率分布之间的距离，或者说它刻画的是通过概率分布 $\hat{y}$ 来表达概率分布 $y$ 的困难程度。$y$ 代表正确答案，$\hat{y}$ 代表预测值，交叉熵越小，两个概率分布越接近。

优化方法使用的是梯度下降。

开始训练

训练1000次，每100次输出当前损失值。

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # The same training features are fed on every step
    train_feed = {xs: features_train}
    for step in range(1000):
        sess.run(op, feed_dict=train_feed)
        # Print the current loss once every 100 steps
        if step % 100 == 0:
            print(sess.run(loss, feed_dict=train_feed))

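可见损失值降到0.56左右后基本不再下降，说明梯度下降已经接近“山底”。（注：如果把已经过softmax的输出再作为logits传给 sparse_softmax_cross_entropy_with_logits，该函数内部会再做一次softmax，此时三分类损失的理论下限约为0.551，无法继续下降到0；传入未经softmax的logits则没有这个下限。）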

使用测试数据测试

      # Get the predictions for the test set. Reuse the existing `out` tensor
      # rather than calling network() again, which would add a second copy of
      # the same layers to the graph.
      test = sess.run(out, feed_dict={xs: features_test})
      # Count correct predictions; named `correct` so the builtin sum() is
      # not shadowed
      correct = 0
      for probs, label in zip(test, labels_test):
          # Pick the class with the strictly largest output; anything else
          # (including ties) falls through to class 2
          if probs[0] > probs[1] and probs[0] > probs[2]:
              predicted = 0
          elif probs[1] > probs[0] and probs[1] > probs[2]:
              predicted = 1
          else:
              predicted = 2
          if predicted == label:
              correct += 1
      print("准确率为 %f" %(correct/len(test)))

可见准确率达到了百分之96。

完整代码

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pandas as pd

# Weights: one tf.Variable per entry, initialised with tf.random_normal
weights = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('w1', [4, 10]),
        ('w2', [10, 10]),
        ('w3', [10, 10]),
        ('out', [10, 3]),
    )
}
# Biases: one row vector per entry, initialised the same way
biases = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('b1', [1, 10]),
        ('b2', [1, 10]),
        ('b3', [1, 10]),
        ('b4', [1, 3]),
    )
}

if __name__ == "__main__":
    # header=None makes pandas treat the first line of the file as a sample
    df = pd.read_csv('iris.data', header=None)
    # Take every row: row 0 is a real sample here, so slicing from row 1
    # would silently drop it
    features = df.iloc[:, [0, 1, 2, 3]].values
    labels = df.iloc[:, 4].values

    # Standardize the features: fit the scaler, then transform with it
    scaler = preprocessing.StandardScaler().fit(features)
    features_standard = scaler.transform(features)

    # Convert species names to integer class ids: setosa -> 0,
    # versicolor -> 1, and every other value -> 2
    species_to_id = {"Iris-setosa": 0, "Iris-versicolor": 1}
    labels1 = [species_to_id.get(species, 2) for species in labels]

    features_train, features_test, labels_train, labels_test = train_test_split(
        features_standard, labels1, test_size=0.33)

    # Placeholders used to feed external data into the graph: a batch of
    # 4-feature rows and the matching integer class ids
    xs = tf.placeholder(tf.float32, [None, 4])
    ys = tf.placeholder(tf.int32, [None])

    def network(x, weights, biases, return_logits=False):
        """Build the forward pass: two relu hidden layers and an output layer.

        Args:
            x: input tensor that is matrix-multiplied with weights['w1'].
            weights: mapping with the keys 'w1', 'w2' and 'out'.
            biases: mapping with the keys 'b1', 'b2' and 'b4'.
            return_logits: when True, return the output layer's
                pre-softmax values instead of the softmax result.

        Returns:
            The softmax of the output layer by default, or the raw output
            layer values when return_logits is True.
        """
        # First hidden layer
        a1 = tf.nn.relu(tf.add(tf.matmul(x, weights['w1']), biases['b1']))
        # Second hidden layer
        a2 = tf.nn.relu(tf.add(tf.matmul(a1, weights['w2']), biases['b2']))
        # Output layer
        z_out = tf.add(tf.matmul(a2, weights['out']), biases['b4'])
        if return_logits:
            return z_out
        return tf.nn.softmax(z_out)

    # sparse_softmax_cross_entropy_with_logits applies softmax internally, so
    # it must receive the unscaled logits; passing the softmax output would
    # apply softmax twice and distort the loss.
    logits = network(xs, weights, biases, return_logits=True)
    out = tf.nn.softmax(logits)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits))
    op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        train_feed = {xs: features_train, ys: labels_train}
        for step in range(1000):
            sess.run(op, feed_dict=train_feed)
            # Print the current training loss once every 100 steps
            if step % 100 == 0:
                print(sess.run(loss, feed_dict=train_feed))

        # Get the predictions for the test set, reusing the `out` tensor
        # instead of adding a second copy of the network to the graph
        test = sess.run(out, feed_dict={xs: features_test})
        # Predicted class = index of the largest output in each row
        predicted = np.argmax(test, axis=1)
        correct = int(np.sum(predicted == np.asarray(labels_test)))
        print("准确率为 %f" %(correct/len(test)))