Tensorflow 预测燃油效率

Gowi

2020-02-28

机器学习

字数统计:

1.4k字

阅读时长≈

7分

Tensorflow 预测燃油效率

环境： jupyter notebook tensorflow2.1.0 python3.7.5

import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras as keras
from tensorflow.keras import layers as layers

# Fetch the Auto MPG data file and print the local path that get_file returns.
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
print(dataset_path)

1	/Users/xxxx/.keras/datasets/auto-mpg.data

column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']
# Columns: miles per gallon, cylinders, displacement, horsepower, weight, acceleration, model year, origin
# Load the downloaded file into a DataFrame using the column names above.
raw_dataset=pd.read_csv(dataset_path,
                        names=column_names, # column headers
                        na_values='?', # read '?' entries as missing values (NaN)
                        comment='\t', # drop the rest of a line after a tab (presumably the trailing car-name field; verify against the data file)
                        sep=' ', # delimiter to use
                        skipinitialspace=True) # skip spaces after the delimiter

dataset = raw_dataset.copy()  # work on a copy so raw_dataset is left untouched

dataset.tail()  # show the last n rows (n defaults to 5)

	MPG	Cylinders	Displacement	Horsepower	Weight	Acceleration	Model Year	Origin
393	27.0	4	140.0	86.0	2790.0	15.6	82	1
394	44.0	4	97.0	52.0	2130.0	24.6	82	2
395	32.0	4	135.0	84.0	2295.0	11.6	82	1
396	28.0	4	120.0	79.0	2625.0	18.6	82	1
397	31.0	4	119.0	82.0	2720.0	19.4	82	1

dataset.isna().sum()  # count the missing values in each column

MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

dataset = dataset.dropna()  # drop the rows that contain missing values

# Remove the categorical 'Origin' column from dataset and keep its values in origin.
origin = dataset.pop('Origin')

# One-hot encode the origin code: each country column is 1.0 where the code matches, else 0.0.
dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0

dataset.tail()  # inspect the last rows again, now with the one-hot country columns

	MPG	Cylinders	Displacement	Horsepower	Weight	Acceleration	Model Year	USA	Europe	Japan
393	27.0	4	140.0	86.0	2790.0	15.6	82	1.0	0.0	0.0
394	44.0	4	97.0	52.0	2130.0	24.6	82	0.0	1.0	0.0
395	32.0	4	135.0	84.0	2295.0	11.6	82	1.0	0.0	0.0
396	28.0	4	120.0	79.0	2625.0	18.6	82	1.0	0.0	0.0
397	31.0	4	119.0	82.0	2720.0	19.4	82	1.0	0.0	0.0

# Randomly sample 80% of the rows for training; random_state=0 fixes the random seed.
train_dataset = dataset.sample(frac=0.8, random_state=0)
# The rows that were not sampled for training form the test set.
test_dataset = dataset.drop(train_dataset.index)

# Pairwise plots of four training columns, with kernel density estimates on the diagonal.
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind='kde')

1	<seaborn.axisgrid.PairGrid at 0x10cd98110>

# Summary statistics of the training set.
train_stats = train_dataset.describe()
# Remove the label column from the statistics; as the last expression of an
# interactive Jupyter cell, the popped MPG column is also displayed as output.
train_stats.pop("MPG")

count    314.000000
mean      23.310510
std        7.728652
min       10.000000
25%       17.000000
50%       22.000000
75%       28.950000
max       46.600000
Name: MPG, dtype: float64

train_stats = train_stats.transpose()  # one row per feature, one column per statistic

train_stats

	count	mean	std	min	25%	50%	75%	max
Cylinders	314.0	5.477707	1.699788	3.0	4.00	4.0	8.00	8.0
Displacement	314.0	195.318471	104.331589	68.0	105.50	151.0	265.75	455.0
Horsepower	314.0	104.869427	38.096214	46.0	76.25	94.5	128.00	225.0
Weight	314.0	2990.251592	843.898596	1649.0	2256.50	2822.5	3608.00	5140.0
Acceleration	314.0	15.559236	2.789230	8.0	13.80	15.5	17.20	24.8
Model Year	314.0	75.898089	3.675642	70.0	73.00	76.0	79.00	82.0
USA	314.0	0.624204	0.485101	0.0	0.00	1.0	1.00	1.0
Europe	314.0	0.178344	0.383413	0.0	0.00	0.0	0.00	1.0
Japan	314.0	0.197452	0.398712	0.0	0.00	0.0	0.00	1.0

# Separate the target from the features: MPG is the value the model is trained to predict.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

def norm(x, stats=None):
    """Standardize ``x`` feature-wise: subtract the mean and divide by the standard deviation.

    Args:
        x: pandas DataFrame (or Series) of feature values.
        stats: Table with ``'mean'`` and ``'std'`` columns, indexed by feature
            name. Defaults to the module-level ``train_stats``, so every
            dataset is scaled with the same training-set statistics.

    Returns:
        ``(x - mean) / std`` computed per feature.
    """
    if stats is None:
        # Fall back to the statistics computed from the training set.
        stats = train_stats
    return (x - stats['mean']) / stats['std']

normed_train_data = norm(train_dataset)  # standardize the training features
normed_test_data = norm(test_dataset)  # standardize the test features the same way

# Regression network: two 64-unit ReLU hidden layers followed by a single-unit
# output layer with no activation. The input width equals the number of features.
model = keras.models.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

# Optimize mean squared error with RMSprop(0.001); also track MAE and MSE as metrics.
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mae', 'mse'])

model.summary()  # print the layers, their output shapes and parameter counts

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 64)                640       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________

# Sanity check: run the not-yet-trained model on the first 10 training rows.
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)

print(example_result)

[[ 0.06187941]
 [ 0.16284567]
 [ 0.19416149]
 [ 0.3226478 ]
 [ 0.09883147]
 [ 0.00343724]
 [ 0.13330291]
 [ 0.62984717]
 [-0.05348695]
 [ 0.44078857]]

class PrintDot(keras.callbacks.Callback):
    """Show training progress by printing one dot for every finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for ``epoch``, starting a new output row every 100 epochs."""
        starts_new_row = not epoch % 100
        if starts_new_row:
            # Break the line so each output row holds 100 dots.
            print(' ')
        print('.', end=' ')

# Train for 1000 epochs and keep the per-epoch metrics in history.
history=model.fit(normed_train_data,
                  train_labels,
                  epochs=1000,
                  validation_split=0.2, # hold out 20% of the training data for validation
                  verbose=0,  # do not show the built-in progress bar
                  callbacks=[PrintDot()]) # report progress through the PrintDot callback

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch  # add an 'epoch' column

hist.tail()

	loss	mae	mse	val_loss	val_mae	val_mse	epoch
995	2.448885	0.975710	2.448885	9.030066	2.268713	9.030066	995
996	2.376843	0.999163	2.376843	9.096817	2.273271	9.096817	996
997	2.383884	0.992754	2.383883	9.657296	2.356696	9.657296	997
998	2.504148	1.021134	2.504148	9.152325	2.318949	9.152325	998
999	2.421463	0.947287	2.421463	9.146635	2.284075	9.146635	999

def plot_history(history):
    """Plot training and validation MAE and MSE against the epoch number.

    Args:
        history: Object with a ``history`` mapping of per-epoch metric values
            (keys 'mae', 'val_mae', 'mse', 'val_mse' are read) and an
            ``epoch`` sequence, such as the value returned by ``model.fit``.

    Draws one figure per metric and then calls ``plt.show()``.
    """
    records = pd.DataFrame(history.history)
    records['epoch'] = history.epoch
    epochs = records['epoch']

    # (metric column, y-axis label, upper y-limit) for each figure, in drawing order.
    panels = (
        ('mae', 'Mean Abs Error [MPG]', 5),
        ('mse', 'Mean Square Error [$MPG^2$]', 20),
    )
    for metric, y_label, y_max in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        # The label text is what appears in the legend.
        plt.plot(epochs, records[metric], label='Train Error')
        plt.plot(epochs, records['val_' + metric], label='Val Error')
        plt.ylim([0, y_max])
        plt.legend()

    plt.show()

# The chart shows that after roughly 100 epochs the error (presumably the
# validation error) stops improving and actually gets worse.
plot_history(history)

train_dataset.keys()

1
2
3

Index(['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration',
       'Model Year', 'USA', 'Europe', 'Japan'],
      dtype='object')

# Build a fresh model with the same architecture: two 64-unit ReLU hidden layers
# followed by a single-unit output layer with no activation.
model = keras.models.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

# Optimize mean squared error with RMSprop(0.001); also track MAE and MSE as metrics.
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mae', 'mse'])

# Train again, this time stopping early once the validation loss stops improving.
history=model.fit(normed_train_data,
                  train_labels,
                  epochs=1000,
                  validation_split=0.2,
                  verbose=0,
                  callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10), 
                             # Stop once val_loss has gone `patience` (10) epochs without improving;
                             # a change counts as an improvement only if it exceeds min_delta (default 0).
                             PrintDot()])

1	. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

plot_history(history)

# Predict on the test set and flatten the one-column output into a 1-D array.
test_predictions = model.predict(normed_test_data).flatten()

# Scatter the predictions against the true MPG values of the test set.
plt.scatter(test_labels,test_predictions)
plt.xlabel('True values [MPG]')
plt.ylabel("Predictions [MPG]")
plt.axis('equal') # equal scaling on both axes
plt.axis('square')
plt.xlim([0,plt.xlim()[1]]) # start the x axis at 0, keeping its current upper limit
plt.ylim([0,plt.ylim()[1]]) # same for the y axis
_ = plt.plot([-100,100],[-100,100]) # reference line through (-100, -100) and (100, 100), i.e. y = x

# Distribution of the prediction errors on the test set.
error = test_predictions-test_labels
plt.hist(error, bins=25) # histogram with 25 bins
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel("Count")

本文由USC-AIR创作和发表，采用BY-NC-SA国际许可协议进行许可，转载请注明作者及出处。本文作者为USC-AIR，本文标题为Tensorflow 预测燃油效率，本文链接为http://uscair.club/2020/02/28/基本回归：预测燃油效率/。

AIR

Tensorflow 预测燃油效率

Tensorflow 预测燃油效率

Previous

python语言程序设计（一）

文件

Next

OpenCV图像处理初体验

OpenCV 图片变化

Comments