TensorFlow 预测燃油效率（环境：Jupyter Notebook、TensorFlow 2.1.0、Python 3.7.5）
1 2 3 4 5 6 7 import pathlibimport pandas as pdimport seaborn as snsimport tensorflow as tfimport matplotlib.pyplot as pltfrom tensorflow import keras as kerasfrom tensorflow.keras import layers as layers
# Fetch the Auto MPG data file; get_file returns the path of the local copy
# (the printed path points into the Keras datasets directory).
dataset_path = keras.utils.get_file(
    "auto-mpg.data",
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)
print(dataset_path)
1 /Users/xxxx/.keras/datasets/auto-mpg.data
# Column labels are supplied explicitly through names=.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
    'Acceleration', 'Model Year', 'Origin',
]
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values='?',           # '?' entries are read as NaN
    comment='\t',            # the rest of a line after a tab is ignored
    sep=' ',
    skipinitialspace=True,   # skip spaces that follow the delimiter
)

# Work on a copy so the frame as loaded stays available.
dataset = raw_dataset.copy()
MPG
Cylinders
Displacement
Horsepower
Weight
Acceleration
Model Year
Origin
393
27.0
4
140.0
86.0
2790.0
15.6
82
1
394
44.0
4
97.0
52.0
2130.0
24.6
82
2
395
32.0
4
135.0
84.0
2295.0
11.6
82
1
396
28.0
4
120.0
79.0
2625.0
18.6
82
1
397
31.0
4
119.0
82.0
2720.0
19.4
82
1
1 2 3 4 5 6 7 8 9 MPG 0 Cylinders 0 Displacement 0 Horsepower 6 Weight 0 Acceleration 0 Model Year 0 Origin 0 dtype: int64
# Drop the rows that contain missing values.
dataset = dataset.dropna()

# Replace the numeric 'Origin' code with one 0.0/1.0 indicator column per
# region (1 -> USA, 2 -> Europe, 3 -> Japan).
origin = dataset.pop('Origin')
for code, region in ((1, 'USA'), (2, 'Europe'), (3, 'Japan')):
    dataset[region] = (origin == code) * 1.0
MPG
Cylinders
Displacement
Horsepower
Weight
Acceleration
Model Year
USA
Europe
Japan
393
27.0
4
140.0
86.0
2790.0
15.6
82
1.0
0.0
0.0
394
44.0
4
97.0
52.0
2130.0
24.6
82
0.0
1.0
0.0
395
32.0
4
135.0
84.0
2295.0
11.6
82
1.0
0.0
0.0
396
28.0
4
120.0
79.0
2625.0
18.6
82
1.0
0.0
0.0
397
31.0
4
119.0
82.0
2720.0
19.4
82
1.0
0.0
0.0
# Sample 80% of the rows for training (fixed seed, so the split is repeatable);
# the rows that were not sampled form the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

# Pairwise scatter plots of a few training columns, with KDE on the diagonal.
sns.pairplot(
    train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]],
    diag_kind='kde',
)
1 <seaborn.axisgrid.PairGrid at 0x10cd98110 >
# Summary statistics of the training frame. The 'MPG' column is removed from
# the statistics table; pop() returns it, so as the last expression of a
# notebook cell it is also what gets displayed.
train_stats = train_dataset.describe()
train_stats.pop("MPG")
1 2 3 4 5 6 7 8 9 count 314.000000 mean 23.310510 std 7.728652 min 10.000000 25 % 17.000000 50 % 22.000000 75 % 28.950000 max 46.600000 Name: MPG, dtype: float64
# Flip the table so each feature is a row and each statistic a column,
# which allows lookups such as train_stats['mean'].
train_stats = train_stats.transpose()
count
mean
std
min
25%
50%
75%
max
Cylinders
314.0
5.477707
1.699788
3.0
4.00
4.0
8.00
8.0
Displacement
314.0
195.318471
104.331589
68.0
105.50
151.0
265.75
455.0
Horsepower
314.0
104.869427
38.096214
46.0
76.25
94.5
128.00
225.0
Weight
314.0
2990.251592
843.898596
1649.0
2256.50
2822.5
3608.00
5140.0
Acceleration
314.0
15.559236
2.789230
8.0
13.80
15.5
17.20
24.8
Model Year
314.0
75.898089
3.675642
70.0
73.00
76.0
79.00
82.0
USA
314.0
0.624204
0.485101
0.0
0.00
1.0
1.00
1.0
Europe
314.0
0.178344
0.383413
0.0
0.00
0.0
0.00
1.0
Japan
314.0
0.197452
0.398712
0.0
0.00
0.0
0.00
1.0
# Split the target column off both frames; pop() removes 'MPG' in place,
# leaving only feature columns behind.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
def norm(x):
    """Standardize ``x`` with the statistics stored in ``train_stats``.

    Subtracts ``train_stats['mean']`` from ``x`` and divides the result by
    ``train_stats['std']``. ``train_stats`` is read from module scope at call
    time.

    Args:
        x: Data to scale (the notebook passes pandas DataFrames).

    Returns:
        The result of ``(x - mean) / std``.
    """
    return (x - train_stats['mean']) / train_stats['std']
# Scale both splits with the same norm() helper.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# Two hidden ReLU layers of 64 units each, followed by a single output unit.
# The input width is the number of feature columns in train_dataset.
model = keras.models.Sequential()
model.add(layers.Dense(64, activation='relu',
                       input_shape=[len(train_dataset.keys())]))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

# Optimize mean squared error; report MAE and MSE as metrics.
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param ================================================================= dense (Dense) (None , 64 ) 640 _________________________________________________________________ dense_1 (Dense) (None , 64 ) 4160 _________________________________________________________________ dense_2 (Dense) (None , 1 ) 65 ================================================================= Total params: 4 ,865 Trainable params: 4 ,865 Non-trainable params: 0 _________________________________________________________________
# Run the model on the first 10 normalized training rows (fit() has not been
# called yet at this point) to check that predict() works end to end.
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
[[ 0.06187941]
[ 0.16284567]
[ 0.19416149]
[ 0.3226478 ]
[ 0.09883147]
[ 0.00343724]
[ 0.13330291]
[ 0.62984717]
[-0.05348695]
[ 0.44078857]]
class PrintDot(keras.callbacks.Callback):
    """Keras callback that prints one dot per finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a progress dot, starting a new line every 100 epochs.

        Args:
            epoch: Index of the epoch that just ended.
            logs: Metrics passed in by Keras; not used here.
        """
        if epoch % 100 == 0:
            print(' ')
        print('.', end=' ')
# Train for 1000 epochs with 20% of the training data held out for validation.
# verbose=0 turns off the built-in Keras logging; PrintDot prints progress.
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=0,
    callbacks=[PrintDot()],
)
1 2 3 4 5 6 7 8 9 10 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . .
# Per-epoch metrics as a table, with the epoch number added as a column.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
loss
mae
mse
val_loss
val_mae
val_mse
epoch
995
2.448885
0.975710
2.448885
9.030066
2.268713
9.030066
995
996
2.376843
0.999163
2.376843
9.096817
2.273271
9.096817
996
997
2.383884
0.992754
2.383883
9.657296
2.356696
9.657296
997
998
2.504148
1.021134
2.504148
9.152325
2.318949
9.152325
998
999
2.421463
0.947287
2.421463
9.146635
2.284075
9.146635
999
def _plot_metric(hist, metric, ylabel, ymax):
    """Draw the train and validation curves of one metric on a new figure."""
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(hist['epoch'], hist[metric], label='Train Error')
    plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
    plt.ylim([0, ymax])
    plt.legend()


def plot_history(history):
    """Plot training vs. validation MAE and MSE against the epoch number.

    Args:
        history: Object exposing ``history`` (per-epoch metric values with the
            keys ``mae``, ``val_mae``, ``mse`` and ``val_mse``) and ``epoch``,
            such as the value returned by ``model.fit``.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    _plot_metric(hist, 'mae', 'Mean Abs Error [MPG]', 5)
    _plot_metric(hist, 'mse', 'Mean Square Error [$MPG^2$]', 20)
    plt.show()
1 2 3 Index(['Cylinders' , 'Displacement' , 'Horsepower' , 'Weight' , 'Acceleration' , 'Model Year' , 'USA' , 'Europe' , 'Japan' ], dtype='object' )
# Build a new model object with two hidden ReLU layers of 64 units and a
# single output unit, so the next fit() starts from freshly created layers.
model = keras.models.Sequential()
model.add(layers.Dense(64, activation='relu',
                       input_shape=[len(train_dataset.keys())]))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

# Optimize mean squared error; report MAE and MSE as metrics.
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)
# Same training call as before, plus early stopping: training ends once
# val_loss has gone 10 epochs without improving.
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=0,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
        PrintDot(),
    ],
)
1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Predict on the normalized test features and flatten the result to 1-D.
test_predictions = model.predict(normed_test_data).flatten()
# Predicted vs. true MPG on the test set.
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values [MPG]')
plt.ylabel("Predictions [MPG]")
plt.axis('equal')
plt.axis('square')
# Start both axes at 0 and keep the upper limits matplotlib picked.
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
# Reference diagonal: points on this line are perfect predictions.
_ = plt.plot([-100, 100], [-100, 100])
# Distribution of the prediction errors (prediction minus true value).
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel("Count")