使用keras对糖尿病进行分类

导入需要的库

1
2
3
4
5
6
7
8
9
import numpy as np
from keras import Sequential
from keras.callbacks import TensorBoard
from keras.layers import Dense
from keras.activations import relu
from keras.activations import sigmoid
from keras import optimizers
from keras import losses
from sklearn.preprocessing import MinMaxScaler

加载数据

1
2
3
4
5
# Fix the RNG seed so runs are reproducible, then load the Pima Indians
# diabetes dataset (comma-separated, no header row).
np.random.seed(7)
data = np.loadtxt('../teaching/pima-indians-diabetes.csv', delimiter=',')

# Columns 0-7 are the input features; column 8 is the 0/1 diabetes label.
x, y = data[:, 0:8], data[:, 8]

可以打印一下数据看一下数据维度,x,y维度分别为:

1
2
(768, 8)
(768,)

数据归一化

1
2
# Rescale every feature column to the [0, 1] range; the raw features have
# very different magnitudes, which slows down dense-layer training.
scaler = MinMaxScaler()
x = scaler.fit_transform(x)

建立模型

这里使用四层全连接

1
2
3
4
5
6
7
8
# Four fully connected layers funneling the 8 input features down to a
# single sigmoid unit for binary (diabetic / not diabetic) classification.
# input_dim is only meaningful on the FIRST layer; Keras infers every later
# layer's input size from the previous layer's output, so the redundant and
# misleading input_dim=8 on the hidden/output layers has been removed.
model = Sequential(
    [
        Dense(24, input_dim=8, activation=relu),
        Dense(12, activation=relu),
        Dense(8, activation=relu),
        Dense(1, activation=sigmoid),
    ]
)

(图:模型结构示意图 —— 原文此处为图片,已缺失)

建立回调

1
2
3
4
5
6
7
8
9
tb_call_back = TensorBoard(log_dir='./logs',  # directory for TensorBoard log files
histogram_freq=0, # how often (in epochs) to compute activation histograms; 0 disables
# batch_size=32, # how much data to use when computing the histograms
write_graph=True, # whether to store the network graph
write_grads=True, # whether to visualize gradient histograms
write_images=True, # whether to visualize model weights as images
embeddings_freq=0,
embeddings_layer_names=None,
embeddings_metadata=None)

加载模型开始训练

1
2
3
4
5
6
7
8
9
# Compile with the Adam optimizer and train with a 33% validation split,
# streaming metrics to TensorBoard via the callback built above.
# Fixes: the variable was named `sgd` while holding an Adam optimizer
# (misleading), and used the lowercase `optimizers.adam` alias instead of
# the canonical `optimizers.Adam` class.
adam = optimizers.Adam(lr=0.01, decay=1e-5)
model.compile(loss=losses.binary_crossentropy, optimizer=adam, metrics=['accuracy'])
model.fit(x, y, epochs=200, batch_size=10, validation_split=0.33, callbacks=[tb_call_back])

# Per-sample cross-entropy on the training data.
# NOTE(review): the original discards this backend tensor without evaluating
# or printing it; kept as-is for behavioral compatibility.
pre_y = model.predict(x)
losses.binary_crossentropy(y, pre_y)

# Final in-sample loss/accuracy; metrics_names[1] is 'accuracy'.
scores = model.evaluate(x, y)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

寻找最优学习率

建立lr_find回调

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from keras.callbacks import Callback
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt


class LRFinder(Callback):
    """Keras callback implementing the learning-rate range test.

    While training, the learning rate is swept geometrically from ``min_lr``
    to ``max_lr`` (a new lr every ``batches_lr_update`` batches, with the
    initial weights restored before each step so every lr starts from the
    same point). At the end, the loss-vs-lr curve is plotted; a good
    learning rate is typically read off just before the loss minimum.

    Up-to-date version: https://github.com/WittmannF/LRFinder

    Example of usage:
        from keras.models import Sequential
        from keras.layers import Flatten, Dense
        from keras.datasets import fashion_mnist
        !git clone https://github.com/WittmannF/LRFinder.git
        from LRFinder.keras_callback import LRFinder
        # 1. Input Data
        (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
        mean, std = X_train.mean(), X_train.std()
        X_train, X_test = (X_train-mean)/std, (X_test-mean)/std
        # 2. Define and Compile Model
        model = Sequential([Flatten(),
                            Dense(512, activation='relu'),
                            Dense(10, activation='softmax')])
        model.compile(loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'], optimizer='sgd')
        # 3. Fit using Callback
        lr_finder = LRFinder(min_lr=1e-4, max_lr=1)
        model.fit(X_train, y_train, batch_size=128, callbacks=[lr_finder], epochs=2)
    """

    def __init__(self, min_lr, max_lr, mom=0.9, stop_multiplier=None,
                 reload_weights=True, batches_lr_update=5):
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.mom = mom  # momentum used to smooth the per-batch loss
        self.reload_weights = reload_weights
        self.batches_lr_update = batches_lr_update
        if stop_multiplier is None:
            # Early-stop threshold scales with the smoothing momentum:
            # 4 if mom=0.9, 10 if mom=0.
            self.stop_multiplier = -20 * self.mom / 3 + 10
        else:
            self.stop_multiplier = stop_multiplier

    # NOTE: the callbacks below originally used the mutable default
    # `logs={}` — replaced with `logs=None` (standard Keras signature);
    # behavior is unchanged since `logs` is only read via .get().

    def on_train_begin(self, logs=None):
        p = self.params
        try:
            n_iterations = p['epochs'] * p['samples'] // p['batch_size']
        except KeyError:
            # Generator-based fit: 'samples'/'batch_size' are absent,
            # fall back to steps-per-epoch. (Original used a bare except.)
            n_iterations = p['steps'] * p['epochs']

        # One learning rate per `batches_lr_update` batches, geometrically
        # spaced between min_lr and max_lr.
        self.learning_rates = np.geomspace(self.min_lr, self.max_lr,
                                           num=n_iterations // self.batches_lr_update + 1)
        self.losses = []
        self.iteration = 0
        self.best_loss = 0
        if self.reload_weights:
            # Snapshot the initial weights so each lr is evaluated from the
            # same starting point, and the model can be restored afterwards.
            self.model.save_weights('tmp.hdf5')

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        loss = logs.get('loss')

        if self.iteration != 0:  # smooth the loss using momentum
            loss = self.losses[-1] * self.mom + loss * (1 - self.mom)

        if self.iteration == 0 or loss < self.best_loss:
            self.best_loss = loss

        # Advance to the next learning rate every `batches_lr_update`
        # BATCHES (the original comment incorrectly said "epochs").
        if self.iteration % self.batches_lr_update == 0:

            if self.reload_weights:
                self.model.load_weights('tmp.hdf5')

            lr = self.learning_rates[self.iteration // self.batches_lr_update]
            K.set_value(self.model.optimizer.lr, lr)

        self.losses.append(loss)

        # Stop once the smoothed loss explodes past the best loss seen.
        if loss > self.best_loss * self.stop_multiplier:
            self.model.stop_training = True

        self.iteration += 1

    def on_train_end(self, logs=None):
        if self.reload_weights:
            # Restore the pre-sweep weights so the model is left unchanged.
            self.model.load_weights('tmp.hdf5')

        # Plot loss vs learning rate on a logarithmic lr axis.
        plt.figure(figsize=(12, 6))
        plt.plot(self.learning_rates[:len(self.losses)], self.losses)
        plt.xlabel("Learning Rate")
        plt.ylabel("Loss")
        plt.xscale('log')
        plt.show()

导入lr find并实例化

1
# Sweep learning rates from 1e-4 up to 1 during the next fit call.
lr_finder = LRFinder(min_lr=1e-4, max_lr=1)

调整fit,添加lr find回调

1
# Re-run training with both callbacks: LRFinder overrides the optimizer's
# lr every few batches and plots the loss-vs-lr curve when training ends.
model.fit(x, y, epochs=200, batch_size=10, validation_split=0.33, callbacks=[tb_call_back, lr_finder])

(原文此处为外部链接,现已失效)

可以看到lr与loss的关系,寻找最优学习率

[]:

  • Copyright: Copyright is owned by the author. For commercial reprints, please contact the author for authorization. For non-commercial reprints, please indicate the source.
  • Copyrights © 2017-2021 More Star

请我喝杯咖啡吧~

支付宝
微信