# 신경망 초기화 (neural network initialization)
def initialize_network(input_size, hidden_layers, output_size):
    """Create the weight and bias arrays of a fully connected network.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(2 / (fan_in + fan_out))`` (Glorot/Xavier-style scaling); biases
    start at zero.

    Args:
        input_size: Number of input features.
        hidden_layers: Sizes of the hidden layers, in order. Any iterable of
            ints is accepted (list, tuple, ...); it may be empty.
        output_size: Number of output units.

    Returns:
        A dict with, for every layer ``i`` (1-based), ``'W{i}'`` of shape
        ``(size_i, size_{i-1})`` and ``'b{i}'`` of shape ``(size_i, 1)``.
    """
    # Unpacking (rather than list concatenation) lets callers pass a tuple
    # or any other iterable of sizes, not only a list.
    layer_sizes = [input_size, *hidden_layers, output_size]

    network = {}
    # Walk consecutive (fan_in, fan_out) pairs; layers are numbered from 1.
    for i, (fan_in, fan_out) in enumerate(zip(layer_sizes, layer_sizes[1:]), start=1):
        scale = np.sqrt(2.0 / (fan_in + fan_out))
        network[f'W{i}'] = np.random.randn(fan_out, fan_in) * scale
        network[f'b{i}'] = np.zeros((fan_out, 1))
    return network
# 전방향 전파 (forward propagation)
def forward_pass(network, X):
    """Run the input through every layer and record the intermediate values.

    Hidden layers use ``relu``; the last layer uses ``softmax``.

    Args:
        network: Parameter dict holding ``'W{l}'`` and ``'b{l}'`` for each
            layer ``l``; the layer count is taken as ``len(network) // 2``.
        X: Input array, stored as activation ``'A0'``.
            (assumes one example per column -- TODO confirm against callers)

    Returns:
        A dict containing ``'A0'`` plus, for each layer ``l``, the
        pre-activation ``'Z{l}'`` and the activation ``'A{l}'``.
    """
    num_layers = len(network) // 2
    cache = {'A0': X}
    activation = X
    for idx in range(1, num_layers + 1):
        pre_activation = np.dot(network[f'W{idx}'], activation) + network[f'b{idx}']
        # Only the final layer is normalised into probabilities.
        activate = softmax if idx == num_layers else relu
        activation = activate(pre_activation)
        cache[f'Z{idx}'] = pre_activation
        cache[f'A{idx}'] = activation
    return cache
# 역방향 전파 (backward propagation)
def backward_pass(network, cache, X, Y):
    """Compute parameter gradients by back-propagating the output error.

    The output-layer error is ``A_L - Y``; earlier layers multiply the
    propagated error by ``relu_derivative`` of their pre-activation.

    Args:
        network: Parameter dict with ``'W{l}'`` / ``'b{l}'`` entries.
        cache: Dict of ``'A{l}'`` and ``'Z{l}'`` values from the forward pass.
        X: Input array; only ``X.shape[1]`` is used, as the averaging count.
        Y: Target array with the same shape as the output activation.
            (presumably one-hot labels -- verify against caller)

    Returns:
        A dict with ``'dW{l}'`` and ``'db{l}'`` for every layer ``l``.
    """
    inv_m = 1 / X.shape[1]
    top = len(network) // 2

    # Output layer: the error term is simply prediction minus target.
    delta = cache[f'A{top}'] - Y
    grads = {
        f'dW{top}': inv_m * np.dot(delta, cache[f'A{top - 1}'].T),
        f'db{top}': inv_m * np.sum(delta, axis=1, keepdims=True),
    }

    # Hidden layers, from the one just below the output down to layer 1.
    for layer in range(top - 1, 0, -1):
        upstream = np.dot(network[f'W{layer + 1}'].T, delta)
        delta = upstream * relu_derivative(cache[f'Z{layer}'])
        grads[f'dW{layer}'] = inv_m * np.dot(delta, cache[f'A{layer - 1}'].T)
        grads[f'db{layer}'] = inv_m * np.sum(delta, axis=1, keepdims=True)
    return grads
# 매개변수 업데이트 (parameter update)
def update_parameters(network, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    The arrays stored in ``network`` are modified in place.

    Args:
        network: Parameter dict with ``'W{l}'`` / ``'b{l}'`` entries.
        grads: Gradient dict with matching ``'dW{l}'`` / ``'db{l}'`` entries.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``network`` dict that was passed in.
    """
    num_layers = len(network) // 2
    for idx in range(1, num_layers + 1):
        for prefix in ('W', 'b'):
            key = f'{prefix}{idx}'
            # Augmented assignment keeps the update in place on the array.
            network[key] -= learning_rate * grads[f'd{key}']
    return network
# 활성화 함수 (activation functions)
def relu(Z):
    """Return the element-wise maximum of 0 and ``Z`` (ReLU activation)."""
    rectified = np.maximum(0, Z)
    return rectified
def relu_derivative(Z):
    """Return a mask that is true where ``Z`` is strictly positive.

    The comparison result is returned as-is (a boolean array for array
    input), so it acts as a 0/1 factor when multiplied with a gradient.
    """
    positive_mask = Z > 0
    return positive_mask
def softmax(Z):
    """Normalise ``Z`` along axis 0 so each column sums to 1.

    The per-column maximum is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing on large inputs without changing the result.
    """
    col_max = np.max(Z, axis=0, keepdims=True)
    exp_shifted = np.exp(Z - col_max)
    col_total = np.sum(exp_shifted, axis=0, keepdims=True)
    return exp_shifted / col_total
# 네트워크 훈련 (network training)
def train_bp_network(X_train, Y_train, hidden_layers, learning_rate=0.01, epochs=1000):
    """Train a network with full-batch gradient descent and return it.

    Each epoch runs a forward pass, computes the cross-entropy loss, runs a
    backward pass, and updates the parameters. The loss is printed every
    100 epochs (including epoch 0).

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` sets the input size.
        Y_train: Training targets; ``Y_train.shape[0]`` sets the output size.
        hidden_layers: Sizes of the hidden layers, in order.
        learning_rate: Gradient-descent step size.
        epochs: Number of passes over the training data.

    Returns:
        The trained parameter dict.
    """
    n_inputs, n_outputs = X_train.shape[0], Y_train.shape[0]
    network = initialize_network(n_inputs, hidden_layers, n_outputs)

    # The output activation sits one layer past the hidden layers.
    output_key = f'A{len(hidden_layers) + 1}'

    for epoch in range(epochs):
        cache = forward_pass(network, X_train)
        loss = cross_entropy_loss(cache[output_key], Y_train)
        grads = backward_pass(network, cache, X_train, Y_train)
        network = update_parameters(network, grads, learning_rate)
        if not epoch % 100:
            print(f"Epoch {epoch}, Loss: {loss}")
    return network
# 손실 함수 (loss function)
def cross_entropy_loss(AL, Y):
    """Return the cross-entropy between predictions ``AL`` and targets ``Y``.

    The sum of ``Y * log(AL + 1e-8)`` over all entries is negated and
    divided by ``Y.shape[1]``. The ``1e-8`` offset avoids ``log(0)``.
    """
    num_examples = Y.shape[1]
    log_probs = np.log(AL + 1e-8)
    total = np.sum(Y * log_probs)
    return -total / num_examples
# 예측 함수 (prediction function)
def predict(network, X):
    """Return, for each column of the output activation, the index of its largest entry.

    Runs ``forward_pass`` and takes ``argmax`` along axis 0 of the last
    layer's activation.
    """
    activations = forward_pass(network, X)
    num_layers = len(network) // 2
    output_activation = activations[f'A{num_layers}']
    return np.argmax(output_activation, axis=0)