BP 신경망 구현을 위한 기본 알고리즘

신경망 초기화 (아래의 모든 코드는 `import numpy as np`가 선행되어 있다고 가정합니다)

def initialize_network(input_size, hidden_layers, output_size):
    """Create the weight and bias parameters for a fully connected network.

    Args:
        input_size: Number of input features.
        hidden_layers: Sequence (list, tuple, ...) with the width of each
            hidden layer, in order. May be empty.
        output_size: Number of output units.

    Returns:
        A dict with keys ``'W1'..'WL'`` and ``'b1'..'bL'``. ``W{i}`` has shape
        ``(size_i, size_{i-1})`` and ``b{i}`` has shape ``(size_i, 1)``.
    """
    # list(...) lets callers pass a tuple or any other sequence; plain
    # concatenation with a tuple would raise TypeError.
    layer_sizes = [input_size] + list(hidden_layers) + [output_size]
    network = {}
    for i, (fan_in, fan_out) in enumerate(zip(layer_sizes, layer_sizes[1:]), start=1):
        # Glorot/Xavier-style scaling: standard normal draws multiplied by
        # sqrt(2 / (fan_in + fan_out)).
        scale = np.sqrt(2.0 / (fan_in + fan_out))
        network[f'W{i}'] = np.random.randn(fan_out, fan_in) * scale
        network[f'b{i}'] = np.zeros((fan_out, 1))
    return network

순전파 (forward propagation)

def forward_pass(network, X):
    """Run the network on ``X`` and record every intermediate value.

    Args:
        network: Parameter dict holding ``'W{l}'`` / ``'b{l}'`` pairs; the
            layer count is taken as ``len(network) // 2``.
        X: Input stored under ``'A0'`` and fed to the first layer.

    Returns:
        A dict with ``'A0'`` plus, for each layer ``l``, the pre-activation
        ``'Z{l}'`` and the activation ``'A{l}'``. Hidden layers use ``relu``;
        the final layer uses ``softmax``.
    """
    num_layers = len(network) // 2
    activations = {'A0': X}
    previous = X
    for idx in range(1, num_layers + 1):
        pre_activation = np.dot(network[f'W{idx}'], previous) + network[f'b{idx}']
        activations[f'Z{idx}'] = pre_activation
        # Only the last layer produces class probabilities.
        is_output_layer = idx == num_layers
        previous = softmax(pre_activation) if is_output_layer else relu(pre_activation)
        activations[f'A{idx}'] = previous
    return activations

역전파 (backpropagation)

def backward_pass(network, cache, X, Y):
    """Compute parameter gradients from the values cached by the forward pass.

    Args:
        network: Parameter dict with ``'W{l}'`` / ``'b{l}'`` entries.
        cache: Dict of ``'A{l}'`` / ``'Z{l}'`` values from the forward pass.
        X: Input batch; only ``X.shape[1]`` is read, as the averaging count.
        Y: Targets, subtracted from the final activation.

    Returns:
        A dict with ``'dW{l}'`` and ``'db{l}'`` for every layer.
    """
    sample_count = X.shape[1]
    depth = len(network) // 2

    # Output-layer error term: final activation minus target.
    delta = cache[f'A{depth}'] - Y
    averaging = 1 / sample_count
    grads = {
        f'dW{depth}': averaging * np.dot(delta, cache[f'A{depth-1}'].T),
        f'db{depth}': averaging * np.sum(delta, axis=1, keepdims=True),
    }

    # Walk the hidden layers from last to first, pushing the error back.
    for idx in range(depth - 1, 0, -1):
        upstream = np.dot(network[f'W{idx+1}'].T, delta)
        delta = upstream * relu_derivative(cache[f'Z{idx}'])
        grads[f'dW{idx}'] = averaging * np.dot(delta, cache[f'A{idx-1}'].T)
        grads[f'db{idx}'] = averaging * np.sum(delta, axis=1, keepdims=True)
    return grads

매개변수 업데이트

def update_parameters(network, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias in place.

    Args:
        network: Parameter dict with ``'W{l}'`` / ``'b{l}'`` entries; it is
            modified in place.
        grads: Dict with matching ``'dW{l}'`` / ``'db{l}'`` entries.
        learning_rate: Step size multiplied into each gradient.

    Returns:
        The same ``network`` dict that was passed in.
    """
    layer_count = len(network) // 2
    for idx in range(1, layer_count + 1):
        for prefix in ('W', 'b'):
            name = f'{prefix}{idx}'
            # The gradient key is the parameter key prefixed with 'd'.
            network[name] -= learning_rate * grads[f'd{name}']
    return network

활성화 함수

def relu(Z):
    """Return the element-wise maximum of 0 and ``Z``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Return the mask ``Z > 0`` (true where ``Z`` is strictly positive).

    The result is a boolean mask rather than a float array; multiplying it
    with a numeric array treats True as 1 and False as 0.
    """
    positive_mask = Z > 0
    return positive_mask

def softmax(Z):
    """Apply softmax along axis 0 of ``Z``.

    The maximum along axis 0 is subtracted before exponentiating, so large
    inputs do not overflow ``np.exp``.
    """
    peak = np.max(Z, axis=0, keepdims=True)
    exponentials = np.exp(Z - peak)
    normalizer = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / normalizer

네트워크 훈련

def train_bp_network(X_train, Y_train, hidden_layers, learning_rate=0.01, epochs=1000):
    """Train a network with full-batch gradient descent.

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` is used as the input size.
        Y_train: Training targets; ``Y_train.shape[0]`` is used as the output size.
        hidden_layers: Sequence with the width of each hidden layer.
        learning_rate: Step size passed to ``update_parameters``.
        epochs: Number of forward/backward/update iterations.

    Returns:
        The trained parameter dict.
    """
    # Normalize once so tuples and other sequences are accepted.
    hidden_layers = list(hidden_layers)
    input_size = X_train.shape[0]
    output_size = Y_train.shape[0]
    network = initialize_network(input_size, hidden_layers, output_size)
    # Cache key of the final layer's activation (hidden layers + output layer).
    output_key = f'A{len(hidden_layers) + 1}'
    for epoch in range(epochs):
        cache = forward_pass(network, X_train)
        grads = backward_pass(network, cache, X_train, Y_train)
        network = update_parameters(network, grads, learning_rate)
        if epoch % 100 == 0:
            # The loss is only reported, never used for the update, so compute
            # it just on reporting epochs. It is taken from this epoch's
            # forward-pass cache, i.e. before the parameter update.
            loss = cross_entropy_loss(cache[output_key], Y_train)
            print(f"Epoch {epoch}, Loss: {loss}")
    return network

손실 함수

def cross_entropy_loss(AL, Y):
    """Return the cross-entropy between ``AL`` and ``Y``, averaged over ``Y.shape[1]``.

    A constant 1e-8 is added to ``AL`` before the logarithm so that zero
    entries do not produce ``-inf``.
    """
    sample_count = Y.shape[1]
    log_predictions = np.log(AL + 1e-8)
    weighted_total = np.sum(Y * log_predictions)
    return -weighted_total / sample_count

예측 함수

def predict(network, X):
    """Return, for each column of the final activation, the index of its largest entry.

    Runs ``forward_pass`` on ``X`` and takes ``argmax`` along axis 0 of the
    last layer's activation.
    """
    activations = forward_pass(network, X)
    final_key = f'A{len(network) // 2}'
    return np.argmax(activations[final_key], axis=0)

태그: bp-neural-network machine-learning deep-learning python NumPy

6월 4일 19:22에 게시됨