全连接神经网络


import numpy as np
import pandas as pd

np.random.seed(1)


def softmax_cross_entropy(logits, y):
    """Compute the softmax cross-entropy loss and its gradient w.r.t. the logits.

    Args:
        logits: 2-D array of raw scores, one row per sample.
        y: Target array with the same shape as ``logits``. It is multiplied
            element-wise with the log-probabilities (one-hot rows in this
            file's training loop).

    Returns:
        A ``(loss, dlogits)`` tuple. ``loss`` is the mean over rows of
        ``-sum(y * log(p + 1e-9))``; ``dlogits`` is ``(p - y) / N`` where
        ``N`` is the number of rows and ``p`` the softmax probabilities.
    """
    batch = logits.shape[0]

    # Subtract the per-row maximum before exponentiating so exp() cannot overflow.
    centered = logits - np.max(logits, axis=1, keepdims=True)
    unnormalized = np.exp(centered)
    probs = unnormalized / unnormalized.sum(axis=1, keepdims=True)

    # The small constant keeps log() finite when a probability underflows to 0.
    per_sample = (y * np.log(probs + 1e-9)).sum(axis=1)
    loss = -per_sample.mean()

    # Dividing by the batch size matches the mean reduction used for the loss.
    dlogits = (probs - y) / batch
    return loss, dlogits


def softmax(logits):
    """Return row-wise softmax probabilities for a 2-D array of logits.

    The per-row maximum is subtracted before exponentiating, which leaves the
    result unchanged mathematically but keeps ``exp`` from overflowing.
    """
    row_peak = np.max(logits, axis=1, keepdims=True)
    weights = np.exp(logits - row_peak)
    normalizer = weights.sum(axis=1, keepdims=True)
    return weights / normalizer


def load_titanic_dataset(path="titanic.csv"):
    """Load the Titanic CSV and turn it into standardized features and one-hot labels.

    The CSV must provide the columns ``survived``, ``pclass``, ``age``,
    ``sibsp``, ``parch``, ``fare``, ``sex``, ``adult_male``, ``alone`` and
    ``embarked``.

    Args:
        path: Location of the CSV file passed to ``pandas.read_csv``.

    Returns:
        ``(X, y)`` where ``X`` is a float32 matrix whose columns have been
        standardized (zero mean, unit standard deviation up to a small
        epsilon) and ``y`` holds the ``survived`` labels as rows of a 2x2
        identity matrix.
    """
    frame = pd.read_csv(path)

    labels = frame["survived"].astype(int).values

    base_columns = ["pclass", "age", "sibsp", "parch", "fare"]
    table = frame[base_columns].copy()

    # Missing numeric values are replaced by the column median.
    for column in ("age", "fare"):
        table[column] = table[column].fillna(table[column].median())

    table["sex"] = frame["sex"].map({"male": 0, "female": 1})

    for column in ("adult_male", "alone"):
        table[column] = frame[column].astype(float)

    # Missing embarkation ports fall back to the most frequent one, then the
    # column is expanded into one indicator column per port.
    port = frame["embarked"]
    port = port.fillna(port.mode()[0])
    port_indicators = pd.get_dummies(port, prefix="embarked", dtype=float)
    table = pd.concat([table, port_indicators], axis=1)

    X = table.values.astype(np.float32)
    column_mean = X.mean(axis=0)
    column_std = X.std(axis=0)
    # The epsilon guards against division by zero for constant columns.
    X = (X - column_mean) / (column_std + 1e-8)

    return X, np.eye(2)[labels]


class Dense:
    """Affine layer computing ``X @ w + b`` with manual gradient bookkeeping.

    Weights are drawn from a normal distribution scaled by
    ``sqrt(2 / (input_dim + output_dim))``; biases start at zero. The layer
    caches its last input so ``backward`` can form the weight gradient.
    """

    def __init__(self, input_dim, output_dim):
        """Create the parameters for a layer mapping ``input_dim`` to ``output_dim`` features."""
        self.input_dim, self.output_dim = input_dim, output_dim

        fan_total = input_dim + output_dim
        init_std = np.sqrt(2 / fan_total)
        self.w = init_std * np.random.randn(input_dim, output_dim)
        self.b = np.zeros((1, output_dim))

        # Filled in by forward() / backward().
        self.X = None
        self.dw = None
        self.db = None

    def forward(self, X):
        """Cache ``X`` and return the affine output ``X @ w + b``."""
        self.X = X
        return np.matmul(X, self.w) + self.b

    def backward(self, dz):
        """Store parameter gradients for ``dz`` and return the gradient w.r.t. the input.

        ``dz`` is the gradient of the loss with respect to this layer's output.
        """
        cached_input = self.X
        self.dw = np.matmul(cached_input.T, dz)
        # The bias is broadcast over rows, so its gradient sums over them.
        self.db = np.sum(dz, axis=0, keepdims=True)
        return np.matmul(dz, self.w.T)

    def step(self, lr):
        """Apply one gradient-descent update, in place, with learning rate ``lr``."""
        self.w -= self.dw * lr
        self.b -= self.db * lr


class Sigmoid:
    """Element-wise logistic activation with a cached output for backprop."""

    def __init__(self):
        # Output of the most recent forward pass; needed by backward().
        self.out = None

    def forward(self, X):
        """Return ``1 / (1 + exp(-X))`` element-wise and cache the result.

        The naive formula overflows in ``exp`` for large negative inputs.
        Here the exponential is only ever taken of a non-positive number:
        for ``x >= 0`` the value is ``1 / (1 + exp(-x))`` and for ``x < 0``
        the algebraically equal ``exp(x) / (1 + exp(x))`` is used.

        Args:
            X: Array of pre-activations.

        Returns:
            NumPy array of the same shape as ``X`` with values in ``[0, 1]``.
        """
        X = np.asarray(X)
        # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
        decay = np.exp(-np.abs(X))
        self.out = np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))
        return self.out

    def backward(self, dout):
        """Return the input gradient: upstream ``dout`` times ``out * (1 - out)``."""
        dX = dout * self.out * (1 - self.out)
        return dX


class ReLU:
    """Rectified linear activation that remembers its input for backprop."""

    def __init__(self):
        # Input of the most recent forward pass; backward() derives its mask from it.
        self.X = None

    def forward(self, X):
        """Cache ``X`` and return it with negative entries replaced by zero."""
        self.X = X
        rectified = np.maximum(0, X)
        return rectified

    def backward(self, dout):
        """Pass ``dout`` through where the cached input was strictly positive, zero elsewhere."""
        active = self.X > 0
        return dout * active


class Network:
    """Four-layer fully connected classifier: Dense/ReLU x3 followed by a Dense output.

    Hidden widths are 10, 50 and 10. ``forward`` returns raw logits; no
    softmax is applied inside the network.
    """

    def __init__(self, input_dim, output_dim):
        """Build the layers for ``input_dim`` input features and ``output_dim`` logits."""
        self.input_dim = input_dim
        self.output_dim = output_dim

        # Layers are created in network order so the random weight draws
        # happen in the same sequence every time.
        self.dense1, self.relu1 = Dense(input_dim, 10), ReLU()
        self.dense2, self.relu2 = Dense(10, 50), ReLU()
        self.dense3, self.relu3 = Dense(50, 10), ReLU()
        self.dense4 = Dense(10, output_dim)

    def _pipeline(self):
        """Return all layers in forward order."""
        return (
            self.dense1, self.relu1,
            self.dense2, self.relu2,
            self.dense3, self.relu3,
            self.dense4,
        )

    def forward(self, X):
        """Run ``X`` through every layer in order and return the logits."""
        activation = X
        for layer in self._pipeline():
            activation = layer.forward(activation)
        return activation

    def backward(self, dlogits):
        """Backpropagate ``dlogits`` through the layers in reverse; return the input gradient."""
        grad = dlogits
        for layer in reversed(self._pipeline()):
            grad = layer.backward(grad)
        return grad

    def step(self, lr):
        """Apply one gradient-descent update with rate ``lr`` to every Dense layer."""
        for dense in (self.dense1, self.dense2, self.dense3, self.dense4):
            dense.step(lr)


if __name__ == "__main__":
    # Train the network on the Titanic data with mini-batch gradient descent
    # and report loss/accuracy on the full data set every 100 epochs.
    X, y = load_titanic_dataset("titanic.csv")
    n_samples, n_features = X.shape
    net = Network(n_features, 2)

    epochs, batch_size, lr = 1000, 32, 0.05

    for epoch in range(epochs):
        # A fresh shuffle each epoch decides which samples share a batch.
        order = np.random.permutation(n_samples)
        total_loss = 0.0

        for lo in range(0, n_samples, batch_size):
            rows = order[lo:lo + batch_size]
            X_batch, y_batch = X[rows], y[rows]

            loss, dlogits = softmax_cross_entropy(net.forward(X_batch), y_batch)
            net.backward(dlogits)
            net.step(lr)

            # Weight by batch size: the last batch may be smaller than the rest.
            total_loss += loss * len(X_batch)

        if epoch % 100:
            continue

        probs = softmax(net.forward(X))
        pred = probs.argmax(axis=1)
        true = y.argmax(axis=1)
        acc = np.mean(pred == true)
        avg_loss = total_loss / n_samples
        print(f"epoch={epoch}, loss={avg_loss:.4f}, acc={acc:.4f}")

声明:Hello World|版权所有,违者必究|如未注明,均为原创|本网站采用BY-NC-SA协议进行授权

转载:转载请注明原文链接 - 全连接神经网络


我的朋友,理论是灰色的,而生活之树是常青的!