Submitted: 27 June 2025
Posted: 01 July 2025
Abstract
Keywords:
1. Introduction
2. Conception and Implementation of Nash Normalization Mechanism
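In summary, the mechanism operates on a mini-batch \(X \in \mathbb{R}^{B \times F}\) of \(B\) samples and \(F\) features as follows (a sketch reconstructed from the reference implementation in Section 3; notation ours):

\[
\hat{x}_{if} = \frac{x_{if} - \mu_f}{\sqrt{\sigma_f^2 + \varepsilon}}, \qquad
I = \frac{1}{F}\,\hat{X}\hat{X}^{\top}, \qquad
s_i = \frac{1}{B}\sum_{j=1}^{B} I_{ij}, \qquad
y_{if} = \gamma_f\,(\hat{x}_{if} - s_i) + \beta_f
\]

where \(\mu_f\) and \(\sigma_f^2\) are the per-feature batch mean and (biased) variance, \(I\) is the \(B \times B\) interaction matrix whose entry \(I_{ij}\) is the scaled inner product between normalized samples \(i\) and \(j\), and \(s_i\), sample \(i\)'s average "payoff" against the rest of the batch, is subtracted as strategic feedback before the learnable affine transform \((\gamma, \beta)\).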
3. Experimental Design and Evaluation
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
import numpy as np
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Set random seeds for reproducibility
torch.manual_seed(0)
np.random.seed(0)

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 (800 train, 800 test); CIFAR-10 images are RGB, so mean/std are given per channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_subset = Subset(train_dataset, range(800))
test_subset = Subset(test_dataset, range(800))
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False)
# MLP configuration
input_size = 3 * 32 * 32
hidden_size = 256
num_classes = 10

# Standard MLP with BatchNorm (baseline)
class StandardMLP(nn.Module):
    def __init__(self):
        super(StandardMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.view(-1, input_size)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x
# Nash BatchNorm (proposed)
class NashBatchNorm1d(nn.Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(NashBatchNorm1d, self).__init__()
        self.gamma = nn.Parameter(torch.ones(num_features))
        self.beta = nn.Parameter(torch.zeros(num_features))
        self.eps = eps
        self.momentum = momentum
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))

    def forward(self, x):
        if self.training:
            batch_mean = x.mean(0)
            batch_var = x.var(0, unbiased=False)
            # Update running statistics outside the autograd graph so the
            # buffers do not accumulate computation history across iterations
            with torch.no_grad():
                self.running_mean = self.momentum * batch_mean + (1 - self.momentum) * self.running_mean
                self.running_var = self.momentum * batch_var + (1 - self.momentum) * self.running_var
            x_hat = (x - batch_mean) / torch.sqrt(batch_var + self.eps)
        else:
            x_hat = (x - self.running_mean) / torch.sqrt(self.running_var + self.eps)
        # Pairwise interaction matrix between normalized samples (batch_size x batch_size)
        interaction = torch.matmul(x_hat, x_hat.T) / x_hat.size(1)
        # Each sample's mean interaction with the batch serves as its strategic feedback
        strategic_feedback = torch.mean(interaction, dim=1, keepdim=True)
        x_nash = x_hat - strategic_feedback
        return self.gamma * x_nash + self.beta
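# Editorial sanity check (not part of the original experiment): the interaction
# term couples samples within a batch, and unlike nn.BatchNorm1d this coupling
# persists in eval mode, so a sample's prediction depends on which other
# samples share its test batch.
_nbn = NashBatchNorm1d(4).eval()
_a = torch.randn(8, 4)
_b = torch.cat([_a[:1], torch.randn(7, 4)])  # same first sample, different batch
assert not torch.allclose(_nbn(_a)[0], _nbn(_b)[0])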
# MLP with NashBatchNorm
class NashMLP(nn.Module):
    def __init__(self):
        super(NashMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = NashBatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = NashBatchNorm1d(hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.view(-1, input_size)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x
# Training loop
def train_model(model, optimizer, criterion, train_loader, num_epochs=5):
    model.train()
    for _ in range(num_epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
# Evaluation
def evaluate_model(model, test_loader):
    model.eval()
    y_true, y_pred, y_prob = [], [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.numpy())
            y_pred.extend(predicted.cpu().numpy())
            y_prob.extend(probs.cpu().numpy())
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_prob = np.array(y_prob)
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='macro', zero_division=0)
    rec = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    try:
        auc = roc_auc_score(y_true, y_prob, multi_class='ovr')
    except ValueError:  # AUC is undefined if a class is absent from y_true
        auc = float('nan')
    return acc, prec, rec, f1, auc
# Run each experiment 10 times
def run_experiments():
    for model_name, ModelClass in [('StandardMLP', StandardMLP), ('NashMLP', NashMLP)]:
        print(f"\n===== {model_name} =====")
        metrics_all = []
        for run in range(10):
            model = ModelClass().to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
            criterion = nn.CrossEntropyLoss()
            start_time = time.time()
            train_model(model, optimizer, criterion, train_loader, num_epochs=5)
            duration = time.time() - start_time
            acc, prec, rec, f1, auc = evaluate_model(model, test_loader)
            metrics_all.append([acc, prec, rec, f1, auc, duration])
            print(f"Run {run+1:2d} | Acc: {acc:.4f} | Prec: {prec:.4f} | Rec: {rec:.4f} | F1: {f1:.4f} | AUC: {auc:.4f} | Time: {duration:.2f}s")
        metrics_np = np.array(metrics_all)
        means = metrics_np.mean(axis=0)
        stds = metrics_np.std(axis=0)
        print("\n----- Final Results -----")
        print(f"Average Accuracy  : {means[0]:.4f} ± {stds[0]:.4f}")
        print(f"Average Precision : {means[1]:.4f} ± {stds[1]:.4f}")
        print(f"Average Recall    : {means[2]:.4f} ± {stds[2]:.4f}")
        print(f"Average F1 Score  : {means[3]:.4f} ± {stds[3]:.4f}")
        print(f"Average ROC AUC   : {means[4]:.4f} ± {stds[4]:.4f}")
        print(f"Average Time (s)  : {means[5]:.2f} ± {stds[5]:.2f}")

run_experiments()
The output of the experimental code is as follows:
===== StandardMLP =====

----- Final Results -----
Average Accuracy  : 0.3090 ± 0.0088
Average Precision : 0.3080 ± 0.0089
Average Recall    : 0.3055 ± 0.0082
Average F1 Score  : 0.2997 ± 0.0086
Average ROC AUC   : 0.7716 ± 0.0070
Average Time (s)  : 1.62 ± 0.11

===== NashMLP =====

----- Final Results -----
Average Accuracy  : 0.3191 ± 0.0143
Average Precision : 0.3155 ± 0.0139
Average Recall    : 0.3143 ± 0.0136
Average F1 Score  : 0.3089 ± 0.0152
Average ROC AUC   : 0.7724 ± 0.0068
Average Time (s)  : 1.93 ± 0.17
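The section reports no significance test, but with the per-metric means and standard deviations over ten runs one can gauge whether the roughly one-point accuracy gap is meaningful using Welch's t-test computed from summary statistics. A minimal editorial sketch, not part of the original protocol (note the reported standard deviations come from np.std with ddof=0, so the result is approximate):

from scipy.stats import ttest_ind_from_stats

# Accuracy over 10 runs, as reported above: NashMLP 0.3191 ± 0.0143,
# StandardMLP 0.3090 ± 0.0088 (mean ± std, n = 10 each)
t_stat, p_value = ttest_ind_from_stats(
    mean1=0.3191, std1=0.0143, nobs1=10,  # NashMLP
    mean2=0.3090, std2=0.0088, nobs2=10,  # StandardMLP
    equal_var=False,                      # Welch's t-test
)
print(f"t = {t_stat:.2f}, p = {p_value:.3f}")

On these numbers this gives t ≈ 1.9 and p ≈ 0.08, so the improvement is suggestive rather than conclusive at ten runs.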
4. Consideration of the Universality of the Method
5. Conclusion and Future Outlook