Submitted:
01 July 2025
Posted:
01 July 2025
You are already at the latest version
Abstract
Keywords:
1. Introduction
2. Related Work
3. Method Design
4. Experimental Design
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import time
# Run on GPU when one is available; models and batches are moved explicitly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Standard CIFAR-10 per-channel normalization statistics.
_CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR10_STD = (0.247, 0.243, 0.261)
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_CIFAR10_MEAN, _CIFAR10_STD)]
)

# CIFAR-10 splits (downloaded on first use), restricted to the first 5000
# examples of each split to keep the experiment fast.
train_full = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_full = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
_SUBSET_SIZE = 5000
train_subset = Subset(train_full, range(_SUBSET_SIZE))
test_subset = Subset(test_full, range(_SUBSET_SIZE))
train_loader = DataLoader(train_subset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_subset, batch_size=128, shuffle=False, num_workers=2)
num_classes = 10
class BaselineMLP(nn.Module):
    """Three-layer MLP baseline for CIFAR-10 with standard dropout (p=0.5)."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(3 * 32 * 32, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of (N, 3, 32, 32) images."""
        hidden = self.dropout1(self.relu1(self.fc1(self.flatten(x))))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)
class HarsanyiDropout(nn.Module):
    """Importance-adaptive dropout layer.

    Each unit's drop probability shrinks with its batch-level importance:
    p_i = base_p * (1 - norm_i), where norm_i is the mean absolute
    activation of unit i over the batch, min-max normalized to [0, 1].
    Surviving activations are rescaled by 1 / keep_prob (inverted dropout)
    so the expected activation is unchanged. Acts as the identity in eval
    mode or when base_p == 0.
    """

    def __init__(self, base_p=0.5):
        super().__init__()
        # Maximum drop probability, applied to the least important units.
        self.base_p = base_p

    def forward(self, x):
        # Identity outside training, or when dropout is disabled entirely.
        if not self.training or self.base_p == 0:
            return x
        # Mask statistics must not participate in the backward graph.
        with torch.no_grad():
            unit_importance = x.abs().mean(dim=0)
            lo, hi = unit_importance.min(), unit_importance.max()
            normalized = (unit_importance - lo) / (hi - lo + 1e-8)
            drop_prob = (self.base_p * (1.0 - normalized)).unsqueeze(0).expand_as(x)
            keep_mask = torch.bernoulli(1 - drop_prob).to(x.device)
        # Inverted-dropout rescaling keeps the expected activation unchanged.
        return x * keep_mask / (1 - drop_prob + 1e-8)
class HarsanyiMLP(nn.Module):
    """Same architecture as BaselineMLP, with HarsanyiDropout in place of
    the fixed-rate nn.Dropout layers."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(3 * 32 * 32, 256)
        self.relu1 = nn.ReLU()
        self.harsanyi_dropout1 = HarsanyiDropout(base_p=0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.harsanyi_dropout2 = HarsanyiDropout(base_p=0.5)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of (N, 3, 32, 32) images."""
        hidden = self.harsanyi_dropout1(self.relu1(self.fc1(self.flatten(x))))
        hidden = self.harsanyi_dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    """Train `model` in place and return the average loss of each epoch.

    Uses the module-level `device` for batch placement.

    Args:
        model: nn.Module to optimize (modified in place).
        dataloader: yields (inputs, labels) mini-batches.
        criterion: loss function mapping (outputs, labels) to a scalar.
        optimizer: optimizer constructed over `model.parameters()`.
        epochs: number of full passes over `dataloader`.

    Returns:
        list[float]: mean training loss per epoch. The original version
        returned None and callers ignore the return value, so returning
        the losses is backward-compatible and useful for monitoring.
    """
    model.train()
    epoch_losses = []
    for _ in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Fix: the original initialized running_loss but never
            # accumulated or reported it (dead variable).
            running_loss += loss.item()
            num_batches += 1
        epoch_losses.append(running_loss / max(num_batches, 1))
    return epoch_losses
def evaluate_model(model, dataloader, compute_roc_auc=False):
    """Evaluate `model` on `dataloader` and return classification metrics.

    Uses the module-level `device` for batch placement and the sklearn
    metric functions imported at file level.

    Args:
        model: trained nn.Module producing class logits.
        dataloader: yields (inputs, labels) batches of the evaluation split.
        compute_roc_auc: when True, also compute macro one-vs-rest ROC-AUC
            from the softmax probabilities; NaN when disabled or on failure.

    Returns:
        list: [accuracy, precision, recall, f1, roc_auc]; precision, recall
        and f1 are macro-averaged with zero_division=0.
    """
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # torch.softmax avoids building a fresh nn.Softmax module per batch.
            probs = torch.softmax(outputs, dim=1)
            preds = outputs.argmax(dim=1)
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())
            all_probs.append(probs.cpu())
    all_preds = torch.cat(all_preds).numpy()
    all_labels = torch.cat(all_labels).numpy()
    all_probs = torch.cat(all_probs).numpy()
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    roc_auc = float('nan')
    if compute_roc_auc:
        try:
            from sklearn.preprocessing import label_binarize
            all_labels_bin = label_binarize(all_labels, classes=list(range(num_classes)))
            roc_auc = roc_auc_score(all_labels_bin, all_probs, average='macro', multi_class='ovr')
        except Exception:
            # Best effort: ROC-AUC can fail e.g. when a class is absent.
            roc_auc = float('nan')
    return [accuracy, precision, recall, f1, roc_auc]
def summarize_results(name, all_metrics):
    """Print the mean and standard deviation of each metric over all runs.

    Args:
        name: label for the model being summarized.
        all_metrics: list of per-run metric lists, each ordered as
            [accuracy, precision, recall, f1, roc_auc]. NaN entries
            (e.g. ROC-AUC when not computed) are reported as skipped.
    """
    all_metrics = np.array(all_metrics)
    means = np.mean(all_metrics, axis=0)
    stds = np.std(all_metrics, axis=0)
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']
    # Fix: derive the run count from the data instead of hard-coding "10".
    num_runs = len(all_metrics)
    print(f"\n===== {name} - Final Summary over {num_runs} Runs =====")
    for i, metric in enumerate(metric_names):
        if np.isnan(means[i]):
            # Fix: the original labeled every NaN metric "(ROC-AUC)",
            # which is wrong if a different metric is ever NaN.
            print(f"{metric}: Skipped (not computed)")
        else:
            print(f"{metric}: Mean = {means[i]:.4f}, Std = {stds[i]:.4f}")
    print("=============================================\n")
def run_experiment():
    """Train and evaluate both MLP variants over several independent runs,
    then print a mean/std summary for each.

    Each run trains a freshly initialized model of each family for the same
    number of epochs; per-run metrics are collected and summarized at the end.
    """
    epochs = 5
    criterion = nn.CrossEntropyLoss()
    runs = 10
    # Map each model family label to its constructor; iteration order
    # (baseline first) matches the original per-run sequence.
    model_classes = {"Baseline MLP": BaselineMLP, "Harsanyi MLP": HarsanyiMLP}
    results = {label: [] for label in model_classes}
    for run_idx in range(runs):
        print(f"======= Run {run_idx+1} / {runs} =======")
        for label, model_cls in model_classes.items():
            model = model_cls().to(device)
            opt = optim.Adam(model.parameters(), lr=0.001)
            train_model(model, train_loader, criterion, opt, epochs=epochs)
            m = evaluate_model(model, test_loader, compute_roc_auc=False)
            results[label].append(m)
            print(f"[{label}] Acc: {m[0]:.4f}, Prec: {m[1]:.4f}, Recall: {m[2]:.4f}, F1: {m[3]:.4f}")
    # Summary
    summarize_results("Baseline MLP", results["Baseline MLP"])
    summarize_results("Harsanyi MLP", results["Harsanyi MLP"])
if __name__ == "__main__":
    # Run the full comparison only when executed as a script (not on import).
    run_experiment()
The output of the experimental code is as follows:
===== Baseline MLP - Final Summary over 10 Runs =====
Accuracy: Mean = 0.3882, Std = 0.0076
Precision: Mean = 0.3872, Std = 0.0063
Recall: Mean = 0.3882, Std = 0.0076
F1-Score: Mean = 0.3772, Std = 0.0101
ROC-AUC: Skipped (ROC-AUC)
=============================================
===== Harsanyi MLP - Final Summary over 10 Runs =====
Accuracy: Mean = 0.4075, Std = 0.0052
Precision: Mean = 0.4109, Std = 0.0050
Recall: Mean = 0.4077, Std = 0.0055
F1-Score: Mean = 0.3991, Std = 0.0041
ROC-AUC: Skipped (ROC-AUC)
=============================================
5. Generality and Scalability Analysis
6. Conclusion and Future Work
References
- Pham, H., & Le, Q. (2021, May). Autodropout: Learning dropout patterns to regularize deep networks. In Proceedings of the AAAI conference on artificial intelligence (Vol. 35, No. 11, pp. 9351-9359).
- Basnet, R. B., Johnson, C., & Doleck, T. (2022). Dropout prediction in Moocs using deep learning and machine learning. Education and Information Technologies, 27(8), 11499-11513. [CrossRef]
- Wu, L., Li, J., Wang, Y., Meng, Q., Qin, T., Chen, W., ... & Liu, T. Y. (2021). R-drop: Regularized dropout for neural networks. Advances in neural information processing systems, 34, 10890-10905.
- Omar, A., & Abd El-Hafeez, T. (2024). Optimizing epileptic seizure recognition performance with feature scaling and dropout layers. Neural Computing and Applications, 36(6), 2835-2852. [CrossRef]
- Kiruthiga, D., & Manikandan, V. (2023). Levy flight-particle swarm optimization-assisted BiLSTM+ dropout deep learning model for short-term load forecasting. Neural Computing and Applications, 35(3), 2679-2700. [CrossRef]
- Nguyen, D., Barkousaraie, A. S., Bohara, G., Balagopal, A., McBeth, R., Lin, M. H., & Jiang, S. (2021). A comparison of Monte Carlo dropout and bootstrap aggregation on the performance and uncertainty estimation in radiation therapy dose prediction with deep learning neural networks. Physics in Medicine & Biology, 66(5), 054002. [CrossRef]
- Hu, J., Weng, B., Huang, T., Gao, J., Ye, F., & You, L. (2021). Deep residual convolutional neural network combining dropout and transfer learning for ENSO forecasting. Geophysical Research Letters, 48(24), e2021GL093531. [CrossRef]
- Abdar, M., Samami, M., Mahmoodabad, S. D., Doan, T., Mazoure, B., Hashemifesharaki, R., ... & Nahavandi, S. (2021). Uncertainty quantification in skin cancer classification using three-way decision-based Bayesian deep learning. Computers in biology and medicine, 135, 104418. [CrossRef]
- Bacanin, N., Zivkovic, M., Al-Turjman, F., Venkatachalam, K., Trojovský, P., Strumberger, I., & Bezdan, T. (2022). Hybridized sine cosine algorithm with convolutional neural networks dropout regularization application. Scientific Reports, 12(1), 6302. [CrossRef]
- Wang, J. (2025). Credit Card Fraud Detection via Hierarchical Multi-Source Data Fusion and Dropout Regularization. Transactions on Computational and Scientific Methods, 5(1).
- Ali, A. A. A., & Mallaiah, S. (2022). Intelligent handwritten recognition using hybrid CNN architectures based-SVM classifier with dropout. Journal of King Saud University-Computer and Information Sciences, 34(6), 3294-3300. [CrossRef]
- Zunino, A., Bargal, S. A., Morerio, P., Zhang, J., Sclaroff, S., & Murino, V. (2021). Excitation dropout: Encouraging plasticity in deep neural networks. International Journal of Computer Vision, 129(4), 1139-1152. [CrossRef]
- Ait Skourt, B., El Hassani, A., & Majda, A. (2022). Mixed-pooling-dropout for convolutional neural network regularization. Journal of King Saud University-Computer and Information Sciences, 34(8), 4756-4762. [CrossRef]
- Zhang, D., Li, C., Lin, F., Zeng, D., & Ge, S. (2021, August). Detecting Deepfake Videos with Temporal Dropout 3DCNN. In IJCAI (pp. 1288-1294).
- Horvath, S., Laskaridis, S., Almeida, M., Leontiadis, I., Venieris, S., & Lane, N. (2021). Fjord: Fair and accurate federated learning under heterogeneous targets with ordered dropout. Advances in Neural Information Processing Systems, 34, 12876-12889.
- Wei, M., Gu, H., Ye, M., Wang, Q., Xu, X., & Wu, C. (2021). Remaining useful life prediction of lithium-ion batteries based on Monte Carlo Dropout and gated recurrent unit. Energy Reports, 7, 2862-2871. [CrossRef]
- Kong, X., Liu, X., Gu, J., Qiao, Y., & Dong, C. (2022). Reflash dropout in image super-resolution. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 6002-6012).
- Guo, D., Wang, X., Gao, K., Jin, Y., Ding, J., & Chai, T. (2021). Evolutionary optimization of high-dimensional multiobjective and many-objective expensive problems assisted by a dropout neural network. IEEE transactions on systems, man, and cybernetics: systems, 52(4), 2084-2097. [CrossRef]
- Lin, J., He, C., & Cheng, R. (2022). Adaptive dropout for high-dimensional expensive multiobjective optimization. Complex & Intelligent Systems, 8(1), 271-285. [CrossRef]
- Zhang, J., Phoon, K. K., Zhang, D., Huang, H., & Tang, C. (2021). Deep learning-based evaluation of factor of safety with confidence interval for tunnel deformation in spatially variable soil. Journal of Rock Mechanics and Geotechnical Engineering, 13(6), 1358-1367. [CrossRef]
- González, E. G., Villar, J. R., & de la Cal, E. (2021). Time series data augmentation and dropout roles in deep learning applied to fall detection. In 15th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2020) 15 (pp. 563-570). Springer International Publishing.
- Liu, Y., Hu, R., & Balaprakash, P. (2021, May). Uncertainty quantification of deep neural network-based turbulence model for reactor transient analysis. In Verification and Validation (Vol. 84782, p. V001T11A001). American Society of Mechanical Engineers.
- Abbaszadeh Shahri, A., Shan, C., & Larsson, S. (2022). A novel approach to uncertainty quantification in groundwater table modeling by automated predictive deep learning. Natural Resources Research, 31(3), 1351-1373. [CrossRef]
- Mubarak, A. A., Cao, H., & Ahmed, S. A. (2021). Predictive learning analytics using deep learning model in MOOCs’ courses videos. Education and Information Technologies, 26(1), 371-392. [CrossRef]
- Guo, X., Zhang, X., Tian, X., Lu, W., & Li, X. (2022). Probabilistic prediction of the heave motions of a semi-submersible by a deep learning model. Ocean Engineering, 247, 110578. [CrossRef]
- Loftus, T. J., Shickel, B., Ruppert, M. M., Balch, J. A., Ozrazgat-Baslanti, T., Tighe, P. J., ... & Bihorac, A. (2022). Uncertainty-aware deep learning in healthcare: a scoping review. PLOS digital health, 1(8), e0000085. [CrossRef]
- Zoremsanga, C., & Hussain, J. (2024). Particle swarm optimized deep learning models for rainfall prediction: a case study in Aizawl, Mizoram. IEEE Access. [CrossRef]
- Papp, P. A., Martinkus, K., Faber, L., & Wattenhofer, R. (2021). DropGNN: Random dropouts increase the expressiveness of graph neural networks. Advances in Neural Information Processing Systems, 34, 21997-22009.
- Dolezal, J. M., Srisuwananukorn, A., Karpeyev, D., Ramesh, S., Kochanny, S., Cody, B., ... & Pearson, A. T. (2022). Uncertainty-informed deep learning models enable high-confidence predictions for digital histopathology. Nature communications, 13(1), 6572. [CrossRef]
- Njoku, J. N., Morocho-Cayamcela, M. E., & Lim, W. (2021). CGDNet: Efficient hybrid deep learning model for robust automatic modulation recognition. IEEE Networking Letters, 3(2), 47-51.
- Pivato, M., & Tchouante, É. F. (2024). Bayesian social aggregation with almost-objective uncertainty. Theoretical Economics, 19(3), 1351-1398. [CrossRef]
- Kinney, D. (2025). Aggregating Measures of Accuracy and Fairness in Prediction Algorithms.
- Karni, E., & Weymark, J. A. (2024). Impartiality and relative utilitarianism. Social Choice and Welfare, 63(1), 1-18. [CrossRef]
- Nebel, J. M. (2022). Aggregation without interpersonal comparisons of well-being. Philosophy and Phenomenological Research, 105(1), 18-41. [CrossRef]
- Pitis, S. (2023). Consistent aggregation of objectives with diverse time preferences requires non-markovian rewards. Advances in Neural Information Processing Systems, 36, 2877-2893.
- Billot, A., & Qu, X. (2021). Utilitarian aggregation with heterogeneous beliefs. American Economic Journal: Microeconomics, 13(3), 112-123. [CrossRef]
Disclaimer/Publisher’s Note: The statements, opinions and data contained in all publications are solely those of the individual author(s) and contributor(s) and not of MDPI and/or the editor(s). MDPI and/or the editor(s) disclaim responsibility for any injury to people or property resulting from any ideas, methods, instructions or products referred to in the content. |
© 2025 by the authors. Licensee MDPI, Basel, Switzerland. This article is an open access article distributed under the terms and conditions of the Creative Commons Attribution (CC BY) license (http://creativecommons.org/licenses/by/4.0/).