Commit 785708f5 authored by ADRIAN  AYUSO MUNOZ's avatar ADRIAN AYUSO MUNOZ

W&B Elements

parent 9e2d05a0
# Instructions for doing hyperparameter search with Weights and Biases.
1. Log into W&B.
2. If you have already created the project, skip to the next step; if not, create it from the "Projects" tab by clicking "Create new project".
3. Then click on the "Sweeps" tab (broom icon).
4. If you have already created the sweep, skip to step 6; if not, create it by clicking "Create Sweep".
5. In the sweep creation tab you need to define the parameters of the sweep; once the first run has been launched they cannot be changed. It is highly recommended to save your sweep configuration, since future hyperparameter searches are likely to be similar. See https://docs.wandb.ai/guides/sweeps/define-sweep-configuration for the configuration format. The main fields are:
   1. program -> Python script that will be called by the agent; it is the one that communicates with the W&B platform.
   2. method -> Optimisation method (grid, random or bayes).
   3. parameters -> Dictionary of parameters to be optimised; the boundaries and distribution of each one need to be indicated.
6. Copy the agent command; it will be similar to "wandb agent ayusoupm/dmsr/ih7wyixk".
7. Paste the agent command on your terminal and the process will begin.
8. It is highly recommended to run the agent command inside a detachable terminal session, for example with screen (https://linux.die.net/man/1/screen).
9. The results and logs of the sweep are accessible through the "Sweeps" tab.
import numpy as np
import torch
from dmsr import main
from deepsnap.batch import Batch
from torch.utils.data import DataLoader
import heterograph_construction
from deepsnap.dataset import GraphDataset
from datetime import datetime
from utilities import plot_roc, plot_prc, plot_dist
import matplotlib.pyplot as plt
import scipy.stats as st
# Import W&B
import wandb
# Start the W&B run as soon as the module is loaded; the __main__ section
# later reads this run's hyperparameters from wandb.config.
wandb.init(project="dmsr", entity="ayusoupm")
# Execute on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Graph constructor; it builds the heterograph and decodes the predictions.
constructor = heterograph_construction.DISNETConstructor(device=device)
# Graph edge type to study.
toStudy = 'dis_dru_the'
"""
It gets the predictions of the model and decodes them.
Input:
model: Model to generate predictions.
eid: Edges to predict.
dataloader: Graph.
random: Whether the edges to predict are random or not.
Output:
Tuple with the dataframe containing all the predictions ordered and decoded, and a tensor with the raw predictions.
"""
def getDecode(model, eid, dataloader, random=''):
    """
    It gets the predictions of the model for the given edges and decodes them.

    Input:
        model: Model to generate predictions; its pred(batch, eid) method is called.
        eid: Edges to predict, given as a pair of index tensors (eid[0], eid[1]).
        dataloader: Loader that yields the graph batch to predict on.
        random: Whether the edges to predict are random or not. It is forwarded
            unchanged to constructor.decodePredictions.
    Output:
        Tuple (decoded, preds): the result of constructor.decodePredictions for
        the predicted edges, and a detached CPU tensor with the raw predictions.
    Raises:
        ValueError: If the dataloader yields no batch, so nothing was predicted.
    """
    print(" Looking for new edges.")
    preds = None
    # Inference only: gradients are never used here, so autograd is disabled to
    # avoid building a computation graph for every prediction.
    with torch.no_grad():
        # NOTE(review): when the loader yields several batches only the
        # predictions of the last one are kept. metrics() builds the loader from
        # a single graph, so one batch is assumed - confirm.
        for batch in dataloader:
            batch.to(device)
            preds = model.pred(batch, eid)
    if preds is None:
        raise ValueError("getDecode received an empty dataloader: no predictions were generated.")

    # One [first endpoint, second endpoint, score] row per predicted edge.
    new = []
    for i, pred in enumerate(preds):
        new.append([eid[0][i].item(), eid[1][i].item(), pred.cpu().detach().numpy().item()])
    n = len(preds)
    print(" Decoding predictions, this may take a while.")
    return constructor.decodePredictions(new, toStudy, n, True, random), torch.tensor(preds).cpu().detach()
"""
It generates random edges in the graph.
Output:
Randomly generated edges.
"""
def randomEids(numEdges=5013, numSources=30729, numTargets=3944):
    """
    It generates random edges in the graph.

    Input:
        numEdges: Number of random edges to generate.
        numSources: Exclusive upper bound for the index of the first endpoint.
        numTargets: Exclusive upper bound for the index of the second endpoint.
            NOTE(review): the defaults presumably match the node and edge counts
            of the studied graph - confirm before reusing with another graph.
    Output:
        Tuple (first endpoints, second endpoints) of integer index tensors
        created on the module-level device, each holding numEdges values.
    """
    # The first endpoints are drawn before the second ones so that the sequence
    # of random numbers stays the same for a given seed.
    sources = torch.randint(0, numSources, (numEdges,), device=device)
    targets = torch.randint(0, numTargets, (numEdges,), device=device)
    return (sources, targets)
"""
It plots the metrics for the results of the real edge and the random edge set. It joins them vertically and horizontally.
Input:
fpr: False positive rate.
tpr: True positive rate.
label1: Area Under the ROC curve.
recall: Recall.
precision: Precision.
label2: Area Under the PR curve.
"""
def _drawMetricCurves(rocAx, prcAx, fpr, tpr, label1, recall, precision, label2):
    """
    It draws the ROC curve on rocAx and the Precision-Recall curve on prcAx.
    """
    # Both areas are shown in the legends with two decimals.
    twoDecimals = {'float_kind': lambda x: "%.2f" % x}

    rocAx.plot(fpr, tpr, label="AUC ROC = " + np.array2string(label1, formatter=twoDecimals))
    rocAx.set_title('ROC Curve')
    rocAx.legend(loc='lower right')
    # Dashed diagonal used as visual reference; it carries no label, so it does
    # not show up in the legend.
    rocAx.plot([0, 1], [0, 1], 'r--')
    rocAx.set_xlim([0, 1])
    rocAx.set_ylim([0, 1])
    rocAx.set_ylabel('True Positive Rate')
    rocAx.set_xlabel('False Positive Rate')

    prcAx.set_title('Precision-Recall Curve')
    prcAx.plot(recall, precision, label="PRC = " + np.array2string(label2, formatter=twoDecimals))
    prcAx.legend(loc='lower right')
    prcAx.set_xlim([0, 1])
    prcAx.set_ylim([0, 1])
    prcAx.set_ylabel('Precision')
    prcAx.set_xlabel('Recall')


def plotMetrics(fpr, tpr, label1, recall, precision, label2):
    """
    It plots the ROC and Precision-Recall curves twice, stacked vertically and
    side by side, saves both figures as SVG files under 'metrics/' and finally
    calls plot_dist().

    Input:
        fpr: False positive rate.
        tpr: True positive rate.
        label1: Area Under the ROC curve (formatted with np.array2string).
        recall: Recall.
        precision: Precision.
        label2: Area Under the PR curve (formatted with np.array2string).
    """
    # Vertical plotting.
    fig, axs = plt.subplots(2, figsize=(6, 10))
    # Horizontal plotting.
    fig2, axs2 = plt.subplots(1, 2, figsize=(12, 4))
    try:
        _drawMetricCurves(axs[0], axs[1], fpr, tpr, label1, recall, precision, label2)
        _drawMetricCurves(axs2[0], axs2[1], fpr, tpr, label1, recall, precision, label2)
        fig.savefig('metrics/aucroc&prcRepoDBVertical.svg', format='svg', dpi=1200)
        fig2.savefig('metrics/aucroc&prcRepoDBHorizontal.svg', format='svg', dpi=1200)
    finally:
        # Release the figures even when drawing or saving fails; this function
        # is called once per trained model, so leaked figures would pile up.
        plt.close(fig)
        plt.close(fig2)
    plot_dist()
"""
It generates the metrics for the model.
Input:
model: Model to generate metrics of.
Output:
Area Under the ROC and PR curve.
"""
def metrics(model):
    """
    It generates the metrics for the model.

    The model predicts the edges returned by the graph constructor (labelled 1)
    and a set of randomly generated edges (labelled 0); both groups are joined
    to compute and plot the ROC and Precision-Recall curves.

    Input:
        model: Model to generate metrics of.
    Output:
        Area Under the ROC and PR curve.
    """
    def timestamp():
        # Current wall-clock time, used in the progress messages.
        return datetime.now().strftime("%H:%M:%S")

    graph, realEids = constructor.DISNETHeterograph(full=False, withoutRepoDB=True)
    linkPredDataset = GraphDataset([graph], task='link_pred', edge_train_mode='disjoint', edge_message_ratio=0.8)
    toInfer = DataLoader(linkPredDataset, collate_fn=Batch.collate(), batch_size=1)

    model = model.to(device)
    model.eval()

    print("Started getting repoDB predictions at", timestamp())
    _, realPreds = getDecode(model, realEids, toInfer)
    print("Finished getting repoDB predictions at", timestamp())

    print("Started getting random predictions at", timestamp())
    _, randomPreds = getDecode(model, randomEids(), toInfer, 'R')
    print("Finished getting random predictions at", timestamp())

    # Join real and random edge results to calculate metrics: the predictions
    # stay a flat list of scalars, the labels become a single tensor.
    pure_predictions = list(realPreds) + list(randomPreds)
    labels = torch.cat((torch.ones(len(realPreds)), torch.zeros(len(randomPreds))))

    edgeType = ('disorder', 'dis_dru_the', 'drug')
    fpr, tpr, label1 = plot_roc(labels, pure_predictions, edgeType, "dmsr-f/", "RepoDB")
    recall, precision, label2 = plot_prc(labels, pure_predictions, edgeType, "dmsr-f/", "RepoDB")
    plotMetrics(fpr, tpr, label1, recall, precision, label2)
    return label1, label2
if __name__ == '__main__':
    # Number of models trained and evaluated with the same hyperparameters.
    k = 50

    # Hyperparameters of this W&B run, in the positional order main() is called with.
    config = wandb.config
    hyperparameters = (config.epochs, config.hidden_dim, config.lr, config.weight_decay, config.dropout)

    # Metrics collected for each model.
    rocL = np.array([])
    prcL = np.array([])

    # Train and test k models and obtain their metrics.
    for trained in range(1, k + 1):
        roc1, prc1 = metrics(main(*hyperparameters))
        rocL, prcL = np.append(rocL, roc1), np.append(prcL, prc1)
        # Running averages, logged only to follow the evolution. It is not necessary.
        wandb.log({'rocAverage': sum(rocL) / trained, 'prcAverage': sum(prcL) / trained})

    # Average of the metrics of all the generated models.
    rocM, prcM = sum(rocL) / k, sum(prcL) / k

    # 95% confidence intervals: the t-distribution is used when there are fewer
    # than 30 samples, the normal distribution otherwise.
    if k < 30:
        def interval(samples):
            return st.t.interval(0.95, k - 1, loc=np.mean(samples), scale=st.sem(samples))
    else:
        def interval(samples):
            return st.norm.interval(0.95, loc=np.mean(samples), scale=st.sem(samples))
    r, p = interval(rocL), interval(prcL)

    # Send data to W&B. Necessary.
    wandb.log({'rocAverage': rocM, 'prcAverage': prcM})
    print("AUCROC: ", rocM, "+-", rocM - r[0])
    print("AUCPR: ", prcM, "+-", prcM - p[0])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment