I am trying to run the following code:
# Tabulate the cubic fit and its energy/theta derivatives on a 100 x 100
# (energy, theta) grid. Results are stored as plain Python floats.
for ie in range(100):
    energy = torch.tensor([.1 + .1 * ie], dtype=torch.float32, requires_grad=True)
    # log_01 and log_10 are constants defined elsewhere.
    xe = .5 + 20 * (torch.log(energy) - log_01) / (log_10 - log_01)
    xe2 = xe * xe

    # The right-hand side Y depends only on xe (i.e. on the energy), not on
    # theta, so it is filled once per energy instead of once per (energy, theta).
    # Y is written in place; without detaching first, every write would be
    # appended to the autograd history left by all previous iterations and each
    # backward pass would have to walk an ever-growing graph.
    # NOTE(review): detach_() raises if Y is a view of another tensor -- confirm
    # Y is allocated directly (e.g. torch.zeros(4)).
    Y.detach_()
    for k, params in enumerate((PXmg1_p, PXmg2_p, PXmg3_p, PXmg4_p)):
        Y[k] = torch.exp(params[0, 0]) + torch.exp(params[0, 1] * torch.pow(xe, params[0, 2]))

    for it in range(100):
        theta = torch.tensor([it * theta_max / 99], dtype=torch.float32, requires_grad=True)
        thisp0_mg[ie, it] = solvecubic(energy, theta, 0)
        thisp0de_mg[ie, it] = solvecubic(energy, theta, 2)
        thisp0de2_mg[ie, it] = solvecubic(energy, theta, 22)
        thisp0de3_mg[ie, it] = solvecubic(energy, theta, 25)
        thisp0dth_mg[ie, it] = solvecubic(energy, theta, 3)
        thisp0dth2_mg[ie, it] = solvecubic(energy, theta, 32)
where
def solvecubic(energy, theta, mode):
    """Evaluate the cubic defined by the global ``Y``, or one of its derivatives.

    Solves ``A @ B = Y`` for the coefficients ``B`` (``A``, ``Y`` and
    ``theta_max`` are module-level names), evaluates
    ``val = sum(B[i] * x**i for i in range(4))`` at
    ``x = .5 + 4 * theta / theta_max`` and returns a Python float selected by
    ``mode``:

    ====  =====================================
    mode  result
    ====  =====================================
    0     ``val``
    2     first derivative of ``val`` w.r.t. ``energy``
    22    second derivative w.r.t. ``energy``
    25    third derivative w.r.t. ``energy``
    3     first derivative w.r.t. ``theta``
    32    second derivative w.r.t. ``theta``
    ====  =====================================

    Any other ``mode`` returns ``None``.

    ``energy`` and ``theta`` must be the tensors (with ``requires_grad=True``)
    that ``Y`` and ``x`` were built from, since derivatives are obtained with
    ``torch.autograd.grad``. The graph is always retained so that the function
    can be called repeatedly for the same ``Y``.
    """
    # mode -> (tensor to differentiate with respect to, derivative order)
    derivative_spec = {
        0: (None, 0),
        2: (energy, 1),
        22: (energy, 2),
        25: (energy, 3),
        3: (theta, 1),
        32: (theta, 2),
    }
    if mode not in derivative_spec:
        return None
    wrt, order = derivative_spec[mode]

    # Evaluate B given Y, then the cubic polynomial in x.
    B = torch.linalg.solve(A, Y)
    x = .5 + 4 * theta / theta_max
    result = sum(B[i] * x**i for i in range(4))

    # Differentiate `order` times. Every pass but the last must itself be
    # differentiable (create_graph=True). Mode 25 previously differentiated the
    # first derivative twice and so returned the second derivative; chaining the
    # passes here yields the true third derivative.
    for step in range(order):
        is_last = step == order - 1
        (result,) = torch.autograd.grad(
            result, wrt, create_graph=not is_last, retain_graph=True
        )
    return result.item()
where A is the 4x4 Vandermonde matrix.
This code seems to run really slowly, and CUDA is not available on my computer, so I cannot use torch.cuda.empty_cache(). I have also tried using del theta or del energy once the iteration over that value is done, but it doesn't seem to improve things much.
Is there a way to delete the computational graphs after each iteration so that I save some memory? The code is extremely slow as of now.
Just to give an idea, I have tried ranges of 1, 2, and 3 for the inner loop. The run times were 1 min, 2 min 30 sec, and 10 min respectively.
[…] `some_function(xe2, theta)`. With your code and `def some_function(xe2, theta): return xe2 * theta`, the result is obtained in 0.2 seconds. To do more to help, we would need to know more about `some_function()`. […] `itertools.product()` to `some_function()`. Doing this reduces the runtime to 0.1 seconds.