Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ lightning_logs
data/cache
data/instances
logs
nohup.out
*.out
94 changes: 36 additions & 58 deletions common/cal_reward.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,40 @@
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import numba as nb
import numpy as np
from common.ops import run_parallel, convert_vars_np
from common.nb_utils import gen_tours_batch, calc_length


def get_reward(vars, actions=None, tours_batch=None):
    """Return the class-1 reward for a batch of tours.

    Args:
        vars: Problem data. Anything that is not a ``dict`` is first passed
            through ``convert_vars_np``. The keys ``'adj'``,
            ``'service_time'`` and ``'clss'`` are read from the result.
        actions: Used to build the tours with ``gen_tours_batch`` when
            ``tours_batch`` is not supplied.
        tours_batch: Pre-built tours; takes precedence over ``actions``.

    Returns:
        Whatever ``run_parallel`` returns for ``reward_ins`` with ``k=1``.
    """
    batch = gen_tours_batch(actions) if tours_batch is None else tours_batch
    problem = vars if isinstance(vars, dict) else convert_vars_np(vars)

    # Only class 1 is evaluated here; see get_Ts_RL for classes 1..3.
    return run_parallel(
        reward_ins,
        batch,
        problem['adj'],
        problem['service_time'],
        problem['clss'],
        k=1,
    )

def get_Ts(vars, actions=None, tours_batch=None):
    """Return the rewards for classes 1, 2 and 3, one column per class.

    Unlike ``get_Ts_RL``, ``vars`` is used as given (no conversion) and the
    problem data is handed to ``run_parallel`` as keyword arguments.

    Args:
        vars: Mapping providing ``'adj'``, ``'service_time'`` and ``'clss'``.
        actions: Used to build the tours with ``gen_tours_batch`` when
            ``tours_batch`` is not supplied.
        tours_batch: Pre-built tours; takes precedence over ``actions``.

    Returns:
        ``np.float32`` array: the three per-class results stacked and
        transposed.
    """
    batch = gen_tours_batch(actions) if tours_batch is None else tours_batch

    per_class = [
        run_parallel(
            reward_ins,
            batch,
            adj=vars['adj'],
            service=vars['service_time'],
            clss=vars['clss'],
            k=level,
        )
        for level in (1, 2, 3)
    ]
    return np.float32(per_class).T
import torch
from common.local_search import lsRL

def get_Ts_RL(vars, actions=None, tours_batch=None):
    """Return the rewards for classes 1, 2 and 3, one column per class.

    Args:
        vars: Problem data. Anything that is not a ``dict`` is first passed
            through ``convert_vars_np``. The keys ``'adj'``,
            ``'service_time'`` and ``'clss'`` are read from the result.
        actions: Used to build the tours with ``gen_tours_batch`` when
            ``tours_batch`` is not supplied.
        tours_batch: Pre-built tours; takes precedence over ``actions``.

    Returns:
        ``np.float32`` array: the three per-class results stacked and
        transposed.
    """
    batch = gen_tours_batch(actions) if tours_batch is None else tours_batch
    problem = vars if isinstance(vars, dict) else convert_vars_np(vars)

    per_class = [
        run_parallel(
            reward_ins,
            batch,
            problem['adj'],
            problem['service_time'],
            problem['clss'],
            k=level,
        )
        for level in (1, 2, 3)
    ]
    return np.float32(per_class).T
def action_to_tours(action):
zero_indices = np.where(action == 0)[0]
split_indices = np.concatenate(([-1], zero_indices, [len(action)]))
lengths = np.diff(split_indices) - 1
valid_lengths = lengths[lengths > 0]
nonzero_action = action[action != 0]
tours = np.split(nonzero_action, np.cumsum(valid_lengths)[:-1])
# Find the longest tour; its length determines the padded width
max_len = max(len(r) for r in tours)

@nb.njit(nb.float32(nb.int32[:, :], nb.float32[:, :], nb.float32[:], nb.int32[:], nb.int32), nogil=True)
def reward_ins(tours, adj, service, clss, k):
    """Return the largest prefix length over all tours for class ``k``.

    For every tour containing at least one node whose ``clss`` value equals
    ``k``, the tour is cut right after the last such node and measured with
    ``calc_length``. The maximum of those measurements is returned, or 0.0
    when no tour contains a class-``k`` node.
    """
    longest = 0.0
    for route in tours:
        hits = np.where(clss[route] == k)[0]
        if len(hits) > 0:
            # Prefix up to and including the last class-k position.
            prefix = route[:hits[-1] + 1]
            longest = max(longest, calc_length(adj, service, prefix))
    return longest
# Pad with zeros so that every tour has the same length
padded = np.zeros((len(tours), max_len+2), dtype=np.int32)
for idx, tour in enumerate(tours):
padded[idx][1:len(tour)+1] = tour
return padded

@nb.njit(nb.float32(nb.float32[:, :], nb.float32[:], nb.int32[:], nb.int32[:], nb.int32), nogil=True)
def reward_in(adj, service, clss, tour, k):
    """Return the class-``k`` prefix length of a single tour.

    The tour is cut right after the last node whose ``clss`` value equals
    ``k`` and measured with ``calc_length``. The result is clamped below at
    0.0, and is 0.0 when the tour holds no class-``k`` node.
    """
    floor = 0.0
    hits = np.where(clss[tour] == k)[0]
    if len(hits) == 0:
        return floor
    # Prefix up to and including the last class-k position.
    prefix = tour[:hits[-1] + 1]
    return max(floor, calc_length(adj, service, prefix))
def calc_reward(action, td, pos_val=(1, 2, 3), **kwargs):
    """Compute, per class value, the latest cumulative time at which it is reached.

    The flat ``action`` is split into zero-padded tours with
    ``action_to_tours``. For every tour the per-position time (service time
    plus the ``adj`` entry from the previous position) is accumulated along
    the tour. For each value ``p`` in ``pos_val``, the result is the maximum
    accumulated time over all positions whose class equals ``p``.

    Args:
        action: Flat action sequence accepted by ``action_to_tours``.
        td: Mapping providing ``'clss'``, ``'service_times'`` and ``'adj'``
            (assumed to be torch tensors, since ``torch.cumsum`` and
            ``torch`` masking are applied to them -- confirm against caller).
        pos_val: Class values to evaluate, in output order.
        **kwargs: ``return_list=True`` returns the raw list of 0-dim tensors;
            otherwise ``return_numpy=True`` returns ``np.array`` of them.

    Returns:
        A list, a NumPy array, or (by default) a ``torch.tensor`` with one
        entry per element of ``pos_val``.
    """
    tours = action_to_tours(action)
    prior = td['clss'][tours]

    # Advanced indexing yields a copy, so the in-place add below does not
    # modify td['service_times'].
    total_time = td['service_times'][tours]
    shortest_traversal_time = td['adj'][tours[:, :-1], tours[:, 1:]]
    total_time[:, 1:] += shortest_traversal_time
    total_time = torch.cumsum(total_time, dim=1)

    rs = []
    for p in pos_val:
        mask = prior == p
        if mask.any():
            rs.append(total_time[mask].max())
        else:
            # No position has class p: fall back to the very first entry.
            # Indexed explicitly instead of with a nested list, whose
            # interpretation as a multi-dimensional index is legacy behaviour.
            rs.append(total_time[0, 0].max())

    if kwargs.get("return_list", False):
        return rs
    if kwargs.get("return_numpy", False):
        return np.array(rs)
    return torch.tensor(rs)
273 changes: 135 additions & 138 deletions common/inter.py
Original file line number Diff line number Diff line change
@@ -1,141 +1,138 @@
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import numba as nb
import numpy as np
from common.nb_utils import calc_length, calc_demand
from common.cal_reward import reward_in
from common.consts import *

@nb.njit(nb.float32(nb.float32[:, :], nb.float32[:], nb.float32[:], nb.float32[:], nb.int32[:], nb.int32[:]), nogil=True)
def once_interP(adj, service, demand, remain_demand, sub1, sub2):
    """Try one node exchange between two sub-tours and apply the chosen one.

    Every pair of positions ``(i, j)`` with ``i, j >= 1`` is tried as a swap
    between ``sub1`` and ``sub2``. A swap is skipped when the demand added to
    either sub-tour exceeds the matching entry of ``remain_demand``. The cost
    of a pair of sub-tours is the larger of their two lengths, each measured
    as ``calc_length`` of the sub-tour without its first node plus the ``adj``
    entry from the first node to the second.

    The selected swap is written into ``sub1`` / ``sub2`` in place.

    Returns:
        The (negative) recorded delta when a swap was applied, else 0.0.
    """
    pick1, pick2, min_delta = 0, 0, 0

    len1 = calc_length(adj, service, sub1[1:]) + adj[sub1[0], sub1[1]]
    len2 = calc_length(adj, service, sub2[1:]) + adj[sub2[0], sub2[1]]
    best = max(len1, len2)

    demand_best = calc_demand(demand, sub1[1:]), calc_demand(demand, sub2[1:])

    for i in range(1, len(sub1)):
        for j in range(1, len(sub2)):
            # Work on copies so rejected swaps leave the inputs untouched.
            trial1 = sub1.copy()
            trial2 = sub2.copy()
            trial1[i] = sub2[j]
            trial2[j] = sub1[i]

            trial_demand = calc_demand(demand, trial1[1:]), calc_demand(demand, trial2[1:])
            if (trial_demand[0] - demand_best[0] > remain_demand[0]) or \
                    (trial_demand[1] - demand_best[1] > remain_demand[1]):
                continue

            trial_len1 = calc_length(adj, service, trial1[1:]) + adj[trial1[0], trial1[1]]
            trial_len2 = calc_length(adj, service, trial2[1:]) + adj[trial2[0], trial2[1]]
            length = max(trial_len1, trial_len2)

            # The baselines (best, demand_best) move with every accepted
            # candidate, so later deltas are relative to the last accepted one.
            change = length - best
            if change < min_delta:
                pick1, pick2 = i, j
                min_delta = change
                best = length
                demand_best = trial_demand

    if min_delta < -1e-6:
        sub1[pick1], sub2[pick2] = sub2[pick2], sub1[pick1]
        return min_delta
    return 0.0


@nb.njit(nb.float32(nb.float32[:, :], nb.float32[:], nb.int32[:], nb.float32[:], nb.float32[:], nb.int32[:], nb.int32[:], nb.int32), nogil=True)
def once_interU(adj, service, clss, demand, remain_demand, sub1, sub2, k):
    """Try one node exchange between two sub-tours, scored over classes 1..k.

    Every pair of positions ``(i, j)`` with ``i, j >= 1`` is tried as a swap
    between ``sub1`` and ``sub2``. A swap is skipped when the demand added to
    either sub-tour exceeds the matching entry of ``remain_demand``. For each
    class ``t`` in ``1..k`` the score is the larger ``reward_in`` of the two
    sub-tours. Score differences against the current best are accumulated
    with weight ``10 ** (k - t)`` (``t`` zero-based), stopping at the first
    class that gets worse.

    The selected swap is written into ``sub1`` / ``sub2`` in place.

    Returns:
        The (negative) recorded delta when a swap was applied, else 0.0.
    """
    start, end, min_delta = 0, 0, 0

    best = np.zeros(k)
    for t in range(1, k + 1):
        best[t - 1] = max(reward_in(adj, service, clss, sub1, k=t),
                          reward_in(adj, service, clss, sub2, k=t))

    # Scratch buffer, overwritten for every candidate swap.
    length = np.zeros(k)
    demand_best = calc_demand(demand, sub1[1:]), calc_demand(demand, sub2[1:])

    for i in range(1, len(sub1)):
        for j in range(1, len(sub2)):
            # Work on copies so rejected swaps leave the inputs untouched.
            candidate1 = sub1.copy()
            candidate2 = sub2.copy()
            candidate1[i], candidate2[j] = candidate2[j], candidate1[i]

            candidate_demand = calc_demand(demand, candidate1[1:]), calc_demand(demand, candidate2[1:])
            exceed_demand = (candidate_demand[0] - demand_best[0] > remain_demand[0]) or \
                            (candidate_demand[1] - demand_best[1] > remain_demand[1])
            if exceed_demand:
                continue

            for t in range(1, k + 1):
                length[t - 1] = max(reward_in(adj, service, clss, candidate1, k=t),
                                    reward_in(adj, service, clss, candidate2, k=t))

            change = 0
            for t in range(k):
                c = length[t] - best[t]
                if c > 0:
                    # A worse class ends the accumulation for this candidate.
                    break
                change += c * (10 ** (k - t))

            if change < min_delta:
                start, end, min_delta, demand_best = i, j, change, candidate_demand
                # Copy instead of binding: `length` is reused as scratch for
                # the next candidate, and sharing the array would make every
                # later difference against `best` equal to zero.
                best = length.copy()

    if min_delta < -1e-6:
        sub1[start], sub2[end] = sub2[end], sub1[start]
        return min_delta
    else:
        return 0.0

@nb.njit(nb.int32[:,:](nb.int32[:, :], nb.float32[:, :], nb.float32[:], nb.int32[:], nb.float32[:], nb.int32), nogil=True)
def interP(tours, adj, service, clss, demand, k):
    """Repeatedly exchange class-``k`` nodes between every pair of tours.

    For each pair of tours that both contain class-``k`` nodes, the slice
    from one position before the first class-``k`` node up to the last
    class-``k`` node is handed to ``once_interP``, which swaps in place. The
    pair is retried while ``once_interP`` keeps reporting an improvement.
    Sweeps over all pairs repeat while any pair improved. Both loops are
    capped at ``EPS`` iterations.

    Returns:
        ``tours``, modified in place.
    """
    improved = True
    sweeps = 0
    remain_demand = np.ones(2, np.float32)
    n_tours = len(tours)

    while improved and sweeps < EPS:
        improved = False
        for a in range(n_tours - 1):
            for b in range(a + 1, n_tours):
                hits_a = np.where(clss[tours[a]] == k)[0]
                hits_b = np.where(clss[tours[b]] == k)[0]
                if len(hits_a) == 0 or len(hits_b) == 0:
                    continue

                # Slices are views, so swaps made on them land in `tours`.
                seg_a = tours[a][hits_a[0] - 1: hits_a[-1] + 1]
                seg_b = tours[b][hits_b[0] - 1: hits_b[-1] + 1]

                delta = -1.0
                rounds = 0
                while delta < -1e-6 and rounds < EPS:
                    remain_demand[0] = 1 - demand[tours[a]].sum()
                    remain_demand[1] = 1 - demand[tours[b]].sum()
                    delta = once_interP(adj, service, demand, remain_demand, seg_a, seg_b)
                    rounds += 1

                # A second round only happens when the first one improved.
                if rounds >= 2:
                    improved = True
        sweeps += 1
    return tours

@nb.njit(nb.int32[:,:](nb.int32[:, :], nb.float32[:, :], nb.float32[:], nb.int32[:], nb.float32[:], nb.int32), nogil=True)
def interU(tours, adj, service, clss, demand, k):
    """Repeatedly exchange nodes between tour prefixes ending at class ``k``.

    For each pair of tours that both contain class-``k`` nodes, the prefix up
    to and including the last class-``k`` node is handed to ``once_interU``,
    which swaps in place. The pair is retried while ``once_interU`` keeps
    reporting an improvement. Sweeps over all pairs repeat while any pair
    improved. Both loops are capped at ``EPS`` iterations.

    Returns:
        ``tours``, modified in place.
    """
    improved = True
    sweeps = 0
    remain_demand = np.ones(2, np.float32)
    n_tours = len(tours)

    while improved and sweeps < EPS:
        improved = False
        for a in range(n_tours - 1):
            for b in range(a + 1, n_tours):
                hits_a = np.where(clss[tours[a]] == k)[0]
                hits_b = np.where(clss[tours[b]] == k)[0]
                if len(hits_a) == 0 or len(hits_b) == 0:
                    continue

                # Slices are views, so swaps made on them land in `tours`.
                head_a = tours[a][: hits_a[-1] + 1]
                head_b = tours[b][: hits_b[-1] + 1]

                delta = -1.0
                rounds = 0
                while delta < -1e-6 and rounds < EPS:
                    remain_demand[0] = 1 - demand[tours[a]].sum()
                    remain_demand[1] = 1 - demand[tours[b]].sum()
                    delta = once_interU(adj, service, clss, demand, remain_demand, head_a, head_b, k)
                    rounds += 1

                # A second round only happens when the first one improved.
                if rounds >= 2:
                    improved = True
        sweeps += 1
    return tours
# sub1 = tours[i][: pos1[-1] + 1]
# sub2 = tours[j][: pos2[-1] + 1]

# sub_change = -1.0
# sub_it = 0
# while sub_change < -1e-6 and sub_it < EPS:
# remain_demand[0] = 1 - demand[tours[i]].sum()
# remain_demand[1] = 1 - demand[tours[j]].sum()
# sub_change = once_interU(adj, service, clss, demand, remain_demand, sub1, sub2, k)
# sub_it += 1
# if sub_it >= 2:
# change = True
# it += 1
# return tours
Loading