From: François Fleuret Date: Sun, 24 Mar 2024 15:56:24 +0000 (+0100) Subject: Update. X-Git-Url: https://ant.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=690470307a7995cc3117eb54545f921eedcecba5;p=picoclvr.git Update. --- diff --git a/escape.py b/escape.py index a2e378d..da3e495 100755 --- a/escape.py +++ b/escape.py @@ -110,13 +110,22 @@ def episodes2seq(states, actions, rewards, lookahead_delta=None): actions = actions[:, :, None] + first_actions_code if lookahead_delta is not None: - r = rewards - u = F.pad(r, (0, lookahead_delta - 1)).as_strided( - (r.size(0), r.size(1), lookahead_delta), - (r.size(1) + lookahead_delta - 1, 1, 1), - ) - a = u[:, :, 1:].min(dim=-1).values - b = u[:, :, 1:].max(dim=-1).values + # r = rewards + # u = F.pad(r, (0, lookahead_delta - 1)).as_strided( + # (r.size(0), r.size(1), lookahead_delta), + # (r.size(1) + lookahead_delta - 1, 1, 1), + # ) + # a = u[:, :, 1:].min(dim=-1).values + # b = u[:, :, 1:].max(dim=-1).values + # s = (a < 0).long() * a + (a >= 0).long() * b + # lookahead_rewards = (1 + s[:, :, None]) + first_lookahead_rewards_code + + # a[n,t]=min_s>t r[n,s] + a = rewards.new_zeros(rewards.size()) + b = rewards.new_zeros(rewards.size()) + for t in range(a.size(1) - 1): + a[:, t] = rewards[:, t + 1 :].min(dim=-1).values + b[:, t] = rewards[:, t + 1 :].max(dim=-1).values s = (a < 0).long() * a + (a >= 0).long() * b lookahead_rewards = (1 + s[:, :, None]) + first_lookahead_rewards_code @@ -271,11 +280,11 @@ def episodes2str( ###################################################################### if __name__ == "__main__": - nb, height, width, T = 10, 4, 6, 20 + nb, height, width, T = 1000, 4, 6, 20 states, actions, rewards = generate_episodes(nb, height, width, T) seq = episodes2seq(states, actions, rewards, lookahead_delta=T) s, a, r, lr = seq2episodes(seq, height, width, lookahead=True) print(episodes2str(s, a, r, lookahead_rewards=lr, unicode=True, ansi_colors=True)) - print() - for s in seq2str(seq): - print(s) + # print() + # for s in seq2str(seq): + # print(s) diff --git a/main.py b/main.py index b4f831f..2edfa14 100755 --- a/main.py +++ b/main.py @@ -182,7 +182,7 @@ parser.add_argument("--escape_height", type=int, default=4) parser.add_argument("--escape_width", type=int, default=6) -parser.add_argument("--escape_T", type=int, default=20) +parser.add_argument("--escape_T", type=int, default=25) ###################################################################### diff --git a/tasks.py b/tasks.py index a4ef557..38c85ed 100755 --- a/tasks.py +++ b/tasks.py @@ -1885,10 +1885,10 @@ class Escape(Task): self.width = width states, actions, rewards = escape.generate_episodes( - nb_train_samples + nb_test_samples, height, width, 3 * T + nb_train_samples + nb_test_samples, height, width, T ) seq = escape.episodes2seq(states, actions, rewards, lookahead_delta=T) - seq = seq[:, seq.size(1) // 3 : 2 * seq.size(1) // 3] + # seq = seq[:, seq.size(1) // 3 : 2 * seq.size(1) // 3] self.train_input = seq[:nb_train_samples].to(self.device) self.test_input = seq[nb_train_samples:].to(self.device)