From: François Fleuret
Date: Mon, 24 Jun 2024 10:13:01 +0000 (+0200)
Subject: Update.
X-Git-Url: https://ant.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=702e672dcf9ebcfad11ae4034e64117f2c67ead5;p=culture.git

Update.
---

diff --git a/mygpt.py b/mygpt.py
index a178491..c58bea1 100755
--- a/mygpt.py
+++ b/mygpt.py
@@ -292,11 +292,16 @@ class MyGPT(nn.Module):
             ) # Needed to initialize the model's cache
         for s in range(to_generate.min(), to_generate.max() + 1):
             output = self(BracketedSequence(input, s, 1)).x
 
-            logits = output[:, s] / temperature
+            logits = output[:, s]
+
+            logits = logits.log_softmax(dim=-1) / temperature
+
             if forbidden_tokens is not None:
                 logits = logits.masked_fill(forbidden_tokens, float("-inf"))
+
             if forced_biases is not None:
                 logits = logits + forced_biases[None, :]
+
             if deterministic_synthesis:
                 t_next = logits.argmax(1)
             else:
diff --git a/tasks.py b/tasks.py
index b967465..5edb472 100755
--- a/tasks.py
+++ b/tasks.py
@@ -274,6 +274,10 @@ class World(Task):
             device=self.device,
         )
 
+        # Should not be necessary though, the autoregression is done
+        # in eval mode
+        sum_logits = sum_logits.detach()
+
         average_logits = sum_logits / quizzes.numel()
 
         # It's a bit brutal to do it twice, we should probably have a