From: François Fleuret
Date: Sat, 13 Jan 2024 21:25:09 +0000 (+0100)
Subject: Update.
X-Git-Url: https://ant.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=2434c00a82ebb0b23f45d891cc9f80324e3200bd;p=mygptrnn.git

Update.
---

diff --git a/main.py b/main.py
index c22ae57..3e67a73 100755
--- a/main.py
+++ b/main.py
@@ -465,15 +465,15 @@ with os.popen("sha256sum *.py") as f:
         log_string(f"sha256sum {l.strip()}")

 now = time.strftime("%Y%m%d-%H%M%S", time.localtime())
-os.system(f"tar --ignore-failed-read zcvf {args.result_dir}/src-{now}.tgz *.py *.sh")
+os.system(f"tar zcvf {args.result_dir}/src-{now}.tgz *.py *.sh")

 log_string(f"argv {' '.join(sys.argv)}")

 for n in vars(args):
     log_string(f"args.{n} {getattr(args, n)}")

-for n in vars(sup_args):
-    log_string(f"sup_args.{n} {getattr(sup_args, n)}")
+for k, v in sup_args.items():
+    log_string(f'sup_args["{k}"] "{v}"')

 ######################################################################

diff --git a/mygpt.py b/mygpt.py
index 7c9991f..099847c 100755
--- a/mygpt.py
+++ b/mygpt.py
@@ -493,14 +493,16 @@ class Caterpillar(nn.Module):

         self.proba_gate_dropout = 0.0

-        default_b_G = kwargs.get("default_b_G")
-        if default_b_G is None:
-            default_b_G = -math.log(caterpillar_height - 1)
+        default_bg = kwargs.get("default_bg")
+        if default_bg is None:
+            default_bg = -math.log(caterpillar_height - 1)
+        else:
+            default_bg = float(default_bg)

-        logger(f"default_b_G {default_b_G}")
+        logger(f"default_bg {default_bg}")

         self.w_G = randw(nb_heads, caterpillar_height, dim_model)
-        self.b_G = nn.Parameter(torch.full((nb_heads, caterpillar_height), default_b_G))
+        self.b_G = nn.Parameter(torch.full((nb_heads, caterpillar_height), default_bg))

         self.w_K = randw(nb_heads, dim_qk, dim_model)
         self.w_V = randw(nb_heads, dim_v, dim_model)