From 99b8dd822927c0407c76c46c959fa7a96325e7f0 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Fri, 8 Nov 2019 19:39:43 +0100 Subject: [PATCH 01/89] Revamp emcee code due to bugs Current state has bugs which are not easy to debug due to complex code structure. Renew with clean structure, more comments, etc. --- src/fitting/ensemble-samplers/emcee.sl | 95 ++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/fitting/ensemble-samplers/emcee.sl diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl new file mode 100644 index 00000000..04c182be --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -0,0 +1,95 @@ + + +%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% + +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define inverse_cdf (u, a) { + return (u*(a-1)+1)^2/a; +} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the functions +private define stretch_move (fit_object, x, x_j, u, a) { + variable z = inverse_cdf(u[0], a); + variable xstat = 1e32, ystat = -1e32; % from mike, why doing it that way? + variable y; % step proposition + variable x_t1 = x; % resulting step + + % evaluate fit function for 'x' + xstat = fit_object.eval_statistic(x;nocopy); + + % calculate the new position (utilize array operations) + y = x_j + z*(x-x_j); + + % try evaluating, if out of bounds, does nothing + try { + % evaluate fit function for 'y' + ystat = fit_object.eval_statistic(y;nocopy); + + % caluculate if we accept the step based on the statistics of the + % model. 
We assume that the statistic is given as -2 log likelihood + if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) + x_t1 = y; + } catch IsisError; + + % return new walker position, proposed position, xstat, ystat + return (x_t1, y, xstat, ystat); +} + +private define get_move_args (m) { + return m.args; +} + +private define set_move_args () { + variable args, m; + if (_NARGS>2) { + args = __pop_list(_NARGS-1); + m = (); + } else + (m,args) = (); + if (length(args) != m.nargs) + throw UsageError: sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); + m.args = args; +} + +variable STRETCH_MOVE = struct { + name = "stretch move", + move = &stretch_move, % the function + nrands = 2, % the randoms needed + args = {2}, % additional arguments + nargs = 1; % number additional arguments + get = &get_move_args, + set = &set_move_args, +} + +%}}}% + +% calculate the move for the fit 'fit_object' based on the current walker position x, +% a randomly choosen walker x_j and an array of uniform random numbers enough to +% calculate the next step. 
move is the struct encapsulating the move function +private define __move (fit_object, x, x_j, u, move) { + % for alternative step functions, must be symmetric (that is, + % Pr(x -> y) = Pr(y -> x)) otherwise no detailed balance + + % push standard arguments & arguments for specified move + return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); +} + +% start emcee walkers +% nwalkers is number walkers per parameter, nsteps is number of iterations +define emcee (nwalkers, nsteps) { + % setup user interaction + variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) + variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps + variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps + + if (get_fit_fun() == NULL) + throw UsageError: "No fit function defined"; + if (all_data() == NULL) + throw UsageError: "No data is loaded"; + + % setup walkers + variable total_walkers = num_free_params()*nwalkers; + variable urands = \ No newline at end of file -- GitLab From 24bd17e3271d651041542ce610008c1c4c2e0f6b Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 9 Nov 2019 01:47:36 +0100 Subject: [PATCH 02/89] More of new emcee --- src/fitting/ensemble-samplers/emcee.sl | 71 ++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 04c182be..94645397 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -75,21 +75,84 @@ private define __move (fit_object, x, x_j, u, move) { % push standard arguments & arguments for specified move return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); +} + +% get all free param values, index and min max +private define free_par_sets () { + 
variable all = get_params(); + variable i, ind = {}, v = {}, mi = {}, ma = {}; + _for i (0, length(all)-1, 1) { + ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) + continue; + list_append(ind, all[i].index); + list_append(v, all[i].value); + list_append(mi, all[i].min); + list_append(ma, all[i].ma); + } + + return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); } +% pick random parameter values within the boundaries +private define init_parameter_walker_uniform (n) { + variable walkers = Array_Type[n]; + variable i; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + + _for i (0, n-1, 1) + walkers[i] = rand_uniform(num_p)*(p_max-p_min)+p_min; + + return walkers; +} + +% get walkers per node +private define distribute_walkers (nodes, number_walkers) { + variable walkers_per_node = Int_Type[nodes]; + variable n = number_walkers/nodes + 1; + variable missing = nodes - (number_walkers mod nodes); + + walkers_per_node[[0:nodes-missing-1]] = n; + walkers_per_node[[nodes-missing:nodes-1]] = n-1; + return walkers_per_node; +} + +% this function is the core and may be evaluated by multiple tasks +private define iterate_emcee (walkers, set1, set2, u) { + + % start emcee walkers % nwalkers is number walkers per parameter, nsteps is number of iterations -define emcee (nwalkers, nsteps) { +public define emcee (nwalkers, nsteps) { % setup user interaction variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps + variable cycle = qualifier("cycle", 50); % number of evaluations per cycle (recalculates some numbers after each cycle) + variable init = qualifier("init", 
&init_parameter_walker_uniform); % init function, takes number of total walkers and returns initialized array of parameter arrays if (get_fit_fun() == NULL) throw UsageError: "No fit function defined"; if (all_data() == NULL) throw UsageError: "No data is loaded"; - + % setup walkers - variable total_walkers = num_free_params()*nwalkers; - variable urands = \ No newline at end of file + variable total_walkers = int(num_free_params()*nwalkers); + variable u = urand(cycle*total_walkers*move.nrands); + % to ensure independence between walker ensembles, roll dice + % and pick sets accordingly + variable sort = array_sort(urand(total_walkers)); + variable set1 = sort[0:total_walkers:2]; + variable set2 = sort[1:total_walkers:2]; + variable len_set1 = length(set1); + variable len_set2 = length(set2); + variable walkers = init(total_walkers); + + % setup for parallelization + variable nodes = 1; % number of parallel jobs + variable walkers_per_node = distribute_walkers(nodes, total_walkers); + + % loop over the walkers and update them + variable + \ No newline at end of file -- GitLab From 0d6e5d5ae13ee914341447e45aea373474f17b7b Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 9 Nov 2019 16:27:17 +0100 Subject: [PATCH 03/89] More of emcee_new --- src/fitting/ensemble-samplers/emcee.sl | 42 +++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 94645397..c600de75 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -13,7 +13,7 @@ private define inverse_cdf (u, a) { % Move must evaluate the functions private define stretch_move (fit_object, x, x_j, u, a) { variable z = inverse_cdf(u[0], a); - variable xstat = 1e32, ystat = -1e32; % from mike, why doing it that way? 
+ variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step @@ -57,7 +57,7 @@ private define set_move_args () { variable STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function - nrands = 2, % the randoms needed + nrands = 3, % the randoms needed args = {2}, % additional arguments nargs = 1; % number additional arguments get = &get_move_args, @@ -118,9 +118,29 @@ private define distribute_walkers (nodes, number_walkers) { return walkers_per_node; } +% get walkers per handled up to this node +private define previous_number_walkers (walkers_per_node) { + variable l = length(walkers_per_node); + variable handled_walkers = Int_Type[l]; + variable i, c = 0; + + _for i (0, l-1, 1) { + handled_walkers[i] = c; + if (i Date: Sat, 9 Nov 2019 22:30:50 +0100 Subject: [PATCH 04/89] Fix some bugs in emcee_new --- src/fitting/ensemble-samplers/emcee.sl | 57 +++++++------------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index c600de75..799f79c9 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,4 +1,4 @@ - +require("rand"); %%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% @@ -50,7 +50,7 @@ private define set_move_args () { } else (m,args) = (); if (length(args) != m.nargs) - throw UsageError: sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); + throw UsageError, sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); m.args = args; } @@ -59,10 +59,10 @@ variable STRETCH_MOVE = struct { move = &stretch_move, % the function nrands = 3, % the randoms needed args = {2}, % additional arguments - nargs = 1; % number additional arguments + nargs = 1, % number additional arguments get = &get_move_args, set = &set_move_args, -} +}; %}}}% @@ -144,47 +144,18 @@ private 
define iterate_emcee (walkers, set1, set2, u) {} % start emcee walkers % nwalkers is number walkers per parameter, nsteps is number of iterations -public define emcee (nwalkers, nsteps) { +define emcee_new (nwalkers, nsteps) { % setup user interaction - variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) - variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps - variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps - variable cycle = qualifier("cycle", 50); % number of evaluations per cycle (recalculates some numbers after each cycle) - variable init = qualifier("init", &init_parameter_walker_uniform); % init function, takes number of total walkers and returns initialized array of parameter arrays + variable urand = qualifier("urand", &rand_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) + variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps + variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps if (get_fit_fun() == NULL) - throw UsageError: "No fit function defined"; + throw UsageError, "No fit function defined"; if (all_data() == NULL) - throw UsageError: "No data is loaded"; - + throw UsageError, "No data is loaded"; + % setup walkers - variable total_walkers = int(num_free_params()*nwalkers); - variable u = urand(cycle*total_walkers*move.nrands); - % to ensure independence between walker ensembles, roll dice - % and pick sets accordingly - variable sort = array_sort(urand(total_walkers)); - variable set1 = sort[0:total_walkers:2]; - variable set2 = sort[1:total_walkers:2]; - variable len_set1 = length(set1); - variable len_set2 = length(set2); - variable walkers = 
init(total_walkers); - variable u_set1 = rand_int(0, len_set1-1, cycle*total_walkers); - variable u_set2 = rand_int(0, len_set2-1, cycle*total_walkers); - - % setup for parallelization - variable nodes = 1; % number of parallel jobs - variable walkers_per_node = distribute_walkers(nodes, total_walkers); - variable handled_walkers = previous_number_walkers (walkers_per_node); - - % set the local variables for each node - variable node_set; % index of walkers the node handles - variable node_u; % random numbers used by the node - variable node_u_set1; % random number to select from set 1 - variable node_u_set2; % random number to select from set 2 - - % loop over the walkers and update them - variable c, steps = 0; - while (steps < nsteps) { - _for c (0, cycle-1, 1) { - - \ No newline at end of file + variable total_walkers = num_free_params()*nwalkers; + variable urands ; +} -- GitLab From 4c884625ddb8f270be57ce9c5f56c4160b8c1716 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 13 Nov 2019 16:22:45 +0100 Subject: [PATCH 05/89] Reimplement emcee_hammer New implementation is much more compact and hopefully simple to understand. Plus, it utilizes direct calls to the fit function instead of using eval_counts. This has the benefit of performance (at least the documentation claims this) and that the model evaluation takes the free parameters directly (so no calls to get_par or equiv. required). Currently the implementation uses the rcl-mpi module, which is not a clean implementation of the mpi routines. 
A future version should fix this if possible --- src/fitting/ensemble-samplers/emcee.sl | 171 +++++++++++++++++++++---- 1 file changed, 146 insertions(+), 25 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 799f79c9..316c5314 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -57,7 +57,7 @@ private define set_move_args () { variable STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function - nrands = 3, % the randoms needed + nrands = 2, % the randoms needed args = {2}, % additional arguments nargs = 1, % number additional arguments get = &get_move_args, @@ -127,35 +127,156 @@ private define previous_number_walkers (walkers_per_node) { _for i (0, l-1, 1) { handled_walkers[i] = c; if (i>1; % number of walkers in set 2 + variable set1_len = total_walkers-set2_len; % number of walkers in set 1 + variable all_u = (@urand)(total_walkers*nrand); % all random numbers for the next step + variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 + variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 + variable this_walkers, this_pivots, this_randoms; + _for i (0, nodes-1, 1) { + this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; + this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; + this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; + if (i == 0) { % set master locals, we utilize that the walkers the master handles are the first in the array + (@node_pivots_ref) = this_pivots; + (@node_randoms_ref) = this_randoms; + } +#ifexists rcl_mpi_init + else { + _for j (0, length(this_walkers)-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots 
from other set with tag 1 + } + () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 + } + } + } else { + _for j (0, length(node_walkers)-1, 1) { + () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); + () = rcl_mpi_org_recv_double(node_pivots[j], length(node_pivots[j]), 0, 1); + } + () = rcl_mpi_org_recv_double(node_randoms, length(node_randoms), 0, 2); +#endif + } +} + +private define catch_walkers_mpi (node, nodes, walkers_ref, + walkers_per_node, handled_walkers) { % <--- only relevant for master + variable walkers = @walkers_ref; + variable i,j; + variable npar = length(walkers[0]); % walkers are all equal + +#ifexists rcl_mpi_init + if (node == 0) { % master, collect all walkers + _for i (1, nodes-1, 1) { + _for j (0, walkers_per_node[i]-1, 1) + () = rcl_mpi_org_irecv_double(walkers[handled_walkers[i]+j], npar, i, i); + } + } else { + _for j (0, length(walkers)-1, 1) + () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); + } +#endif +} + +private define emcee_mpi (total_walkers, steps) { + variable init = qualifier("init", NULL); + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = qualifier("upick", NULL); +#ifexists rcl_mpi_init + variable node = rcl_mpi_init(); + variable nodes = rcl_mpi_numtasks(); + rcl_init_mpi_request(nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... +#else + variable node = 0; + variable nodes = 1; +#endif + if (NULL == init || NULL == move || NULL == urand || NULL == upick) + throw InternalError, "Initialization failed"; + + % master only variables + variable walkers_per_node; + variable handled_walkers; + variable sort; + + variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 
1 : 0); + variable npar = num_free_params(); + variable node_walkers = Array_Type[node_walkers_len]; + variable node_pivots = Array_Type[node_walkers_len]; + variable node_randoms = Double_Type[node_walkers_len*move.nrands]; + variable new_pos, prop_pos, prev_stat, new_stat; + variable tmp; + + _for tmp (0, node_walkers_len-1, 1) { + node_walkers[tmp] = Double_Type[npar]; + node_pivots[tmp] = Double_Type[npar]; + } + + if (node == 0) { % setup master things + node_walkers = (@init)(total_walkers); % initialize the walkers + sort = array_sort(rand_uniform(total_walkers)); + node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias + walkers_per_node = distribute_walkers(nodes, total_walkers); + handled_walkers = previous_number_walkers(walkers_per_node); + } else { + walkers_per_node = NULL; + handled_walkers = NULL; + } + + variable s; + variable fit_handle = open_fit(); % if no model is loaded this will crash + + _for s (0, steps-1, 1) { + if (node == 0) { _for tmp (0, total_walkers-1, 1) { () = printf("%d: ", s); print_array(node_walkers[tmp]);}} + release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... + &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); + _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... + (new_pos, prop_pos, prev_stat, new_stat) = + __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); + node_walkers[tmp] = new_pos; + } +#ifexists rcl_mpi_init + catch_walkers_mpi(node, nodes, &node_walkers, walkers_per_node, handled_walkers); % ... and catch 'em! 
+ () = rcl_mpi_barrier(); % keep the walkers in sync +#endif + } } -% this function is the core and may be evaluated by multiple tasks -private define iterate_emcee (walkers, set1, set2, u) {} - -% start emcee walkers -% nwalkers is number walkers per parameter, nsteps is number of iterations -define emcee_new (nwalkers, nsteps) { - % setup user interaction - variable urand = qualifier("urand", &rand_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) - variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps - variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps - - if (get_fit_fun() == NULL) - throw UsageError, "No fit function defined"; - if (all_data() == NULL) - throw UsageError, "No data is loaded"; - - % setup walkers - variable total_walkers = num_free_params()*nwalkers; - variable urands ; +define emcee (walkers_per_par, steps) { + variable move = qualifier("move", STRETCH_MOVE); % defined move + variable urand = qualifier("urand", &rand_uniform); % double random generator + variable upick = qualifier("upick", &rand_int); % int random generator + variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function + + variable total_walkers = num_free_params()*walkers_per_par; + if (total_walkers > ((1<<29)-1)) + throw UsageError, "Unable to create ensemble for this large number of walkers"; + + () = printf("--- START ---\n"); + emcee_mpi(total_walkers, steps; move=move, urand=urand, upick=upick, init=init); + () = printf("--- END ---\n"); + rcl_mpi_finalize(); } -- GitLab From 730227f23ee340e7d6266fd2aa78702d369686f0 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 16 Nov 2019 10:17:22 +0100 Subject: [PATCH 06/89] MPI MCMC ensemble sampler New implementation of the emcee hammer. Should be simpler to change move algorithm and output routines.
TODO: Help, load_qualifier --- src/fitting/ensemble-samplers/emcee.sl | 367 ++++++++++++++++++++++--- 1 file changed, 332 insertions(+), 35 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 316c5314..aaad0c67 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,6 +1,9 @@ +% -*- mode: slang; mode: fold; -*- % + require("rand"); -%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% +%{{{% defined moves for the ensemble walkers +%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY % define inverse cumulative distribution function for generating % random numbers following 1/z^2 when z in [1/a, a] @@ -54,7 +57,7 @@ private define set_move_args () { m.args = args; } -variable STRETCH_MOVE = struct { +variable EMCEE_STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function nrands = 2, % the randoms needed @@ -64,8 +67,6 @@ variable STRETCH_MOVE = struct { set = &set_move_args, }; -%}}}% - % calculate the move for the fit 'fit_object' based on the current walker position x, % a randomly choosen walker x_j and an array of uniform random numbers enough to % calculate the next step. move is the struct encapsulating the move function @@ -77,6 +78,10 @@ private define __move (fit_object, x, x_j, u, move) { return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); } +%}}}% + +%{{{% initialization of the parameters + % get all free param values, index and min max private define free_par_sets () { variable all = get_params(); @@ -107,6 +112,184 @@ private define init_parameter_walker_uniform (n) { return walkers; } +%}}}% + +%{{{% write and read functions +private define emcee_write_chain_fits_init (io, filename, total_walkers, create, sloppy) { + % write ensemble evolution to fits file +#ifexists rcl_mpi_init + variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; +#else + variable modified_name = filename; +#endif + variable data_info; + list_data(&data_info); + variable freep = freeParameters(); + if (create) { % create the file(s) initially + io.handle = fits_open_file(modified_name, "c"); + + % write first table + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], ["J", "A"], [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); + fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, + array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1])) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + } else { % if loading, do some sanity checks + io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + 
fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and loaded chain model differ, unable to continue chain"; + return; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab.free_par)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return; + } + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and chain model have different free parameters"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; + return; + } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + } + + % IMPORTANT: stay on mcmcchain table + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "No a mcmc chain file"; + return; + } + + io.storage = fits_get_num_rows(io.handle); + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; + io.msg = NULL; +} + +private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle, prev_stat_cycle) { + variable tmp, update; + variable freep = freeParameters(); + variable npar = length(freep); + variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle + variable i,j; + variable collen = fits_get_num_rows(io.handle); + _for 
j (0, npar-1, 1) { + tmp = Double_Type[steps_walkers]; + _for i (0, steps_walkers-1, 1) + tmp[i] = walkers_cycle[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); + tmp = Int_Type[steps_walkers]; + if (NULL == prop_cycle) { + tmp = Int_Type[steps_walkers]+1; + } else { + _for j (0, steps_walkers-1, 1) + tmp[j] = any(walkers_cycle[j] == prop_cycle[j]); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, tmp); +} + +private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { + variable tmp, tmp2; + variable total_walkers = walker_per_parameter*number_parameter; + variable reread; + variable collen = fits_get_num_rows(io.handle); % length of chain + variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); + fits_update_key(io.handle, "NWALKERS", walker_per_parameter); + fits_update_key(io.handle, "NFREEPAR", number_parameter); + + () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], + io.storage+1, collen-io.storage, &reread); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); + collen = fits_get_num_rows(io.handle); % read length of chain summary + variable j; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; + + _for j (0, steps-1, 1) { + frac_update[j] = sum(reread[0][[0:total_walkers-1]+j*total_walkers])/total_walkers; + tmp2 = reread[1][[0:total_walkers-1]+j*total_walkers]; + min_stat[j] = min(tmp2); + max_stat[j] = max(tmp2); + med_stat[j] = median(tmp2); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, 
"FRAC_UPDATE"), collen+1, 1, frac_update); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); + + fits_close_file(io.handle); + io.handle = NULL; + io.storage = NULL; + io.msg = NULL; +} + +variable EMCEE_IO_FITS = struct { + open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level + write = &emcee_write_chain_fits, % write function, takes +? arguments + close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time + handle = NULL, % output handle, usually file pointer, if null after open() indicates error + all = 0, % if 1, all nodes will execute the function CARE: IF THIS IS THE CASE, THE WRITE FUNCTION MUST BE AWARE OF THE OTHER PROCESSES! + name = "emcee_io_fits", + storage = NULL, + msg = NULL, % error message + cycle = 1, % write every steps +}; + +%}}}% + +%{{{% some helpers ... 
% get walkers per node private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; @@ -133,6 +316,10 @@ private define previous_number_walkers (walkers_per_node) { return handled_walkers; } +%}}}% + +%{{{% mpi functions + private define release_walkers_mpi (node, nodes, total_walkers, nrand, walkers_per_node, handled_walkers, % <--- those are only relevant for master process node_walkers_ref, node_pivots_ref, node_randoms_ref) { @@ -155,13 +342,16 @@ private define release_walkers_mpi (node, nodes, total_walkers, variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 variable this_walkers, this_pivots, this_randoms; + _for i (0, nodes-1, 1) { this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; if (i == 0) { % set master locals, we utilize that the walkers the master handles are the first in the array - (@node_pivots_ref) = this_pivots; - (@node_randoms_ref) = this_randoms; + _for j (0, walkers_per_node[i]-1, 1) { + node_randoms[j] = this_randoms[j]; + node_pivots[j] = this_pivots[j]; + } } #ifexists rcl_mpi_init else { @@ -182,30 +372,47 @@ private define release_walkers_mpi (node, nodes, total_walkers, } } -private define catch_walkers_mpi (node, nodes, walkers_ref, +private define catch_walkers_mpi (node, nodes, walkers_ref, prop_walkers_ref, stat_ref, prev_stat_ref, walkers_per_node, handled_walkers) { % <--- only relevant for master variable walkers = @walkers_ref; + variable prop_walkers = @prop_walkers_ref; + variable stat = @stat_ref; + variable prev_stat = @prev_stat_ref; variable i,j; variable npar = length(walkers[0]); % walkers are all equal #ifexists rcl_mpi_init if (node == 0) { % master, collect all walkers _for 
i (1, nodes-1, 1) { - _for j (0, walkers_per_node[i]-1, 1) - () = rcl_mpi_org_irecv_double(walkers[handled_walkers[i]+j], npar, i, i); + _for j (0, walkers_per_node[i]-1, 1) { + () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); + () = rcl_mpi_org_recv_double(prop_walkers[handled_walkers[i]+j], npar, i, i); + } + () = rcl_mpi_org_recv_double(stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); + () = rcl_mpi_org_recv_double(prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); } } else { - _for j (0, length(walkers)-1, 1) + _for j (0, length(walkers)-1, 1) { () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); + () = rcl_mpi_org_isend_double(prop_walkers[j], npar, 0, node); + } + () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); + () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); } #endif } -private define emcee_mpi (total_walkers, steps) { +private define emcee_mpi (walker_per_par, number_par, steps) { + variable total_walkers = walker_per_par*number_par; variable init = qualifier("init", NULL); variable move = qualifier("move", NULL); variable urand = qualifier("urand", NULL); variable upick = qualifier("upick", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write_hook", NULL); + variable sloppy = qualifier("sloppy", NULL); + variable cont = qualifier("continue", NULL); + #ifexists rcl_mpi_init variable node = rcl_mpi_init(); variable nodes = rcl_mpi_numtasks(); @@ -217,26 +424,52 @@ private define emcee_mpi (total_walkers, steps) { if (NULL == init || NULL == move || NULL == urand || NULL == upick) throw InternalError, "Initialization failed"; + if (io.all || node == 0) { + io.open(output, total_walkers, cont == NULL, sloppy); + if (NULL == io.handle) + throw IOError, (NULL == io.msg) ? 
"" : io.msg; + } + % master only variables variable walkers_per_node; variable handled_walkers; variable sort; - - variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); - variable npar = num_free_params(); - variable node_walkers = Array_Type[node_walkers_len]; - variable node_pivots = Array_Type[node_walkers_len]; - variable node_randoms = Double_Type[node_walkers_len*move.nrands]; variable new_pos, prop_pos, prev_stat, new_stat; variable tmp; + variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); + variable node_walker_array_len = (node == 0) ? total_walkers : node_walkers_len; + variable npar = num_free_params(); - _for tmp (0, node_walkers_len-1, 1) { - node_walkers[tmp] = Double_Type[npar]; + variable node_walkers; % send & recv (differ between master and slaves) + variable node_prop_walkers; % recv (differ between master and slaves) + variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) + variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) + variable node_stat; % recv (differ between master and slaves) + variable node_prev_stat; % recv (differ between master and slaves) + + variable s; + variable fit_handle = open_fit(); % if no model is loaded this will crash + variable collector_len = 0; + if (io.all || node == 0) + collector_len = node_walkers_len*io.cycle; + variable walker_cycle; % enough to collect the walkers for one cycle + variable prop_cycle; + variable stat_cycle; + variable prev_stat_cycle; + variable cycle_step = 0; + + _for tmp (0, node_walkers_len-1, 1) node_pivots[tmp] = Double_Type[npar]; - } if (node == 0) { % setup master things - node_walkers = (@init)(total_walkers); % initialize the walkers + node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_prop_walkers = Array_Type[total_walkers]; + node_stat = Double_Type[total_walkers]+_Inf; + node_prev_stat = 
Double_Type[total_walkers]+_Inf; + + _for tmp (0, total_walkers-1, 1) + node_prop_walkers[tmp] = Double_Type[npar]; + sort = array_sort(rand_uniform(total_walkers)); node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); @@ -244,39 +477,103 @@ private define emcee_mpi (total_walkers, steps) { } else { walkers_per_node = NULL; handled_walkers = NULL; + + node_walkers = Array_Type[node_walkers_len]; + node_prop_walkers = Array_Type[node_walkers_len]; + node_stat = Double_Type[node_walkers_len]+_Inf; + node_prev_stat = Double_Type[node_walkers_len]+_Inf; + + _for tmp (0, node_walkers_len-1, 1) { + node_walkers[tmp] = Double_Type[npar]; + node_prop_walkers[tmp] = Double_Type[npar]; + } } - variable s; - variable fit_handle = open_fit(); % if no model is loaded this will crash + if (io.all || node==0) { % setup collector and write initial + walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle + prop_cycle = Array_Type[collector_len]; + stat_cycle = Double_Type[collector_len]+_Inf; + prev_stat_cycle = Double_Type[collector_len]+_Inf; + + _for tmp (0, collector_len-1, 1) { + walker_cycle[tmp] = Double_Type[npar]; + prop_cycle[tmp] = Double_Type[npar]; + } + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp] = node_walkers[tmp]; + prop_cycle[tmp] = node_prop_walkers[tmp]; + } + } - _for s (0, steps-1, 1) { - if (node == 0) { _for tmp (0, total_walkers-1, 1) { () = printf("%d: ", s); print_array(node_walkers[tmp]);}} + % the main loop where the magic happens + _for s (1, steps, 1) { + cycle_step = s mod io.cycle; release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
(new_pos, prop_pos, prev_stat, new_stat) = - __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); + __move(fit_handle, + node_walkers[tmp], + node_pivots[tmp], + node_randoms[[0:move.nrands-1]+move.nrands*tmp], + move); node_walkers[tmp] = new_pos; + node_prop_walkers[tmp] = prop_pos; + node_prev_stat[tmp] = prev_stat; + node_stat[tmp] = new_stat; + } + catch_walkers_mpi(node, nodes, &node_walkers, &node_prop_walkers, &node_stat, &node_prev_stat, + walkers_per_node, handled_walkers); % ... and catch 'em! + + if (io.all || node == 0) { + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + prop_cycle[tmp+cycle_step*node_walker_array_len] = node_prop_walkers[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } + + ifnot (cycle_step) + io.write(walker_cycle, prop_cycle, stat_cycle, prev_stat_cycle); } -#ifexists rcl_mpi_init - catch_walkers_mpi(node, nodes, &node_walkers, walkers_per_node, handled_walkers); % ... and catch 'em! 
- () = rcl_mpi_barrier(); % keep the walkers in sync -#endif + } + + % write missing pieces + variable write_initial = (steps < io.cycle); % if we have not written out any cycle, we have to treat the init walkers special + if (io.all || node == 0) { + if (cycle_step) + io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + prop_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); + + io.close(steps, walker_per_par, number_par); } } -define emcee (walkers_per_par, steps) { - variable move = qualifier("move", STRETCH_MOVE); % defined move +%}}}% + +define emcee_new (walkers_per_par, steps) { + variable move = qualifier("move", EMCEE_STRETCH_MOVE); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function + variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine + variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); + variable sloppy = qualifier("sloppy", 0); + + if (NULL == get_fit_fun()) + throw UsageError, "No fit function loaded"; + if (NULL == all_data()) + throw UsageError, "No data set loaded"; + ifnot (0 ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; - () = printf("--- START ---\n"); - emcee_mpi(total_walkers, steps; move=move, urand=urand, upick=upick, init=init); - () = printf("--- END ---\n"); + emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, + init=init, write_hook=write_hook, output=output, sloppy=sloppy); rcl_mpi_finalize(); } -- GitLab From d0290f8644fa13145c6b770650695b1552974a55 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 30 
Nov 2019 20:13:24 +0100 Subject: [PATCH 07/89] Fix bugs and add continue qualifier --- src/fitting/ensemble-samplers/emcee.sl | 155 +++++++++++++++---------- 1 file changed, 93 insertions(+), 62 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index aaad0c67..c299b140 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -19,6 +19,7 @@ private define stretch_move (fit_object, x, x_j, u, a) { variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step + variable update = 0; % update indicator % evaluate fit function for 'x' xstat = fit_object.eval_statistic(x;nocopy); @@ -33,12 +34,14 @@ private define stretch_move (fit_object, x, x_j, u, a) { % caluculate if we accept the step based on the statistics of the % model. We assume that the statistic is given as -2 log likelihood - if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) + if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { x_t1 = y; + update = 1; + } } catch IsisError; % return new walker position, proposed position, xstat, ystat - return (x_t1, y, xstat, ystat); + return (x_t1, update, xstat, ystat); } private define get_move_args (m) { @@ -122,7 +125,9 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, #else variable modified_name = filename; #endif + variable init_values; variable data_info; + variable i; list_data(&data_info); variable freep = freeParameters(); if (create) { % create the file(s) initially @@ -152,6 +157,8 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + init_values = NULL; } else { % if loading, do some sanity 
checks io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); @@ -159,15 +166,15 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return; + return NULL; } variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab.free_par)) { + ifnot (struct_field_exists(tab, "free_par")) { fits_close_file(io.handle); io.handle = NULL; io.msg = "Not a mcmc chain file"; - return; + return NULL; } if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { fits_close_file(io.handle); @@ -183,16 +190,35 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "Not a mcmc chain file"; - return; + return NULL; } if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { fits_close_file(io.handle); io.handle = NULL; io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return; + return NULL; } fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + init_values = Array_Type[total_walkers]; + tab = fits_read_table(io.handle); + variable j; + variable names = get_struct_field_names(tab); + print(names); + variable l = length(names)-2; + _for j (0, total_walkers-1, 1) { + init_values[j] = Double_Type[l]; + _for i (0, l-1, 1) + init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; + } } % IMPORTANT: stay on mcmcchain table @@ -200,7 +226,7 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "No a mcmc chain file"; - return; + 
return NULL; } io.storage = fits_get_num_rows(io.handle); @@ -210,9 +236,11 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, if (io.cycle < 1) io.cycle = 1; io.msg = NULL; + + return init_values; } -private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle, prev_stat_cycle) { +private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { variable tmp, update; variable freep = freeParameters(); variable npar = length(freep); @@ -227,13 +255,8 @@ private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle } () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); tmp = Int_Type[steps_walkers]; - if (NULL == prop_cycle) { - tmp = Int_Type[steps_walkers]+1; - } else { - _for j (0, steps_walkers-1, 1) - tmp[j] = any(walkers_cycle[j] == prop_cycle[j]); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, tmp); + + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { @@ -276,7 +299,7 @@ private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, numbe } variable EMCEE_IO_FITS = struct { - open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level + open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level, returns null or last walkers if in open mode write = &emcee_write_chain_fits, % write function, takes +? 
arguments close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time handle = NULL, % output handle, usually file pointer, if null after open() indicates error @@ -372,30 +395,36 @@ private define release_walkers_mpi (node, nodes, total_walkers, } } -private define catch_walkers_mpi (node, nodes, walkers_ref, prop_walkers_ref, stat_ref, prev_stat_ref, +private define catch_walkers_mpi (node, nodes, walkers_ref, update_ref, stat_ref, prev_stat_ref, walkers_per_node, handled_walkers) { % <--- only relevant for master variable walkers = @walkers_ref; - variable prop_walkers = @prop_walkers_ref; + variable update = @update_ref; variable stat = @stat_ref; variable prev_stat = @prev_stat_ref; variable i,j; variable npar = length(walkers[0]); % walkers are all equal + variable stat_part; + variable update_part; + #ifexists rcl_mpi_init if (node == 0) { % master, collect all walkers _for i (1, nodes-1, 1) { - _for j (0, walkers_per_node[i]-1, 1) { + stat_part = Double_Type[walkers_per_node[i]]; + update_part = Int_Type[walkers_per_node[i]]; + _for j (0, walkers_per_node[i]-1, 1) () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); - () = rcl_mpi_org_recv_double(prop_walkers[handled_walkers[i]+j], npar, i, i); - } - () = rcl_mpi_org_recv_double(stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); - () = rcl_mpi_org_recv_double(prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); + () = rcl_mpi_org_recv_int(update_part, walkers_per_node[i], i, i); + update[[0:walkers_per_node[i]-1]+handled_walkers[i]] = update_part; + () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); + stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; + () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); + prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; } } else { - _for j (0, length(walkers)-1, 
1) { + _for j (0, length(walkers)-1, 1) () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); - () = rcl_mpi_org_isend_double(prop_walkers[j], npar, 0, node); - } + () = rcl_mpi_org_isend_int(update, length(update), 0, node); () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); } @@ -412,6 +441,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable io = qualifier("write_hook", NULL); variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); + variable init_walkers; + if (cont != NULL) + output = cont; #ifexists rcl_mpi_init variable node = rcl_mpi_init(); @@ -425,7 +457,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { throw InternalError, "Initialization failed"; if (io.all || node == 0) { - io.open(output, total_walkers, cont == NULL, sloppy); + init_walkers = io.open(output, total_walkers, cont == NULL, sloppy); if (NULL == io.handle) throw IOError, (NULL == io.msg) ? "" : io.msg; } @@ -434,14 +466,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable walkers_per_node; variable handled_walkers; variable sort; - variable new_pos, prop_pos, prev_stat, new_stat; + variable new_pos, update_pos, prev_stat, new_stat; variable tmp; variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; variable npar = num_free_params(); variable node_walkers; % send & recv (differ between master and slaves) - variable node_prop_walkers; % recv (differ between master and slaves) + variable node_update; % recv (differ between master and slaves) variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) variable node_stat; % recv (differ between master and slaves) @@ -451,9 +483,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable fit_handle = open_fit(); % if no model is loaded this will crash variable collector_len = 0; if (io.all || node == 0) - collector_len = node_walkers_len*io.cycle; + collector_len = node_walker_array_len*io.cycle; variable walker_cycle; % enough to collect the walkers for one cycle - variable prop_cycle; + variable update_cycle; variable stat_cycle; variable prev_stat_cycle; variable cycle_step = 0; @@ -462,14 +494,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node_pivots[tmp] = Double_Type[npar]; if (node == 0) { % setup master things - node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors - node_prop_walkers = Array_Type[total_walkers]; + if (cont != NULL) + node_walkers = init_walkers; + else + node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_update = Int_Type[total_walkers]; node_stat = Double_Type[total_walkers]+_Inf; node_prev_stat = Double_Type[total_walkers]+_Inf; - _for tmp (0, total_walkers-1, 1) - node_prop_walkers[tmp] = Double_Type[npar]; - sort = array_sort(rand_uniform(total_walkers)); node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); @@ -479,30 +511,22 @@ private define emcee_mpi (walker_per_par, number_par, steps) { handled_walkers = NULL; node_walkers = 
Array_Type[node_walkers_len]; - node_prop_walkers = Array_Type[node_walkers_len]; + node_update = Int_Type[node_walkers_len]; node_stat = Double_Type[node_walkers_len]+_Inf; node_prev_stat = Double_Type[node_walkers_len]+_Inf; - _for tmp (0, node_walkers_len-1, 1) { + _for tmp (0, node_walkers_len-1, 1) node_walkers[tmp] = Double_Type[npar]; - node_prop_walkers[tmp] = Double_Type[npar]; - } } if (io.all || node==0) { % setup collector and write initial walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle - prop_cycle = Array_Type[collector_len]; + update_cycle = Int_Type[collector_len]; stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - _for tmp (0, collector_len-1, 1) { + _for tmp (0, collector_len-1, 1) walker_cycle[tmp] = Double_Type[npar]; - prop_cycle[tmp] = Double_Type[npar]; - } - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp] = node_walkers[tmp]; - prop_cycle[tmp] = node_prop_walkers[tmp]; - } } % the main loop where the magic happens @@ -511,30 +535,36 @@ private define emcee_mpi (walker_per_par, number_par, steps) { release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
- (new_pos, prop_pos, prev_stat, new_stat) = + (new_pos, update_pos, prev_stat, new_stat) = __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); node_walkers[tmp] = new_pos; - node_prop_walkers[tmp] = prop_pos; + node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; node_stat[tmp] = new_stat; } - catch_walkers_mpi(node, nodes, &node_walkers, &node_prop_walkers, &node_stat, &node_prev_stat, + catch_walkers_mpi(node, nodes, &node_walkers, &node_update, &node_stat, &node_prev_stat, walkers_per_node, handled_walkers); % ... and catch 'em! if (io.all || node == 0) { - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - prop_cycle[tmp+cycle_step*node_walker_array_len] = node_prop_walkers[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; - } + if (cont != NULL) { + % if we continue chain, do not write initial walkers + cont = NULL; + s -= 1; + } else { + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } - ifnot (cycle_step) - io.write(walker_cycle, prop_cycle, stat_cycle, prev_stat_cycle); + ifnot (cycle_step) + io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); + } } } @@ -543,7 +573,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (io.all || node == 0) { if (cycle_step) io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - prop_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + update_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], 
stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); @@ -561,6 +591,7 @@ define emcee_new (walkers_per_par, steps) { variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); variable sloppy = qualifier("sloppy", 0); + variable cont = qualifier("continue", NULL); if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; @@ -574,6 +605,6 @@ define emcee_new (walkers_per_par, steps) { throw UsageError, "Unable to create ensemble for this large number of walkers"; emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, write_hook=write_hook, output=output, sloppy=sloppy); + init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); rcl_mpi_finalize(); } -- GitLab From 9291655774dc433e44f5576c040bf94bac26f046 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 5 Feb 2020 16:35:45 +0100 Subject: [PATCH 08/89] Fix incomplete parameter names in fits write routine The table attached to the fits file specified the column for the parameter names as 'A'. This caused that only the first character of each string was stored. Now the table is initialized as 'nA' where n is the maximum length of the parameter names. 
--- src/fitting/ensemble-samplers/emcee.sl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index c299b140..16775a13 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -13,7 +13,7 @@ private define inverse_cdf (u, a) { } % stretch move as of Goodman & Weare 2010 -% Move must evaluate the functions +% Move must evaluate the fit function private define stretch_move (fit_object, x, x_j, u, a) { variable z = inverse_cdf(u[0], a); variable xstat = 1e32, ystat = -1e32; % from mikes code @@ -127,6 +127,7 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, #endif variable init_values; variable data_info; + variable par_names; variable i; list_data(&data_info); variable freep = freeParameters(); @@ -134,20 +135,24 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, io.handle = fits_open_file(modified_name, "c"); % write first table - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], ["J", "A"], [" parameter indices", " parameter names"]); + parnames = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parnames)))], + [" parameter indices", " parameter names"]); fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, - array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1])) + if 
(_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, parnames)) throw IOError; % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); @@ -503,7 +508,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node_prev_stat = Double_Type[total_walkers]+_Inf; sort = array_sort(rand_uniform(total_walkers)); - node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias + node_walkers = node_walkers[sort]; % randomize them to be on the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); handled_walkers = previous_number_walkers(walkers_per_node); } else { @@ -606,5 +611,7 @@ define emcee_new (walkers_per_par, steps) { emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); +#ifexists rcl_mpi_init rcl_mpi_finalize(); +#endif } -- GitLab From 36892735a8ec55b8c80bb4012bd14fad36aa171a Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 5 Feb 2020 18:38:39 +0100 Subject: [PATCH 09/89] Fix emcee fits write routine when all nodes write results Finalize function failed to write result because it tried to always write 'total_walkers' entries.
This does not work when each node is supposed to write the handled walkers. --- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 16775a13..e67005e0 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -264,10 +264,10 @@ private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cyc () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } -private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { +private define emcee_finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { variable tmp, tmp2; - variable total_walkers = walker_per_parameter*number_parameter; variable reread; + variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); all_steps = (all_steps<0) ? 
steps : all_steps + steps; @@ -286,8 +286,8 @@ private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, numbe variable max_stat = Double_Type[steps]; _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:total_walkers-1]+j*total_walkers])/total_walkers; - tmp2 = reread[1][[0:total_walkers-1]+j*total_walkers]; + frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); @@ -582,7 +582,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); - io.close(steps, walker_per_par, number_par); + io.close(steps, node_walker_array_len, walker_per_par, number_par); } } -- GitLab From d603c9a5df3cfeeca21501ff490e3d5730583890 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Fri, 7 Feb 2020 14:39:29 +0100 Subject: [PATCH 10/89] Fix proposal test Step rejection was compared with a linear random number although the probability was given as log likelihood. This caused many steps to get rejected and resulted in weird "static" walkers. --- src/fitting/ensemble-samplers/emcee.sl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e67005e0..a7b881f4 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -34,7 +34,7 @@ private define stretch_move (fit_object, x, x_j, u, a) { % caluculate if we accept the step based on the statistics of the % model.
We assume that the statistic is given as -2 log likelihood - if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { x_t1 = y; update = 1; } @@ -135,17 +135,17 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, io.handle = fits_open_file(modified_name, "c"); % write first table - parnames = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parnames)))], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], [" parameter indices", " parameter names"]); fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, parnames)) + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) throw IOError; % write second table -- GitLab From 467370d91b541b73e067e767425c8547fb42f597 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 20 Feb 2020 15:41:38 +0100 Subject: [PATCH 11/89] Clean new emcee code for better overview Code structure should allow for simpler implementation of additional move algorithms and other functions. 
Still missing: Description and help --- src/fitting/ensemble-samplers/emcee-init.sl | 68 ++++ src/fitting/ensemble-samplers/emcee-io.sl | 226 ++++++++++++ src/fitting/ensemble-samplers/emcee-moves.sl | 89 +++++ src/fitting/ensemble-samplers/emcee.sl | 348 ++----------------- 4 files changed, 409 insertions(+), 322 deletions(-) create mode 100644 src/fitting/ensemble-samplers/emcee-init.sl create mode 100644 src/fitting/ensemble-samplers/emcee-io.sl create mode 100644 src/fitting/ensemble-samplers/emcee-moves.sl diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl new file mode 100644 index 00000000..cb265ec7 --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -0,0 +1,68 @@ +% -*- mode: slang; mode: fold; -*- + +require("rand"); + +% WALKER INIT FUNCTIONS +% Allow different functions for initializing walkers. Follows the same idea +% as for the move steps. +% +% Distribution is only done by the master process, so we don't have to care +% about the random numbers. 
+% +% Init function takes one argument directly which is the number of walkers + +%{{{% helpers +private define emcee_init_uniform(); +private variable EMCEE_INIT = Assoc_Type[Ref_Type, &emcee_init_uniform]; +private define __init_globals () { return struct_combine( + struct { + name = "unspecified init", + init = NULL, + }, + __qualifiers()); +} +define emcee_get_init(key) { return (@EMCEE_INIT[key])(;; __qualifiers); } +define emcee_get_inits () { return assoc_get_keys(EMCEE_INIT); } + +% get all free param values, index and min max +private define free_par_sets () { + variable all = get_params(); + variable i, ind = {}, v = {}, mi = {}, ma = {}; + _for i (0, length(all)-1, 1) { + ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) + continue; + list_append(ind, all[i].index); + list_append(v, all[i].value); + list_append(mi, all[i].min); + list_append(ma, all[i].max); + } + + return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); +} +%}}}% + +%{{{% Uniform initialization function +% pick random parameter values within the boundaries +private define init_parameter_walker_uniform (n) { + variable walkers = Array_Type[n]; + variable i; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + + _for i (0, n-1, 1) + walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; + + return walkers; +} +private define emcee_init_uniform () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __init_globals(; + name="uniform init", + init=&init_parameter_walker_uniform + ); + return struct_combine(struct_combine(defaults, __qualifiers()), settings); +} +%}}}% + +EMCEE_INIT["uniform"] = &emcee_init_uniform; diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl new file mode 100644 index 00000000..433206ea --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -0,0 +1,226 @@ +% -*- mode: slang; mode: fold; -*- % + +% EMCEE INPUT OUTPUT ROUTINES +% Same structure as moves. Routines for output and input are describeed +% here. + +%{{{% helpers +private define emcee_io_fits(); +private variable EMCEE_IO = Assoc_Type[Ref_Type, &emcee_io_fits]; +define emcee_get_io (key) { return (@EMCEE_IO[key])(;; __qualifiers); } +define emcee_get_ios () { return assoc_get_keys(EMCEE_IO); } +private define __io_globals () { + return struct_combine(struct { + name = "unspecified io", + open = NULL, % open function, takes 'io-object', 'filename', 'number walkers', 'create flag', 'sloppy flag' + write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array', 'pre statistisc array' + close = NULL, % finalizes output, takes 'io-object', 'current number steps', 'number walkers', 'walker per parameter', 'number parameter' + handle = NULL, % io access (usually file pointer) + all = 0, % flag indicating if all nodes execute io or just master + msg = NULL, % current io message (used only for feedback) + cycle = 1, % number of steps to perform before write + }, __qualifiers()); +} +%}}}% +%{{{% FITS input output routines +private define write_chain_fits_init (io, filename, total_walkers, create, sloppy) { + % write ensemble evolution to fits file +#ifexists rcl_mpi_init + variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; +#else + variable modified_name = filename; +#endif + variable init_values; + variable data_info; + variable par_names; + variable i; + list_data(&data_info); + variable freep = freeParameters(); + if (create) { % create the file(s) initially + io.handle = fits_open_file(modified_name, "c"); + + % write first table + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], + [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); + fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + init_values = NULL; + } else { % if 
loading, do some sanity checks + io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and loaded chain model differ, unable to continue chain"; + return NULL; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and chain model have different free parameters"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; + return NULL; + } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + init_values = Array_Type[total_walkers]; + tab = fits_read_table(io.handle); + variable j; + variable names = get_struct_field_names(tab); + print(names); + variable l = length(names)-2; + _for j (0, total_walkers-1, 1) { + init_values[j] = Double_Type[l]; + _for i (0, l-1, 1) + init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; + } + } + + % IMPORTANT: stay on mcmcchain 
table + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "No a mcmc chain file"; + return NULL; + } + + io.storage = fits_get_num_rows(io.handle); + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; + io.msg = NULL; + + return init_values; +} + +private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { + variable tmp, update; + variable freep = freeParameters(); + variable npar = length(freep); + variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle + variable i,j; + variable collen = fits_get_num_rows(io.handle); + _for j (0, npar-1, 1) { + tmp = Double_Type[steps_walkers]; + _for i (0, steps_walkers-1, 1) + tmp[i] = walkers_cycle[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); + tmp = Int_Type[steps_walkers]; + + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); +} + +private define finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { + variable tmp, tmp2; + variable reread; + variable total_walkers = walker_per_parameter*number_parameter; + variable collen = fits_get_num_rows(io.handle); % length of chain + variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? 
steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); + fits_update_key(io.handle, "NWALKERS", walker_per_parameter); + fits_update_key(io.handle, "NFREEPAR", number_parameter); + + () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], + io.storage+1, collen-io.storage, &reread); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); + collen = fits_get_num_rows(io.handle); % read length of chain summary + variable j; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; + + _for j (0, steps-1, 1) { + frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; + min_stat[j] = min(tmp2); + max_stat[j] = max(tmp2); + med_stat[j] = median(tmp2); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); + + fits_close_file(io.handle); + io.handle = NULL; + io.storage = NULL; + io.msg = NULL; +} + +private define emcee_io_fits () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __io_globals(; + name = "io fits", + open = &write_chain_fits_init, + write = &write_chain_fits, + close = &finalize_chain_fits, + storage = NULL, + ); + return struct_combine(struct_combine(defaults, __qualifiers()), settings); +} + +%}}}% + +% Provide io functions +EMCEE_IO["fits"] = &emcee_io_fits; diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl new file mode 100644 index 00000000..81bbd91d --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -0,0 +1,89 @@ +% -*- mode: slang; mode: fold; -*- % + +% DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER +% Walker moves should be defined as functions where the function returns a +% structure suitable to describe the step algorithm. For adjustment the +% function should combine the qualifiers with the default structure and +% should handle an argument that may be NULL or a structure with the same +% properties, where the argument should overwrite any qualifiers given to +% the function. The move function MUST evaluate the fit model (in the +% simplest case by using the fit object) and return new position and +% statistics. +% +% Besides additional arguments given in the structure the algorithm must +% process the fit object, current position, pivot position and an array of +% random numbers. +% +% Functions should be private and only accessible through the access function +% 'get_emcee' or 'get_emcee_move'. +% Required default parameters can be retrieved with '__move_globals'. 
+% +% For an example see the STRETCH_MOVE +%{{{% Helpers for move steps +private define __move_globals () { + return struct_combine ( struct { + name = "unspecified move", % name of the step function to be used in output files + move = NULL, % the move function itself, NULL == Error + nrands = 0, % number of required random numbers for each step + }, __qualifiers); +} +private define emcee_move_stretch(); % for default value +private variable EMCEE_MOVES = Assoc_Type[Ref_Type, &emcee_move_stretch]; +define emcee_get_move (key) { return (@EMCEE_MOVES[key])(;;__qualifiers); } +define emcee_get_moves () { return assoc_get_keys(EMCEE_MOVES); } +%}}}% + +%{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define inverse_cdf (u, a) { + return (u*(a-1.)+1.)^2./a; +} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the fit function +private define stretch_move (move, fit_object, x, x_j, u) { + variable z = inverse_cdf(u[0], move.a); + variable xstat = 1e32, ystat = -1e32; % from mikes code + variable y; % step proposition + variable x_t1 = x; % resulting step + variable update = 0; % update indicator + + % evaluate fit function for 'x' + xstat = fit_object.eval_statistic(x;nocopy); + + % calculate the new position (utilize array operations) + y = x_j + z*(x-x_j); + + % try evaluating, if out of bounds, does nothing + try { + % evaluate fit function for 'y' + ystat = fit_object.eval_statistic(y;nocopy); + + % caluculate if we accept the step based on the statistics of the + % model. 
We assume that the statistic is given as -2 log likelihood + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + x_t1 = y; + update = 1; + } + } catch IsisError; + + % return new walker position, proposed position, xstat, ystat + return (x_t1, update, xstat, ystat); +} + +private define emcee_move_stretch () { + variable settings = (_NARGS==1)? () : NULL; + variable defaults = __move_globals(; + name="stretch move", + move=&stretch_move, + nrands=2, + a=2. % move scaling + ); + return struct_combine(struct_combine(defaults, __qualifiers), settings); % combine settings +} +%}}}% + +% Provide moves: +EMCEE_MOVES["stretch"] = &emcee_move_stretch; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index a7b881f4..3c5bb5b2 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,321 +1,6 @@ % -*- mode: slang; mode: fold; -*- % require("rand"); - -%{{{% defined moves for the ensemble walkers -%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY - -% define inverse cumulative distribution function for generating -% random numbers following 1/z^2 when z in [1/a, a] -% TODO: should make this an adjustable thing -private define inverse_cdf (u, a) { - return (u*(a-1)+1)^2/a; -} - -% stretch move as of Goodman & Weare 2010 -% Move must evaluate the fit function -private define stretch_move (fit_object, x, x_j, u, a) { - variable z = inverse_cdf(u[0], a); - variable xstat = 1e32, ystat = -1e32; % from mikes code - variable y; % step proposition - variable x_t1 = x; % resulting step - variable update = 0; % update indicator - - % evaluate fit function for 'x' - xstat = fit_object.eval_statistic(x;nocopy); - - % calculate the new position (utilize array operations) - y = x_j + z*(x-x_j); - - % try evaluating, if out of bounds, does nothing - try { - % evaluate fit function for 'y' - ystat = fit_object.eval_statistic(y;nocopy); - - % caluculate if we accept the step based on 
the statistics of the - % model. We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { - x_t1 = y; - update = 1; - } - } catch IsisError; - - % return new walker position, proposed position, xstat, ystat - return (x_t1, update, xstat, ystat); -} - -private define get_move_args (m) { - return m.args; -} - -private define set_move_args () { - variable args, m; - if (_NARGS>2) { - args = __pop_list(_NARGS-1); - m = (); - } else - (m,args) = (); - if (length(args) != m.nargs) - throw UsageError, sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); - m.args = args; -} - -variable EMCEE_STRETCH_MOVE = struct { - name = "stretch move", - move = &stretch_move, % the function - nrands = 2, % the randoms needed - args = {2}, % additional arguments - nargs = 1, % number additional arguments - get = &get_move_args, - set = &set_move_args, -}; - -% calculate the move for the fit 'fit_object' based on the current walker position x, -% a randomly choosen walker x_j and an array of uniform random numbers enough to -% calculate the next step. 
move is the struct encapsulating the move function -private define __move (fit_object, x, x_j, u, move) { - % for alternative step functions, must be symmetric (that is, - % Pr(x -> y) = Pr(y -> x)) otherwise no detailed balance - - % push standard arguments & arguments for specified move - return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); -} - -%}}}% - -%{{{% initialization of the parameters - -% get all free param values, index and min max -private define free_par_sets () { - variable all = get_params(); - variable i, ind = {}, v = {}, mi = {}, ma = {}; - _for i (0, length(all)-1, 1) { - ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) - continue; - list_append(ind, all[i].index); - list_append(v, all[i].value); - list_append(mi, all[i].min); - list_append(ma, all[i].ma); - } - - return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); -} - -% pick random parameter values within the boundaries -private define init_parameter_walker_uniform (n) { - variable walkers = Array_Type[n]; - variable i; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); - - _for i (0, n-1, 1) - walkers[i] = rand_uniform(num_p)*(p_max-p_min)+p_min; - - return walkers; -} - -%}}}% - -%{{{% write and read functions -private define emcee_write_chain_fits_init (io, filename, total_walkers, create, sloppy) { - % write ensemble evolution to fits file -#ifexists rcl_mpi_init - variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; -#else - variable modified_name = filename; -#endif - variable init_values; - variable data_info; - variable par_names; - variable i; - list_data(&data_info); - variable freep = freeParameters(); - if (create) { % create the file(s) initially - io.handle = fits_open_file(modified_name, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); - fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - init_values = NULL; - } else { % if 
loading, do some sanity checks - io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return NULL; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and chain model have different free parameters"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return NULL; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - init_values = Array_Type[total_walkers]; - tab = fits_read_table(io.handle); - variable j; - variable names = get_struct_field_names(tab); - print(names); - variable l = length(names)-2; - _for j (0, total_walkers-1, 1) { - init_values[j] = Double_Type[l]; - _for i (0, l-1, 1) - init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; - } - } - - % IMPORTANT: stay on mcmcchain 
table - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "No a mcmc chain file"; - return NULL; - } - - io.storage = fits_get_num_rows(io.handle); - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; - if (io.cycle < 1) - io.cycle = 1; - io.msg = NULL; - - return init_values; -} - -private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { - variable tmp, update; - variable freep = freeParameters(); - variable npar = length(freep); - variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle - variable i,j; - variable collen = fits_get_num_rows(io.handle); - _for j (0, npar-1, 1) { - tmp = Double_Type[steps_walkers]; - _for i (0, steps_walkers-1, 1) - tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - tmp = Int_Type[steps_walkers]; - - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); -} - -private define emcee_finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { - variable tmp, tmp2; - variable reread; - variable total_walkers = walker_per_parameter*number_parameter; - variable collen = fits_get_num_rows(io.handle); % length of chain - variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? 
steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); - fits_update_key(io.handle, "NWALKERS", walker_per_parameter); - fits_update_key(io.handle, "NFREEPAR", number_parameter); - - () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.storage+1, collen-io.storage, &reread); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); - collen = fits_get_num_rows(io.handle); % read length of chain summary - variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; - - _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; - min_stat[j] = min(tmp2); - max_stat[j] = max(tmp2); - med_stat[j] = median(tmp2); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); - - fits_close_file(io.handle); - io.handle = NULL; - io.storage = NULL; - io.msg = NULL; -} - -variable EMCEE_IO_FITS = struct { - open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level, returns null or last walkers if in open mode - write = &emcee_write_chain_fits, % write function, takes +? 
arguments - close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time - handle = NULL, % output handle, usually file pointer, if null after open() indicates error - all = 0, % if 1, all nodes will execute the function CARE: IF THIS IS THE CASE, THE WRITE FUNCTION MUST BE AWARE OF THE OTHER PROCESSES! - name = "emcee_io_fits", - storage = NULL, - msg = NULL, % error message - cycle = 1, % write every steps -}; - -%}}}% %{{{% some helpers ... % get walkers per node @@ -502,7 +187,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (cont != NULL) node_walkers = init_walkers; else - node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_walkers = init.init(total_walkers); % initialize the walkers & other collectors node_update = Int_Type[total_walkers]; node_stat = Double_Type[total_walkers]+_Inf; node_prev_stat = Double_Type[total_walkers]+_Inf; @@ -541,11 +226,10 @@ private define emcee_mpi (walker_per_par, number_par, steps) { &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
(new_pos, update_pos, prev_stat, new_stat) = - __move(fit_handle, + move.move(fit_handle, node_walkers[tmp], node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp], - move); + node_randoms[[0:move.nrands-1]+move.nrands*tmp]); node_walkers[tmp] = new_pos; node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; @@ -588,12 +272,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { %}}}% +define emcee_get (s) { + variable split = strchop(s, '/', 0); + if (split[0] == "move" || split[0] == "moves") { + if (length(split)==1) return emcee_get_moves(); + else if (any(emcee_get_moves() == split[1])) return emcee_get_move(split[1];; __qualifiers()); + else vmessage("*** unknown move: '%s'", split[1]); + } else if(split[0] == "init" || split[0] == "inits") { + if (length(split)==1) return emcee_get_inits(); + else if (any(emcee_get_inits() == split[1])) return emcee_get_init(split[1];; __qualifiers()); + else vmessage("*** unknown init: '%s'", split[1]); + } else if (split[0] == "io" || split[0] == "ios") { + if (length(split)==1) return emcee_get_ios(); + else if (any(emcee_get_moves() == split[1])) return emcee_get_io(split[1];; __qualifiers()); + else vmessage("*** unknown io: '%s'", split[1]); + } else { + vmessage("*** unknown target: '%s'", s); + } + return NULL; +} + define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", EMCEE_STRETCH_MOVE); % defined move + variable move = qualifier("move", emcee_get_move("stretch")); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function - variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine + variable init = qualifier("init", emcee_get_init("uniform")); % initialization function + variable write_hook = qualifier("io", emcee_get_io("fits")); % 
output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); variable sloppy = qualifier("sloppy", 0); variable cont = qualifier("continue", NULL); -- GitLab From caea700a8f53d3082d14517c5af9fc65e28bdfe6 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 20:08:47 +0200 Subject: [PATCH 12/89] Fix objective code bug Uniform init function did not respect object oriented slang syntax and was defined with one missing argument --- src/fitting/ensemble-samplers/emcee-init.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index cb265ec7..b0cb80fe 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -43,7 +43,7 @@ private define free_par_sets () { %{{{% Uniform initialization function % pick random parameter values within the boundaries -private define init_parameter_walker_uniform (n) { +private define init_parameter_walker_uniform (init, n) { variable walkers = Array_Type[n]; variable i; variable ind, p, pmin, pmax; -- GitLab From f603747b472145de5a810cbda656d699c3b982ff Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 20:19:07 +0200 Subject: [PATCH 13/89] Add Emcee init for gaussian sphere Initial walker parameters are drawn around the current parameter set with gaussian distribution. Sigma defaults to 1/10 of the parameter range. 
--- src/fitting/ensemble-samplers/emcee-init.sl | 40 ++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index b0cb80fe..1b9e8c32 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -61,8 +61,46 @@ private define emcee_init_uniform () { name="uniform init", init=&init_parameter_walker_uniform ); - return struct_combine(struct_combine(defaults, __qualifiers()), settings); + return struct { @defaults, @__qualifiers(), @settings }; +} +%}}}% + +%{{{% Sphere initialization function with exponential decresing probability +% pick random parameters from gauss((x-x0 +private define init_parameter_walker_gauss_sphere (init, n) { + variable walkers = Array_Type[n]; + variable i,j; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p + variable relative = qualifier_exists("relative") || + not qualifier_exists("sigma") || + not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays + + variable s_par; + variable s_len = length(sigma); + if (Array_Type == typeof(sigma) && s_len != num_p) + throw UsageError, "Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p; + _for i (0, n-1, 1) { + if (relative) + s_par = _min(p-pmin, pmax-p)/sigma; + else + s_par = sigma; + walkers[i] = rand_gauss(1, num_par)*s_par+p; + } + + return walkers; +} +private define emcee_init_gauss_sphere () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __init_globals(; + name="gauss sphere init", + init=&init_parameter_walker_gauss_sphere + ); + return struct { @defaults, @__qualifier(), @settings }; } %}}}% EMCEE_INIT["uniform"] = &emcee_init_uniform; +EMCEE_INIT["gauss-sphere"] = &emcee_init_gauss_sphere; -- GitLab From 233c218de05a49ec6961c42e1e44f950bfb07714 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 22:47:51 +0200 Subject: [PATCH 14/89] Fix EMCEE initial chain output The write routine was called after the first collector entry was already replaced. Now we write first if necessary and then update the cycle buffer. Also removed useless symbol s-=1. The write routine did not properly handle a load chain. Now we check if we have load one and if so we do not write initialized walkers. --- src/fitting/ensemble-samplers/emcee.sl | 39 +++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 3c5bb5b2..885fdc92 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -215,7 +215,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - _for tmp (0, collector_len-1, 1) + walker_cycle[0] = node_walkers; + _for tmp (1, collector_len-1, 1) walker_cycle[tmp] = Double_Type[npar]; } @@ -239,32 +240,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { walkers_per_node, handled_walkers); % ... and catch 'em! 
if (io.all || node == 0) { - if (cont != NULL) { - % if we continue chain, do not write initial walkers - cont = NULL; - s -= 1; - } else { - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; - } - - ifnot (cycle_step) + ifnot (cycle_step) { + if (cont != NULL) { % if we continue do not write the initial step as they will be doubled + io.write(walker_cycle[[1:]], update_cycle[[1:]], stat_cycle[[1:]], prev_stat_cycle[[1:]]); + cont = NULL; + } else io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); } + + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } } } % write missing pieces - variable write_initial = (steps < io.cycle); % if we have not written out any cycle, we have to treat the init walkers special + variable write_initial = (steps < io.cycle) && (cont == NULL); % if we have not written out any cycle, we have to treat the init walkers special + variable skip_load = (cont != NULL); if (io.all || node == 0) { if (cycle_step) - io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); + io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + 
update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[skip:load:(cycle_step+write_initial)*node_walker_array_len-1]]); io.close(steps, node_walker_array_len, walker_per_par, number_par); } -- GitLab From b76ff0785573ffbbe95ff5e6eb356a077ea86d18 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 22:57:56 +0200 Subject: [PATCH 15/89] Fix typos in emcee and emcee-init --- src/fitting/ensemble-samplers/emcee-init.sl | 6 +++--- src/fitting/ensemble-samplers/emcee.sl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index 1b9e8c32..fe17b7dc 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -81,13 +81,13 @@ private define init_parameter_walker_gauss_sphere (init, n) { variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) - throw UsageError, "Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p; + throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); _for i (0, n-1, 1) { if (relative) s_par = _min(p-pmin, pmax-p)/sigma; else s_par = sigma; - walkers[i] = rand_gauss(1, num_par)*s_par+p; + walkers[i] = rand_gauss(1, num_p)*s_par+p; } return walkers; @@ -98,7 +98,7 @@ private define emcee_init_gauss_sphere () { name="gauss sphere init", init=&init_parameter_walker_gauss_sphere ); - return struct { @defaults, @__qualifier(), @settings }; + return struct { @defaults, @__qualifiers(), @settings }; } %}}}% diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 885fdc92..957f3b69 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -265,7 +265,7 @@ private 
define emcee_mpi (walker_per_par, number_par, steps) { io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[skip:load:(cycle_step+write_initial)*node_walker_array_len-1]]); + prev_stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]]); io.close(steps, node_walker_array_len, walker_per_par, number_par); } -- GitLab From e1f2c6c94b33d9918f1b2a3c94640e27202b88b5 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 13 Apr 2020 16:46:45 +0200 Subject: [PATCH 16/89] Fix wrong use of array copy Slang has a natural copy mechanism of the form array_a[[0:n]] = array_b[[m:m+n]] and copies the entries for natural types accordingly. However if array_a is of type Array_Type it sets *all* indices to array_b[[m:m+n]]. We have to copy each element individually to do it correctly. Also removed a print statement used for debugging.
--- src/fitting/ensemble-samplers/emcee-io.sl | 1 - src/fitting/ensemble-samplers/emcee.sl | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 433206ea..e8fd8671 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -122,7 +122,6 @@ private define write_chain_fits_init (io, filename, total_walkers, create, slopp tab = fits_read_table(io.handle); variable j; variable names = get_struct_field_names(tab); - print(names); variable l = length(names)-2; _for j (0, total_walkers-1, 1) { init_values[j] = Double_Type[l]; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 957f3b69..e6b21a4f 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -215,9 +215,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - walker_cycle[0] = node_walkers; - _for tmp (1, collector_len-1, 1) - walker_cycle[tmp] = Double_Type[npar]; + _for tmp (0, length(node_walkers)-1, 1) + walker_cycle[tmp] = node_walkers[tmp]; } % the main loop where the magic happens -- GitLab From 54befaf9a1bafff1bedfe7415f1b0530d8284483 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 18 Apr 2020 10:41:52 +0200 Subject: [PATCH 17/89] Optimization of emcee_new Since for most models the function evaluation is the computationally expensive part it is not smart to evaluate it twice per step. Now each step should just evaluate the fit model once except for the very first for each walker chain. 
--- src/fitting/ensemble-samplers/emcee-moves.sl | 8 +++++--- src/fitting/ensemble-samplers/emcee.sl | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 81bbd91d..1345337a 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -43,15 +43,17 @@ private define inverse_cdf (u, a) { % stretch move as of Goodman & Weare 2010 % Move must evaluate the fit function -private define stretch_move (move, fit_object, x, x_j, u) { +private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator - % evaluate fit function for 'x' - xstat = fit_object.eval_statistic(x;nocopy); + if (isinf(prev_stat)) + xstat = fit_object.eval_statistic(x;nocopy); + else + xstat = prev_stat; % calculate the new position (utilize array operations) y = x_j + z*(x-x_j); diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e6b21a4f..00cddefc 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -156,7 +156,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable walkers_per_node; variable handled_walkers; variable sort; - variable new_pos, update_pos, prev_stat, new_stat; + variable new_pos, update_pos, prev_stat, new_stat; % only first step runs eval_statistic twice (if prev_stat[i] == Inf) + % if the fit statistic evaluates to Inf this also happens, but this should not be the case + % for any good statistical function variable tmp; variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; @@ -229,7 +231,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { move.move(fit_handle, node_walkers[tmp], node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp]); + node_randoms[[0:move.nrands-1]+move.nrands*tmp], + node_prev_stat[tmp]); node_walkers[tmp] = new_pos; node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; -- GitLab From 26b4c275be4fab1918ca7b9e4f3a71522d08414c Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 18 Apr 2020 12:14:18 +0200 Subject: [PATCH 18/89] Fix emcee when called with unconstrained parameter ranges Throw a warning now instead of calculating with infinities --- src/fitting/ensemble-samplers/emcee-init.sl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index fe17b7dc..9a5b7ae1 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -49,7 +49,11 @@ private define init_parameter_walker_uniform (init, n) { variable ind, p, pmin, pmax; (ind, p, pmin, pmax) = free_par_sets(); variable num_p = length(ind); - + + % throw an error on unspecified bounds + if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges"; + _for i (0, n-1, 1) walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; @@ -69,7 +73,7 @@ private define emcee_init_uniform () { % pick random parameters from gauss((x-x0 private define init_parameter_walker_gauss_sphere (init, n) { variable walkers = Array_Type[n]; - variable i,j; + variable i,j,w; variable ind, p, pmin, pmax; (ind, p, pmin, pmax) = free_par_sets(); variable num_p = length(ind); @@ -78,6 +82,10 @@ private define init_parameter_walker_gauss_sphere (init, n) { not qualifier_exists("sigma") || not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays + % 
throw an error on unspecified bounds + if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges"; + variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) @@ -88,6 +96,10 @@ private define init_parameter_walker_gauss_sphere (init, n) { else s_par = sigma; walkers[i] = rand_gauss(1, num_p)*s_par+p; + w = where(walkers[i]pmax); + walkers[i][w] = pmax[w]; } return walkers; -- GitLab From 9ec81082a9c8a10e16bde19444af0b748043306d Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 27 Apr 2020 15:53:55 +0200 Subject: [PATCH 19/89] Fix processor macro When the rcl_mpi routines are not available a brace was missing. --- src/fitting/ensemble-samplers/emcee.sl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 00cddefc..37a98a19 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -74,7 +74,9 @@ private define release_walkers_mpi (node, nodes, total_walkers, } () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 } +#endif } +#ifexists rcl_mpi_init } else { _for j (0, length(node_walkers)-1, 1) { () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); -- GitLab From 0dc483b116ed1c05ee1f3f231de9899a38fd2ece Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 30 Apr 2020 01:51:25 +0200 Subject: [PATCH 20/89] Fix bug when continue chain When continue a chain it was intended that the loaded walkers are not written again to the chain, but only the first walker was skipped. The change in steps was not passed to the closing function causing index error. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 6 ++++-- src/fitting/ensemble-samplers/emcee.sl | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index e8fd8671..bea154b4 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -188,10 +188,12 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete variable min_stat = Double_Type[steps]; variable med_stat = Double_Type[steps]; variable max_stat = Double_Type[steps]; + variable step_stat = reread[1]; + variable step_update = reread[0]; _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; + frac_update[j] = sum(step_update[[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = step_stat[[0:this_walkers-1]+j*this_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 37a98a19..73a48397 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -134,8 +134,11 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); variable init_walkers; - if (cont != NULL) + variable is_cont = 0; + if (cont != NULL) { + is_cont = 1; output = cont; + } #ifexists rcl_mpi_init variable node = rcl_mpi_init(); @@ -246,7 +249,10 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (io.all || node == 0) { ifnot (cycle_step) { if (cont != NULL) { % if we continue do not write the initial step as they will be doubled - io.write(walker_cycle[[1:]], update_cycle[[1:]], stat_cycle[[1:]], prev_stat_cycle[[1:]]); + 
io.write(walker_cycle[[node_walker_array_len:]], + update_cycle[[node_walker_array_len:]], + stat_cycle[[node_walker_array_len:]], + prev_stat_cycle[[node_walker_array_len:]]); cont = NULL; } else io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); @@ -266,12 +272,12 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable skip_load = (cont != NULL); if (io.all || node == 0) { if (cycle_step) - io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]]); + io.write(walker_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + update_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]]); - io.close(steps, node_walker_array_len, walker_per_par, number_par); + io.close(steps-is_cont, node_walker_array_len, walker_per_par, number_par); } } -- GitLab From b5d4d168f9049a3fde4758f1fce38e8f4bda821d Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 4 May 2020 19:13:40 +0200 Subject: [PATCH 21/89] Emcee overhaul Changed sub function calls, comparable to set_fit_method. Fixed ordering bug of parameters thanks to new isis version. Improvements in function handling. Removed unnecessary calculations in loop.
--- src/fitting/ensemble-samplers/emcee-init.sl | 93 ++-- src/fitting/ensemble-samplers/emcee-io.sl | 307 ++++++------ src/fitting/ensemble-samplers/emcee-moves.sl | 42 +- src/fitting/ensemble-samplers/emcee.sl | 469 ++++++++++--------- 4 files changed, 455 insertions(+), 456 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index 9a5b7ae1..0895ceab 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -9,110 +9,87 @@ require("rand"); % Distribution is only done by the master process, so we don't have to care % about the random numbers. % -% Init function takes one argument directly which is the number of walkers +% Init function takes an initialized walker array, and the fit handle +% setup function must be called emcee_init_ %{{{% helpers -private define emcee_init_uniform(); -private variable EMCEE_INIT = Assoc_Type[Ref_Type, &emcee_init_uniform]; private define __init_globals () { return struct_combine( struct { name = "unspecified init", - init = NULL, + __f = NULL, }, __qualifiers()); } -define emcee_get_init(key) { return (@EMCEE_INIT[key])(;; __qualifiers); } -define emcee_get_inits () { return assoc_get_keys(EMCEE_INIT); } -% get all free param values, index and min max -private define free_par_sets () { - variable all = get_params(); - variable i, ind = {}, v = {}, mi = {}, ma = {}; - _for i (0, length(all)-1, 1) { - ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) - continue; - list_append(ind, all[i].index); - list_append(v, all[i].value); - list_append(mi, all[i].min); - list_append(ma, all[i].max); - } - - return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); -} %}}}% %{{{% Uniform initialization function % pick random parameter values within the boundaries -private define init_parameter_walker_uniform (init, n) { - variable walkers = Array_Type[n]; +private define 
init_parameter_walker_uniform (init, walkers, fit_handle) { variable i; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); + variable par = __parameters(fit_handle.object); + variable num_p = length(par.value); % throw an error on unspecified bounds - if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) throw UsageError, "Some parameters have unspecified parameter ranges"; - _for i (0, n-1, 1) - walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; - - return walkers; + _for i (0, length(walkers)-1, 1) + walkers[i] = rand_uniform(num_p)*(par.max-par.min)+par.min; } -private define emcee_init_uniform () { - variable settings = (_NARGS==1) ? () : NULL; +public define emcee_init_uniform () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } + variable defaults = __init_globals(; name="uniform init", - init=&init_parameter_walker_uniform ); - return struct { @defaults, @__qualifiers(), @settings }; + return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_uniform }; } %}}}% %{{{% Sphere initialization function with exponential decresing probability % pick random parameters from gauss((x-x0 -private define init_parameter_walker_gauss_sphere (init, n) { - variable walkers = Array_Type[n]; +private define init_parameter_walker_gauss_sphere (init, walkers, fit_handle) { variable i,j,w; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); + variable par = __parameters(fit_handle.object); + variable num_p = length(par.value); variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p variable relative = qualifier_exists("relative") || not qualifier_exists("sigma") || not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays % throw an error on unspecified bounds - if 
(any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) throw UsageError, "Some parameters have unspecified parameter ranges"; variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); - _for i (0, n-1, 1) { + _for i (0, length(walkers)-1, 1) { if (relative) - s_par = _min(p-pmin, pmax-p)/sigma; + s_par = _min(par.value-par.min, par.max-par.value)/sigma; else s_par = sigma; - walkers[i] = rand_gauss(1, num_p)*s_par+p; - w = where(walkers[i]pmax); - walkers[i][w] = pmax[w]; + walkers[i] = rand_gauss(1, num_p)*s_par+par.value; + w = where(walkers[i]par.max); + walkers[i][w] = par.max[w]; } - - return walkers; } -private define emcee_init_gauss_sphere () { - variable settings = (_NARGS==1) ? () : NULL; +public define emcee_init_gauss () { + if (qualifier_exists("help")) { + help("emcee_init_gauss"); + return NULL; + } + variable defaults = __init_globals(; - name="gauss sphere init", - init=&init_parameter_walker_gauss_sphere + name="gauss init", ); - return struct { @defaults, @__qualifiers(), @settings }; + return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_gauss_sphere }; } %}}}% - -EMCEE_INIT["uniform"] = &emcee_init_uniform; -EMCEE_INIT["gauss-sphere"] = &emcee_init_gauss_sphere; diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index bea154b4..684241bf 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -5,154 +5,185 @@ % here. 
%{{{% helpers -private define emcee_io_fits(); -private variable EMCEE_IO = Assoc_Type[Ref_Type, &emcee_io_fits]; -define emcee_get_io (key) { return (@EMCEE_IO[key])(;; __qualifiers); } -define emcee_get_ios () { return assoc_get_keys(EMCEE_IO); } private define __io_globals () { - return struct_combine(struct { + return struct { name = "unspecified io", - open = NULL, % open function, takes 'io-object', 'filename', 'number walkers', 'create flag', 'sloppy flag' - write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array', 'pre statistisc array' - close = NULL, % finalizes output, takes 'io-object', 'current number steps', 'number walkers', 'walker per parameter', 'number parameter' + __f_create = NULL, % create function, takes 'io-object', 'filename', 'fit_handle', 'total walkers' + __f_open = NULL, % open function, takes 'io-object', 'filename', 'initialized walkers'. + __f_read = NULL, % same as open but is used for seting the walkers, requires less consitency + __f_write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array' + __f_finalize = NULL, % finalizes output, takes 'io-object', 'current number steps', 'walker per parameter', 'number parameter' + __f_close = NULL, % close any handles if necessary, takes 'io-object' handle = NULL, % io access (usually file pointer) - all = 0, % flag indicating if all nodes execute io or just master - msg = NULL, % current io message (used only for feedback) cycle = 1, % number of steps to perform before write - }, __qualifiers()); + @__qualifiers() }; } %}}}% + %{{{% FITS input output routines -private define write_chain_fits_init (io, filename, total_walkers, create, sloppy) { +private define emcee_init_chain_fits (io, filename, fit_handle, total_walkers) { % write ensemble evolution to fits file -#ifexists rcl_mpi_init - variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; -#else - variable modified_name = filename; -#endif variable init_values; variable data_info; variable par_names; variable i; list_data(&data_info); - variable freep = freeParameters(); - if (create) { % create the file(s) initially - io.handle = fits_open_file(modified_name, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); - fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - init_values = NULL; - } else { % if loading, do 
some sanity checks - io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return NULL; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and chain model have different free parameters"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return NULL; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - init_values = Array_Type[total_walkers]; - tab = fits_read_table(io.handle); - variable j; - variable names = get_struct_field_names(tab); - variable l = length(names)-2; - _for j (0, total_walkers-1, 1) { - init_values[j] = Double_Type[l]; - _for i (0, l-1, 1) - init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; - } + variable par = __parameters(fit_handle.object); + io.handle = 
fits_open_file(filename, "c"); + + % write first table + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], + [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), "model function"); + fits_update_key(io.handle, "SLOPPY", 0, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)])) % sort here, so at least they are in index order + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + % move back to chain table + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); + + io.num_steps = 0; + io.sloppy = 0; + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; +} + +private define 
emcee_open_chain_fits (io, filename, fit_handle, walkers) { + io.handle = fits_open_file(filename+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable par = __parameters(fit_handle.object); + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + io.sloppy = (fsloppy > io.sloppy) ? fsloppy : io.sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", io.sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (io.sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - % IMPORTANT: stay on mcmcchain table if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { fits_close_file(io.handle); io.handle = NULL; - io.msg = "No a mcmc chain file"; - return NULL; + throw IOError, "Not a emcee chain file"; + } + + variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); + if (file_nw != length(walkers)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, sprintf("Unable to continue chain with %d walkers with chain with %d walkers", 
file_nw, length(walkers)); } - io.storage = fits_get_num_rows(io.handle); + tab = fits_read_table(io.handle); + variable i,j; + variable names = get_struct_field_names(tab); + variable l = length(names)-2; + _for j (0, length(walkers)-1, 1) + _for i (0, l-1, 1) + walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; + io.cycle = io.cycle/length(walkers); if (io.cycle < 1) io.cycle = 1; - io.msg = NULL; - return init_values; + io.num_steps = fits_get_num_rows(io.handle); } -private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { +private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { + io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable par = __parameters(fit_handle.object); + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } + + variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); + if (file_nw != length(walkers)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, sprintf("Unable to set chain with %d walkers from file with %d walkers", file_nw, length(walkers)); + } + + tab = fits_read_table(io.handle); + variable i,j; + variable names = get_struct_field_names(tab); + variable l = length(names)-2; + _for j (0, length(walkers)-1, 1) + _for i (0, l-1, 1) + walkers[j][i] = 
get_struct_field(tab, names[i+2])[-length(walkers)+j]; +} + +private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cycle, stat_cycle) { variable tmp, update; - variable freep = freeParameters(); - variable npar = length(freep); + variable par = __parameters(fit_handle.object); + variable npar = length(par.index); variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle variable i,j; variable collen = fits_get_num_rows(io.handle); @@ -160,15 +191,13 @@ private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, pr tmp = Double_Type[steps_walkers]; _for i (0, steps_walkers-1, 1) tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), collen+1, 1, tmp); } () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - tmp = Int_Type[steps_walkers]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } -private define finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { +private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; variable total_walkers = walker_per_parameter*number_parameter; @@ -180,7 +209,7 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.storage+1, collen-io.storage, &reread); + io.num_steps+1, collen-io.num_steps, &reread); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; @@ -192,8 +221,8 @@ private 
define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete variable step_update = reread[0]; _for j (0, steps-1, 1) { - frac_update[j] = sum(step_update[[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = step_stat[[0:this_walkers-1]+j*this_walkers]; + frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; + tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); @@ -202,26 +231,28 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); +} +private define emcee_close_chain_fits (io) { fits_close_file(io.handle); io.handle = NULL; - io.storage = NULL; - io.msg = NULL; } -private define emcee_io_fits () { +public define emcee_io_fits () { variable settings = (_NARGS==1) ? 
() : NULL; variable defaults = __io_globals(; name = "io fits", - open = &write_chain_fits_init, - write = &write_chain_fits, - close = &finalize_chain_fits, - storage = NULL, + num_steps = NULL, + sloppy = 0, ); - return struct_combine(struct_combine(defaults, __qualifiers()), settings); + return struct { @defaults, @__qualifiers(), + __f_create=&emcee_init_chain_fits, + __f_open=&emcee_open_chain_fits, + __f_write=&emcee_write_chain_fits, + __f_read=&emcee_read_chain_fits, + __f_finalize=&emcee_finalize_chain_fits, + __f_close=&emcee_close_chain_fits, + }; } %}}}% - -% Provide io functions -EMCEE_IO["fits"] = &emcee_io_fits; diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 1345337a..c1a933f6 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -3,20 +3,16 @@ % DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER % Walker moves should be defined as functions where the function returns a % structure suitable to describe the step algorithm. For adjustment the -% function should combine the qualifiers with the default structure and -% should handle an argument that may be NULL or a structure with the same -% properties, where the argument should overwrite any qualifiers given to -% the function. The move function MUST evaluate the fit model (in the -% simplest case by using the fit object) and return new position and -% statistics. +% function should combine the qualifiers with the default structure. The +% move function MUST evaluate the fit model by using the fit handle and +% return new position and statistics. % % Besides additional arguments given in the structure the algorithm must % process the fit object, current position, pivot position and an array of % random numbers. % -% Functions should be private and only accessible through the access function -% 'get_emcee' or 'get_emcee_move'. 
-% Required default parameters can be retrieved with '__move_globals'. +% For constructing the structure the __move_globals function is convenient. +% The constructur function must be names emcee_move_. % % For an example see the STRETCH_MOVE %{{{% Helpers for move steps @@ -27,10 +23,6 @@ private define __move_globals () { nrands = 0, % number of required random numbers for each step }, __qualifiers); } -private define emcee_move_stretch(); % for default value -private variable EMCEE_MOVES = Assoc_Type[Ref_Type, &emcee_move_stretch]; -define emcee_get_move (key) { return (@EMCEE_MOVES[key])(;;__qualifiers); } -define emcee_get_moves () { return assoc_get_keys(EMCEE_MOVES); } %}}}% %{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY @@ -45,47 +37,37 @@ private define inverse_cdf (u, a) { % Move must evaluate the fit function private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); - variable xstat = 1e32, ystat = -1e32; % from mikes code + variable ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator - if (isinf(prev_stat)) - xstat = fit_object.eval_statistic(x;nocopy); - else - xstat = prev_stat; - % calculate the new position (utilize array operations) y = x_j + z*(x-x_j); - % try evaluating, if out of bounds, does nothing + % try evaluating, if out of bounds, does nothing TODO: This is biasing the result, check how to do this correct try { % evaluate fit function for 'y' ystat = fit_object.eval_statistic(y;nocopy); % caluculate if we accept the step based on the statistics of the % model. 
We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { x_t1 = y; update = 1; } } catch IsisError; - % return new walker position, proposed position, xstat, ystat - return (x_t1, update, xstat, ystat); + % return new walker position, update, new statistic + return (x_t1, update, ystat); } -private define emcee_move_stretch () { - variable settings = (_NARGS==1)? () : NULL; +public define emcee_move_stretch () { variable defaults = __move_globals(; name="stretch move", - move=&stretch_move, nrands=2, a=2. % move scaling ); - return struct_combine(struct_combine(defaults, __qualifiers), settings); % combine settings + return struct { @defaults, @__qualifiers(), __f=&stretch_move }; % combine settings } %}}}% - -% Provide moves: -EMCEE_MOVES["stretch"] = &emcee_move_stretch; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 73a48397..b6c6bcf1 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -14,7 +14,7 @@ private define distribute_walkers (nodes, number_walkers) { return walkers_per_node; } -% get walkers per handled up to this node +% get walkers handled up to this node private define previous_number_walkers (walkers_per_node) { variable l = length(walkers_per_node); variable handled_walkers = Int_Type[l]; @@ -22,295 +22,304 @@ private define previous_number_walkers (walkers_per_node) { _for i (0, l-1, 1) { handled_walkers[i] = c; - if (i 2) + throw UsageError, sprintf("Failed parsing option '%s'", exec_string); + + variable fname = strtrim(s[0]); + variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + if (NULL == f) + throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); + + variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; + variable call = (@f)(;; opt); + + if (typeof(call) != Struct_Type) + throw UsageError, sprintf("Function '%s' not returning valid type"); + + return call; +} + +private define setup_node (node, num_nodes, total_walkers, nrands) { + variable walkers_per_node = distribute_walkers(num_nodes, total_walkers); + variable handled_walkers = previous_number_walkers(walkers_per_node); + variable set1 = total_walkers >> 1; + variable j; + variable N = struct { + walkers, % current walker position + pivots, % current pivot positions + randoms, % required randoms + update, % updater track + stat, % step statistic + total_walkers, % number of totals walkers + walkers_per_node = walkers_per_node, % number of walkers handled by each node + handled_walkers = handled_walkers, % handled_walkers[i] = sum(walkers_per_node[[0:i-1]]) + set1 = set1, % size of set 1 + set2 = total_walkers - set1, % size of set 2 + fit = open_fit(), % the fit object, will fail if no model is loaded + num_pars = num_free_params(), % number of fit parameters + num_rands = nrands, % number of random numbers per walker + }; + if (node) { % setup for slaves + N.walkers = Array_Type[walkers_per_node[node]]; + N.pivots = Array_Type[walkers_per_node[node]]; + N.update = Int_Type[walkers_per_node[node]]; + N.stat = Double_Type[walkers_per_node[node]]; + N.randoms = Double_Type[walkers_per_node[node]*nrands]; + } else { % setup for master + N.walkers = Array_Type[total_walkers]; + N.pivots = Array_Type[total_walkers]; + N.update = Int_Type[total_walkers]; + N.stat = Double_Type[total_walkers]; + N.randoms = Double_Type[total_walkers*nrands]; + } + + _for j (0, length(N.walkers)-1, 1) { + N.walkers[j] = Double_Type[N.num_pars]; + N.pivots[j] = Double_Type[N.num_pars]; + } + + return N; +} + %}}}% %{{{% mpi functions -private define release_walkers_mpi (node, nodes, total_walkers, - nrand, walkers_per_node, handled_walkers, % <--- those are only relevant for 
master process - node_walkers_ref, node_pivots_ref, node_randoms_ref) { - variable node_walkers = @(node_walkers_ref); - variable node_pivots = @(node_pivots_ref); - variable node_randoms = @(node_randoms_ref); +private define release_walkers_mpi_master (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable urand = qualifier("urand", NULL); + if (NULL == urand) + throw InternalError, "No random generator given"; + variable upick = qualifier("upick", NULL); + if (NULL == upick) + throw InternalError, "No random generator given"; + +% variable set2_len = total_walkers>>1; % number of walkers in set 2 + % variable set1_len = total_walkers-set2_len; % number of walkers in set 1 + + variable all_pick1 = (@upick)(0, N.set2-1, N.set1)+N.set1; % pick pivot *for* set 1 + variable all_pick2 = (@upick)(0, N.set1-1, N.set2); % pick pivot *for* set 2 + variable pick = [all_pick1, all_pick2]; + variable this_walkers, this_pivots, this_randoms; variable i,j; - if (node == 0) { % master - variable urand = qualifier("urand", NULL); - if (NULL == urand) - throw InternalError, "No random generator given"; - variable upick = qualifier("upick", NULL); - if (NULL == upick) - throw InternalError, "No random generator given"; - - variable set2_len = total_walkers>>1; % number of walkers in set 2 - variable set1_len = total_walkers-set2_len; % number of walkers in set 1 - variable all_u = (@urand)(total_walkers*nrand); % all random numbers for the next step - variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 - variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 - variable this_walkers, this_pivots, this_randoms; - - _for i (0, nodes-1, 1) { - this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; - this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; - this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; - if (i == 0) { % set master locals, we 
utilize that the walkers the master handles are the first in the array - _for j (0, walkers_per_node[i]-1, 1) { - node_randoms[j] = this_randoms[j]; - node_pivots[j] = this_pivots[j]; - } - } -#ifexists rcl_mpi_init - else { - _for j (0, length(this_walkers)-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 - } - () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 - } -#endif + _for i (0, length(N.walkers)-1, 1) + N.pivots[i] = N.walkers[pick[i]]; % get the pivot points + N.randoms[*] = (@urand)(length(N.walkers)*N.num_rands); % get new random numbers + + _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data + % set the walkers for node i + this_walkers = N.walkers[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + % pick the pivots for node i + this_pivots = N.pivots[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + % set the randoms for node i + this_randoms = N.randoms[[0:N.walkers_per_node[i]*N.num_rands-1]+N.handled_walkers[i]*N.num_rands]; + _for j (0, length(this_walkers)-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 } -#ifexists rcl_mpi_init - } else { - _for j (0, length(node_walkers)-1, 1) { - () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); - () = rcl_mpi_org_recv_double(node_pivots[j], length(node_pivots[j]), 0, 1); - } - () = rcl_mpi_org_recv_double(node_randoms, length(node_randoms), 0, 2); + () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 2 + } #endif +} + +private define release_walkers_mpi_slave 
(node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable j; + _for j (0, length(N.walkers)-1, 1) { + () = rcl_mpi_org_recv_double(N.walkers[j], length(N.walkers[j]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j], length(N.pivots[j]), 0, 1); % receive pivot points (tag 1) } + () = rcl_mpi_org_recv_double(N.randoms, length(N.randoms), 0, 2); % receive random numbers (tag 2) +#endif } -private define catch_walkers_mpi (node, nodes, walkers_ref, update_ref, stat_ref, prev_stat_ref, - walkers_per_node, handled_walkers) { % <--- only relevant for master - variable walkers = @walkers_ref; - variable update = @update_ref; - variable stat = @stat_ref; - variable prev_stat = @prev_stat_ref; - variable i,j; - variable npar = length(walkers[0]); % walkers are all equal +private define release_walkers_mpi (node, num_nodes, N) { + % distribute walkers, pivots and random numbers - variable stat_part; - variable update_part; + if (node) + release_walkers_mpi_slave(node, num_nodes, N); + else + release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); +} +private define catch_walkers_mpi_master (node, num_nodes, N) +{ #ifexists rcl_mpi_init - if (node == 0) { % master, collect all walkers - _for i (1, nodes-1, 1) { - stat_part = Double_Type[walkers_per_node[i]]; - update_part = Int_Type[walkers_per_node[i]]; - _for j (0, walkers_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); - () = rcl_mpi_org_recv_int(update_part, walkers_per_node[i], i, i); - update[[0:walkers_per_node[i]-1]+handled_walkers[i]] = update_part; - () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); - stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; - () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); - prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; - } - } else { - _for j (0, length(walkers)-1, 1) - () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); - () = 
rcl_mpi_org_isend_int(update, length(update), 0, node); - () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); - () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); + variable i,j; + variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index + _for i (1, num_nodes-1, 1) { + this_stat = Double_Type[N.walkers_per_node[i]]; + this_update = Int_Type[N.walkers_per_node[i]]; + _for j (0, N.walkers_per_node[i]-1, 1) + () = rcl_mpi_org_recv_double(N.walkers[N.handled_walkers[i]+j], length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); + () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); + N.update[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_update; + N.stat[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_stat; } #endif } +private define catch_walkers_mpi_slave (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable i; + _for i (0, length(N.walkers)-1, 1) + () = rcl_mpi_org_isend_double(N.walkers[i], length(N.walkers[0]), 0, node); + () = rcl_mpi_org_isend_int(N.update, length(N.update), 0, node); + () = rcl_mpi_org_isend_double(N.stat, length(N.stat), 0, node); +#endif +} + +private define catch_walkers_mpi (node, num_nodes, N) +{ + if (node) + catch_walkers_mpi_slave(node, num_nodes, N); + else + catch_walkers_mpi_master(node, num_nodes, N); +} + private define emcee_mpi (walker_per_par, number_par, steps) { variable total_walkers = walker_per_par*number_par; variable init = qualifier("init", NULL); variable move = qualifier("move", NULL); variable urand = qualifier("urand", NULL); variable upick = qualifier("upick", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write_hook", NULL); - variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); - variable init_walkers; - variable is_cont = 0; - if (cont != NULL) { - is_cont = 1; - 
output = cont; - } + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); + variable load_hook = qualifier("read", NULL); + variable load = qualifier("load", NULL); + + variable node, num_nodes; #ifexists rcl_mpi_init - variable node = rcl_mpi_init(); - variable nodes = rcl_mpi_numtasks(); - rcl_init_mpi_request(nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... + node = rcl_mpi_init(); + num_nodes = rcl_mpi_numtasks(); + num_nodes = (num_nodes<1) ? 1 : num_nodes; + rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... #else - variable node = 0; - variable nodes = 1; + node = 0; + num_nodes = 1; #endif - if (NULL == init || NULL == move || NULL == urand || NULL == upick) - throw InternalError, "Initialization failed"; - - if (io.all || node == 0) { - init_walkers = io.open(output, total_walkers, cont == NULL, sloppy); - if (NULL == io.handle) - throw IOError, (NULL == io.msg) ? "" : io.msg; - } - % master only variables - variable walkers_per_node; - variable handled_walkers; - variable sort; - variable new_pos, update_pos, prev_stat, new_stat; % only first step runs eval_statistic twice (if prev_stat[i] == Inf) - % if the fit statistic evaluates to Inf this also happens, but this should not be the case - % for any good statistical function - variable tmp; - variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); - variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; - variable npar = num_free_params(); - - variable node_walkers; % send & recv (differ between master and slaves) - variable node_update; % recv (differ between master and slaves) - variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) - variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) - variable node_stat; % recv (differ between master and slaves) - variable node_prev_stat; % recv (differ between master and slaves) - - variable s; - variable fit_handle = open_fit(); % if no model is loaded this will crash - variable collector_len = 0; - if (io.all || node == 0) - collector_len = node_walker_array_len*io.cycle; - variable walker_cycle; % enough to collect the walkers for one cycle - variable update_cycle; - variable stat_cycle; - variable prev_stat_cycle; - variable cycle_step = 0; + move = emcee_call_setup_fun(move, "move"); + variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - _for tmp (0, node_walkers_len-1, 1) - node_pivots[tmp] = Double_Type[npar]; + % read the settings, if one of them is NULL help was called + io = emcee_call_setup_fun(io, "io"); + load_hook = emcee_call_setup_fun(load_hook, "io"); + init = emcee_call_setup_fun(init, "init"); + if (NULL == io || NULL == load_hook || NULL == init) return; - if (node == 0) { % setup master things + ifnot (node) { % master only if (cont != NULL) - node_walkers = init_walkers; - else - node_walkers = init.init(total_walkers); % initialize the walkers & other collectors - node_update = Int_Type[total_walkers]; - node_stat = Double_Type[total_walkers]+_Inf; - node_prev_stat = Double_Type[total_walkers]+_Inf; - - sort = array_sort(rand_uniform(total_walkers)); - node_walkers = node_walkers[sort]; % randomize them to be on the safe side, init may introduce bias - walkers_per_node = distribute_walkers(nodes, total_walkers); - handled_walkers = previous_number_walkers(walkers_per_node); - } else { - 
walkers_per_node = NULL; - handled_walkers = NULL; - - node_walkers = Array_Type[node_walkers_len]; - node_update = Int_Type[node_walkers_len]; - node_stat = Double_Type[node_walkers_len]+_Inf; - node_prev_stat = Double_Type[node_walkers_len]+_Inf; - - _for tmp (0, node_walkers_len-1, 1) - node_walkers[tmp] = Double_Type[npar]; + io.__f_open(cont, this.walkers); + else if (load != NULL) { + load_hook.__f_read(load, this.walkers); + load_hook.__f_close(); + io.__f_create(output, this.fit, total_walkers); + } else { + io.__f_create(output, this.fit, total_walkers); + init.__f(this.walkers, this.fit); + } } - if (io.all || node==0) { % setup collector and write initial - walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle - update_cycle = Int_Type[collector_len]; - stat_cycle = Double_Type[collector_len]+_Inf; - prev_stat_cycle = Double_Type[collector_len]+_Inf; + variable collector_length; % the collector so we can skip turns before writing to disk + variable walker_cycle; + variable update_cycle; + variable stat_cycle; + variable cycle_step = 0; - _for tmp (0, length(node_walkers)-1, 1) - walker_cycle[tmp] = node_walkers[tmp]; + % setup space + variable j; + if (node) % slave + collector_length = 0; + else % master + collector_length = length(this.walkers)*io.cycle; + + walker_cycle = Array_Type[collector_length]; + update_cycle = Int_Type[collector_length]; + stat_cycle = Double_Type[collector_length]; + + if (NULL == urand || NULL == upick) + throw InternalError, "Missing random number generator"; + + % evaluate the model at the walker positions to get the statistics + % and if not continuing a chain, write them out + _for j (0, length(this.walkers)-1, 1) { + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.update[j] = 1; } + if ((cont == NULL) && (node == 0)) % only master is writing the initials + io.__f_write(this.fit, this.walkers, this.update, this.stat); % the main loop where the magic happens + variable s, 
walker, update, stat; _for s (1, steps, 1) { cycle_step = s mod io.cycle; - release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... - &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); - _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... - (new_pos, update_pos, prev_stat, new_stat) = - move.move(fit_handle, - node_walkers[tmp], - node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp], - node_prev_stat[tmp]); - node_walkers[tmp] = new_pos; - node_update[tmp] = update_pos; - node_prev_stat[tmp] = prev_stat; - node_stat[tmp] = new_stat; + release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ... + (walker, update, stat) = move.__f(this.fit, this.walkers[j], this.pivots[j], + this.randoms[[0:this.num_rands-1]+j*this.num_rands], this.stat[j]); + this.walkers[j] = walker; + this.update[j] = update; + this.stat[j] = stat; } - catch_walkers_mpi(node, nodes, &node_walkers, &node_update, &node_stat, &node_prev_stat, - walkers_per_node, handled_walkers); % ... and catch 'em! 
- - if (io.all || node == 0) { - ifnot (cycle_step) { - if (cont != NULL) { % if we continue do not write the initial step as they will be doubled - io.write(walker_cycle[[node_walker_array_len:]], - update_cycle[[node_walker_array_len:]], - stat_cycle[[node_walker_array_len:]], - prev_stat_cycle[[node_walker_array_len:]]); - cont = NULL; - } else - io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); - } - - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! + + % if cycle end is reached write the chain + ifnot (node) { % master only + ifnot (cycle_step) + io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); + _for j (0, length(this.walkers)-1, 1) { + walker_cycle[j+(cycle_step-1)*length(this.walkers)] = @(this.walkers[j]); + update_cycle[j+(cycle_step-1)*length(this.walkers)] = this.update[j]; + stat_cycle[j+(cycle_step-1)*length(this.walkers)] = this.stat[j]; } } } - % write missing pieces - variable write_initial = (steps < io.cycle) && (cont == NULL); % if we have not written out any cycle, we have to treat the init walkers special - variable skip_load = (cont != NULL); - if (io.all || node == 0) { + % we might have unwritten steps left, so better write them here + ifnot (node) { if (cycle_step) - io.write(walker_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - 
prev_stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]]); - - io.close(steps-is_cont, node_walker_array_len, walker_per_par, number_par); + io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], + update_cycle[[:cycle_step*length(this.walkers)-1]], + stat_cycle[[:cycle_step*length(this.walkers)-1]]); } -} -%}}}% - -define emcee_get (s) { - variable split = strchop(s, '/', 0); - if (split[0] == "move" || split[0] == "moves") { - if (length(split)==1) return emcee_get_moves(); - else if (any(emcee_get_moves() == split[1])) return emcee_get_move(split[1];; __qualifiers()); - else vmessage("*** unknown move: '%s'", split[1]); - } else if(split[0] == "init" || split[0] == "inits") { - if (length(split)==1) return emcee_get_inits(); - else if (any(emcee_get_inits() == split[1])) return emcee_get_init(split[1];; __qualifiers()); - else vmessage("*** unknown init: '%s'", split[1]); - } else if (split[0] == "io" || split[0] == "ios") { - if (length(split)==1) return emcee_get_ios(); - else if (any(emcee_get_moves() == split[1])) return emcee_get_io(split[1];; __qualifiers()); - else vmessage("*** unknown io: '%s'", split[1]); - } else { - vmessage("*** unknown target: '%s'", s); + % and finally call the finalizing function + ifnot (node) { % master only + io.__f_finalize(steps, walker_per_par, number_par, this.fit); + io.__f_close(); } - return NULL; } +%}}}% define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", emcee_get_move("stretch")); % defined move + variable move = qualifier("move", "stretch"); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", emcee_get_init("uniform")); % initialization function - variable write_hook = qualifier("io", emcee_get_io("fits")); % output routine + variable init = qualifier("init", "uniform"); % 
initialization function + variable load = qualifier("load", NULL); % initialize from file + variable read_hook = qualifier("read", "fits"); + variable io = qualifier("write", "fits"); % output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); - variable sloppy = qualifier("sloppy", 0); variable cont = qualifier("continue", NULL); if (NULL == get_fit_fun()) @@ -325,7 +334,7 @@ define emcee_new (walkers_per_par, steps) { throw UsageError, "Unable to create ensemble for this large number of walkers"; emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); + init=init, output=output, continue=cont, write=io, load=load, read=read_hook); #ifexists rcl_mpi_init rcl_mpi_finalize(); #endif -- GitLab From a1cface0110ceeb1bf22b0deecb3812c1742d3e0 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 4 May 2020 23:30:25 +0200 Subject: [PATCH 22/89] Some minor code restructure & help for sub-functions --- src/fitting/ensemble-samplers/emcee-io.sl | 4 +++ src/fitting/ensemble-samplers/emcee-moves.sl | 4 +++ src/fitting/ensemble-samplers/emcee.sl | 30 +++++++++----------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 684241bf..8e433788 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -239,6 +239,10 @@ private define emcee_close_chain_fits (io) { } public define emcee_io_fits () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } variable settings = (_NARGS==1) ? 
() : NULL; variable defaults = __io_globals(; name = "io fits", diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index c1a933f6..5dc52fa4 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -63,6 +63,10 @@ private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { } public define emcee_move_stretch () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } variable defaults = __move_globals(; name="stretch move", nrands=2, diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index b6c6bcf1..820dcbb9 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -43,12 +43,7 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; - variable call = (@f)(;; opt); - - if (typeof(call) != Struct_Type) - throw UsageError, sprintf("Function '%s' not returning valid type"); - - return call; + return (@f)(;; opt); } private define setup_node (node, num_nodes, total_walkers, nrands) { @@ -312,15 +307,17 @@ private define emcee_mpi (walker_per_par, number_par, steps) { %}}}% define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", "stretch"); % defined move - variable urand = qualifier("urand", &rand_uniform); % double random generator - variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", "uniform"); % initialization function - variable load = qualifier("load", NULL); % initialize from file - variable read_hook = qualifier("read", "fits"); - variable io = qualifier("write", "fits"); % output routine - variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); - variable cont = qualifier("continue", NULL); + variable qs = struct { + move = "stretch", % defined move + urand = &rand_uniform, % double random generator + upick = &rand_int, % int random generator + init = "uniform", % initialization function + load = NULL, % initialize from file + read = "fits", % specifier for read + write = "fits", % specifier for write + output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file + continue = NULL, % continue file + }; if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; @@ -333,8 +330,7 @@ define emcee_new (walkers_per_par, steps) { if (total_walkers > ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; - emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, output=output, continue=cont, write=io, load=load, read=read_hook); + emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); #ifexists rcl_mpi_init 
rcl_mpi_finalize(); #endif -- GitLab From a8b4e310e7421a5912b29cad13a11b15341321c1 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 5 May 2020 17:28:17 +0200 Subject: [PATCH 23/89] Fix write call Indexing was wrong, write function was trying to write uninitialized array entries. --- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 820dcbb9..b47cb27a 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -266,7 +266,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % the main loop where the magic happens variable s, walker, update, stat; - _for s (1, steps, 1) { + _for s (0, steps-1, 1) { cycle_step = s mod io.cycle; release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ...
@@ -280,12 +280,12 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % if cycle end is reached write the chain ifnot (node) { % master only - ifnot (cycle_step) + if (not cycle_step && s > 0) io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+(cycle_step-1)*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+(cycle_step-1)*length(this.walkers)] = this.update[j]; - stat_cycle[j+(cycle_step-1)*length(this.walkers)] = this.stat[j]; + walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); + update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; + stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; } } } -- GitLab From 9fc1b571c4863438e256738ad36ed3e4c4b64b2b Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 12 May 2020 09:50:48 +0200 Subject: [PATCH 24/89] Fix eval_statistic call from slaves Wrong if clause caused slaves to try to evaluate fit statistic on not initialized values. 
--- src/fitting/ensemble-samplers/emcee.sl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index b47cb27a..324779b6 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -257,12 +257,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % evaluate the model at the walker positions to get the statistics % and if not continuing a chain, write them out - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; + ifnot (node) { + _for j (0, length(this.walkers)-1, 1) { + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.update[j] = 1; + } + if (cont == NULL) + io.__f_write(this.fit, this.walkers, this.update, this.stat); } - if ((cont == NULL) && (node == 0)) % only master is writing the initials - io.__f_write(this.fit, this.walkers, this.update, this.stat); % the main loop where the magic happens variable s, walker, update, stat; -- GitLab From 5d8be749b561fe56386ebda8010a68af7748c6ed Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 24 Jun 2020 11:00:45 +0200 Subject: [PATCH 25/89] True separate sets --- src/fitting/ensemble-samplers/emcee.sl | 192 +++++++++++++++++-------- 1 file changed, 133 insertions(+), 59 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 324779b6..5e41b6f2 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -8,24 +8,30 @@ private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; variable n = number_walkers/nodes + 1; variable missing = nodes - (number_walkers mod nodes); + variable set1_per_node, set2_per_node; walkers_per_node[[0:nodes-missing-1]] = n; walkers_per_node[[nodes-missing:nodes-1]] = n-1; - return walkers_per_node; +
set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set + set1_per_node = walkers_per_node - set2_per_node; + return set1_per_node, set2_per_node; } % get walkers handled up to this node -private define previous_number_walkers (walkers_per_node) { - variable l = length(walkers_per_node); - variable handled_walkers = Int_Type[l]; - variable i, c = 0; +private define previous_number_walkers (set1_per_node, set2_per_node) { + variable l = length(set1_per_node); + variable set1_handled = Int_Type[l]; + variable set2_handled = Int_Type[l]; + variable i, c1 = 0, c2 = 0; _for i (0, l-1, 1) { - handled_walkers[i] = c; - c += walkers_per_node[i]; + set1_handled = c1; + set2_handled = c2; + c1 += set1_handled[i]; + c2 += set2_handled[i]; } - return handled_walkers; + return set1_handled, set2_handled; } private define emcee_call_setup_fun (exec_string, type) % string similar to statistic or fit method definitions "name; option1=foo, option2=bar" @@ -47,31 +53,39 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat } private define setup_node (node, num_nodes, total_walkers, nrands) { - variable walkers_per_node = distribute_walkers(num_nodes, total_walkers); - variable handled_walkers = previous_number_walkers(walkers_per_node); - variable set1 = total_walkers >> 1; + variable set1_per_node, set2_per_node; + variable set1_handled, set2_handled; + variable set2_len = total_walkers >> 1; % split walker in two sets + variable set1_len = total_walkers - set2_len; variable j; + (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); + (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); + variable N = struct { walkers, % current walker position - pivots, % current pivot positions + pivots, % current pivot positions, should not change for one loop! 
randoms, % required randoms update, % updater track stat, % step statistic total_walkers, % number of totals walkers - walkers_per_node = walkers_per_node, % number of walkers handled by each node - handled_walkers = handled_walkers, % handled_walkers[i] = sum(walkers_per_node[[0:i-1]]) - set1 = set1, % size of set 1 - set2 = total_walkers - set1, % size of set 2 + set1 = set1_len, % walkers for 1st update + set2 = set2_len, % walkers set for 2nd update + set1_per_node = set1_per_node, % walkers of set1 processed per node + set2_per_node = set2_per_node, % walkers of set2 processed per node + set1_handled = set1_handled, % walkers of set1 handled by all previous nodes + set2_handled = set2_handled, % walkers of set2 handled by all previous nodes + set = 0, % indicates which set to update (1: first set, 2: second set) fit = open_fit(), % the fit object, will fail if no model is loaded num_pars = num_free_params(), % number of fit parameters num_rands = nrands, % number of random numbers per walker }; + if (node) { % setup for slaves - N.walkers = Array_Type[walkers_per_node[node]]; - N.pivots = Array_Type[walkers_per_node[node]]; - N.update = Int_Type[walkers_per_node[node]]; - N.stat = Double_Type[walkers_per_node[node]]; - N.randoms = Double_Type[walkers_per_node[node]*nrands]; + N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; + N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; + N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; } else { % setup for master N.walkers = Array_Type[total_walkers]; N.pivots = Array_Type[total_walkers]; @@ -102,31 +116,45 @@ private define release_walkers_mpi_master (node, num_nodes, N) if (NULL == upick) throw InternalError, "No random generator given"; -% variable set2_len = total_walkers>>1; % number of walkers in set 2 - % variable set1_len = 
total_walkers-set2_len; % number of walkers in set 1 + % select pivot walkers for current set + variable set_len, complement_len, len_offset; + variable set_per_node, set_handled; + variable pick; + if (1 == N.set) { + set_len = N.set1; + complement_len = N.set2; + len_offset = 0; + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + } else if (2 == N.set) { + set_len = N.set2; + complement_len = N.set1; + len_offset = N.set1; + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + } + pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - variable all_pick1 = (@upick)(0, N.set2-1, N.set1)+N.set1; % pick pivot *for* set 1 - variable all_pick2 = (@upick)(0, N.set1-1, N.set2); % pick pivot *for* set 2 - variable pick = [all_pick1, all_pick2]; variable this_walkers, this_pivots, this_randoms; variable i,j; - _for i (0, length(N.walkers)-1, 1) - N.pivots[i] = N.walkers[pick[i]]; % get the pivot points - N.randoms[*] = (@urand)(length(N.walkers)*N.num_rands); % get new random numbers + % set current walkers and complement pivots + _for i (0, set_len-1, 1) + N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = (@urand)(set_len*N.num_rands); % get new random numbers for current set _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data % set the walkers for node i - this_walkers = N.walkers[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % pick the pivots for node i - this_pivots = N.pivots[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % set the randoms for node i - this_randoms = N.randoms[[0:N.walkers_per_node[i]*N.num_rands-1]+N.handled_walkers[i]*N.num_rands]; - _for j (0, length(this_walkers)-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j], 
length(this_walkers[j]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 + this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1]+(set_handled[i]+len_offset)*N.num_rands]; + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 2 + () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands], set_len*N.num_rands, i, 2); % send random numbers with tag 2 } #endif } @@ -134,12 +162,24 @@ private define release_walkers_mpi_master (node, num_nodes, N) private define release_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset; + variable this_randoms; + if (1 == N.set) { + set_len = N.set1_per_node; + len_offset = 0; + } else if (2 == N.set) { + set_len = N.set2_per_node; + len_offset = N.set1_per_node[node]; + } + variable j; - _for j (0, length(N.walkers)-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j], length(N.walkers[j]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j], length(N.pivots[j]), 0, 1); % receive pivot points (tag 1) + this_randoms = Double_Type[set_len*N.num_rands]; + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(N.randoms, length(N.randoms), 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(this_randoms, 
set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; #endif } @@ -155,17 +195,28 @@ private define release_walkers_mpi (node, num_nodes, N) { private define catch_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset, set_per_node, set_handled; + if (1 == N.set) { + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + len_offset = 0; + } else if (2 == N.set) { + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + len_offset = N.set1; + } + variable i,j; variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[N.walkers_per_node[i]]; - this_update = Int_Type[N.walkers_per_node[i]]; - _for j (0, N.walkers_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[N.handled_walkers[i]+j], length(N.walkers[0]), i, i); + this_stat = Double_Type[set_per_node[i]]; + this_update = Int_Type[set_per_node[i]]; + _for j (0, set_per_node[i]-1, 1) + () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], length(N.walkers[0]), i, i); () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); - N.update[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_update; - N.stat[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_stat; + N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; + N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; } #endif } @@ -173,11 +224,20 @@ private define catch_walkers_mpi_master (node, num_nodes, N) private define catch_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset; + if (1 == N.set) { + set_len = N.set1_per_node[node]; + len_offset = 0; + } else if (2 == N.set) { + set_len = 
N.set2_per_node[node]; + len_offset = N.set1_per_node[node]; + } + variable i; - _for i (0, length(N.walkers)-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i], length(N.walkers[0]), 0, node); - () = rcl_mpi_org_isend_int(N.update, length(N.update), 0, node); - () = rcl_mpi_org_isend_double(N.stat, length(N.stat), 0, node); + _for i (0, set_len-1, 1) + () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); + () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); #endif } @@ -267,18 +327,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { } % the main loop where the magic happens - variable s, walker, update, stat; + variable s, walker, update, stat, set, set_len, len_offset; _for s (0, steps-1, 1) { cycle_step = s mod io.cycle; - release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... - _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j], this.pivots[j], - this.randoms[[0:this.num_rands-1]+j*this.num_rands], this.stat[j]); - this.walkers[j] = walker; - this.update[j] = update; - this.stat[j] = stat; + _for set (1, 2, 1) { + this.set = set; + + release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + + if (1 == set) { + set_len = this.set1_per_node[node]; + len_offset = 0; + } else if (2 == set) { + set_len = this.set2_per_node[node]; + len_offset = this.set1_per_node[node]; + } + + _for j (0, set_len-1, 1) { % ... let them move ... 
+ (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], this.pivots[j+len_offset], + this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], this.stat[j+len_offset]); + this.walkers[j+len_offset] = walker; + this.update[j+len_offset] = update; + this.stat[j+len_offset] = stat; + } + + catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! } - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! % if cycle end is reached write the chain ifnot (node) { % master only -- GitLab From 5016d87356accd4b9925d4e0df35e78d06f270cb Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 29 Jun 2020 13:03:28 +0200 Subject: [PATCH 26/89] Fix statistic bug in stretch_move Stretch move returned wrong statistic when rejecting the step. This caused a weird bias in the resulting distribution. Kudos to Philipp T. for finding this! --- src/fitting/ensemble-samplers/emcee-moves.sl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 5dc52fa4..145c5027 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -37,7 +37,7 @@ private define inverse_cdf (u, a) { % Move must evaluate the fit function private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); - variable ystat = -1e32; % from mikes code + variable ystat, ret_stat = prev_stat; variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator @@ -53,13 +53,14 @@ private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { % caluculate if we accept the step based on the statistics of the % model.
We assume that the statistic is given as -2 log likelihood if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { + ret_stat = ystat; x_t1 = y; update = 1; } } catch IsisError; % return new walker position, update, new statistic - return (x_t1, update, ystat); + return (x_t1, update, ret_stat); } public define emcee_move_stretch () { -- GitLab From 1fa0b2b5bbbe9f7ef15dc97e6f097a4eeecb02ef Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 29 Jun 2020 19:30:07 +0200 Subject: [PATCH 27/89] Fix missing argument when continuing chain Call to io.__f_open was missing the fit handle argument --- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 5e41b6f2..ee3c194e 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -284,7 +284,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { ifnot (node) { % master only if (cont != NULL) - io.__f_open(cont, this.walkers); + io.__f_open(cont, this.fit, this.walkers); else if (load != NULL) { load_hook.__f_read(load, this.walkers); load_hook.__f_close(); -- GitLab From 09e102d5b8fc47cb5f095c5df6bd6c915a0c30fa Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sun, 5 Jul 2020 12:32:12 +0200 Subject: [PATCH 28/89] Fix missing step write at the end of chain Index confusion led to a missing step write, causing an index error when trying to continue a chain.
--- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index ee3c194e..9de50f2a 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -367,11 +367,11 @@ private define emcee_mpi (walker_per_par, number_par, steps) { } % we might have unwritten steps left, so better write them here + cycle_step++; % the last step is never written in the loop, we have to deal with it here ifnot (node) { - if (cycle_step) - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); + io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], + update_cycle[[:cycle_step*length(this.walkers)-1]], + stat_cycle[[:cycle_step*length(this.walkers)-1]]); } % and finally call the finalizing function @@ -408,6 +408,6 @@ define emcee_new (walkers_per_par, steps) { emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); #ifexists rcl_mpi_init - rcl_mpi_finalize(); +% rcl_mpi_finalize(); #endif } -- GitLab From 5948fc01f6552648c35a9558036248a6958d9264 Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Wed, 8 Jul 2020 23:35:05 +0200 Subject: [PATCH 29/89] Fix missing statistic of last step Changed emcee_finalize_chain_fits to be able to deal with the varying number of written steps for new and continued chains. Also adjusted the value of NSTEPS that is written to the header accordingly. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 8e433788..b1357255 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -200,27 +200,29 @@ private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cyc private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; + variable written_steps; variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); fits_update_key(io.handle, "NWALKERS", walker_per_parameter); fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], io.num_steps+1, collen-io.num_steps, &reread); + written_steps = length(reread[0])/total_walkers; % need not be equal to steps + all_steps = (all_steps<0) ? 
written_steps : all_steps + written_steps; + fits_update_key(io.handle, "NSTEPS", all_steps); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; + variable frac_update = Double_Type[written_steps]; + variable min_stat = Double_Type[written_steps]; + variable med_stat = Double_Type[written_steps]; + variable max_stat = Double_Type[written_steps]; variable step_stat = reread[1]; variable step_update = reread[0]; - _for j (0, steps-1, 1) { + _for j (0, written_steps-1, 1) { frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); -- GitLab From cc115014e830996ac17e9547b6c90bbee59fdcc0 Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Mon, 13 Jul 2020 13:29:37 +0200 Subject: [PATCH 30/89] Revert "Fix missing statistic of last step" This reverts commit c691bdbdff21c63064a5a52ef8a8bf11120a9122. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index b1357255..8e433788 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -200,29 +200,27 @@ private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cyc private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; - variable written_steps; variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); fits_update_key(io.handle, "NWALKERS", walker_per_parameter); fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], io.num_steps+1, collen-io.num_steps, &reread); - written_steps = length(reread[0])/total_walkers; % need not be equal to steps - all_steps = (all_steps<0) ? 
written_steps : all_steps + written_steps; - fits_update_key(io.handle, "NSTEPS", all_steps); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; - variable frac_update = Double_Type[written_steps]; - variable min_stat = Double_Type[written_steps]; - variable med_stat = Double_Type[written_steps]; - variable max_stat = Double_Type[written_steps]; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; variable step_stat = reread[1]; variable step_update = reread[0]; - _for j (0, written_steps-1, 1) { + _for j (0, steps-1, 1) { frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); -- GitLab From e57203b907069a1bddd4d135bf35a392a26a22a0 Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Mon, 13 Jul 2020 14:48:18 +0200 Subject: [PATCH 31/89] Add function to write random initial walkers to separate extension Added function emcee_write_init_step to write the first randomly chosen set of walkers to the separate extension INITWALKER. This should ensure that the actual chain in the extension MCMCCHAIN is always steps*num_walkers long.
--- src/fitting/ensemble-samplers/emcee-io.sl | 28 +++++++++++++++++++++++ src/fitting/ensemble-samplers/emcee.sl | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 8e433788..0bd8a35b 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -141,6 +141,33 @@ private define emcee_open_chain_fits (io, filename, fit_handle, walkers) { io.num_steps = fits_get_num_rows(io.handle); } + +private define emcee_write_init_step (io, fit_handle, init_walkers, init_update, init_stat){ + variable par = __parameters(fit_handle.object); + fits_create_binary_table(io.handle, "INITWALKER", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "INITWALKER", 0); + variable tmp, update; + variable npar = length(par.index); + variable nwalkers = length(init_walkers); + fits_update_key(io.handle, "NFREEPAR", npar, " Number of free parameters"); + fits_update_key(io.handle, "NWALKERS", nwalkers/npar, " Number of walkers per free parameter"); + + variable i,j; + _for j (0, npar-1, 1) { + tmp = Double_Type[nwalkers]; + _for i (0, nwalkers-1, 1) + tmp[i] = init_walkers[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), 1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), 1, 1, init_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), 1, 1, init_update); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); +} + + private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); @@ -256,6 +283,7 @@ public define emcee_io_fits () { 
__f_read=&emcee_read_chain_fits, __f_finalize=&emcee_finalize_chain_fits, __f_close=&emcee_close_chain_fits, + __f_initwrite=&emcee_write_init_step, }; } diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 9de50f2a..0f40f092 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -323,7 +323,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { this.update[j] = 1; } if (cont == NULL) - io.__f_write(this.fit, this.walkers, this.update, this.stat); + io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); } % the main loop where the magic happens -- GitLab From 9a87707cfa0292db8cb1e619893de69ec14a8337 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 27 Jan 2021 04:24:14 +0100 Subject: [PATCH 32/89] Fix bug in previous_number_walkers Variables got overwritten instead of written into the array --- src/fitting/ensemble-samplers/emcee.sl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 0f40f092..e5c95624 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -25,8 +25,8 @@ private define previous_number_walkers (set1_per_node, set2_per_node) { variable i, c1 = 0, c2 = 0; _for i (0, l-1, 1) { - set1_handled = c1; - set2_handled = c2; + set1_handled[i] = c1; + set2_handled[i] = c2; c1 += set1_handled[i]; c2 += set2_handled[i]; } -- GitLab From b94a24f943301657c156fa5555bceafb632ec828 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 27 Jan 2021 14:20:43 +0100 Subject: [PATCH 33/89] A bit of code cleaning --- src/fitting/ensemble-samplers/emcee.sl | 172 ++++++++++++++++--------- 1 file changed, 108 insertions(+), 64 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e5c95624..17bd8e27 100644 --- a/src/fitting/ensemble-samplers/emcee.sl
+++ b/src/fitting/ensemble-samplers/emcee.sl @@ -6,14 +6,16 @@ require("rand"); % get walkers per node private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; - variable n = number_walkers/nodes + 1; - variable missing = nodes - (number_walkers mod nodes); + variable n = number_walkers/nodes + 1; + variable missing = nodes - (number_walkers mod nodes); variable set1_per_node, set2_per_node; walkers_per_node[[0:nodes-missing-1]] = n; walkers_per_node[[nodes-missing:nodes-1]] = n-1; + set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set set1_per_node = walkers_per_node - set2_per_node; + return set1_per_node, set2_per_node; } @@ -40,15 +42,18 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat throw UsageError, sprintf("Unable to parse qualifier for %s", type); variable s = strchop(exec_string, ';', 0); + if (length(s) > 2) throw UsageError, sprintf("Failed parsing option '%s'", exec_string); variable fname = strtrim(s[0]); variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + if (NULL == f) throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; + return (@f)(;; opt); } @@ -58,6 +63,7 @@ private define setup_node (node, num_nodes, total_walkers, nrands) { variable set2_len = total_walkers >> 1; % split walker in two sets variable set1_len = total_walkers - set2_len; variable j; + (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); @@ -110,28 +116,31 @@ private define release_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init variable urand = qualifier("urand", NULL); - if (NULL == urand) - throw InternalError, "No random generator given"; variable upick = qualifier("upick", NULL); + + if (NULL == urand) + throw InternalError, "Missing random generator"; + if (NULL == upick) - throw InternalError, "No random generator given"; + throw InternalError, "Missing random generator"; % select pivot walkers for current set variable set_len, complement_len, len_offset; variable set_per_node, set_handled; variable pick; - if (1 == N.set) { - set_len = N.set1; + + if (1 == N.set) { % + set_len = N.set1; complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; + len_offset = 0; + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; } else if (2 == N.set) { - set_len = N.set2; + set_len = N.set2; complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; + len_offset = N.set1; + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; } pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set @@ -141,20 +150,29 @@ private define release_walkers_mpi_master (node, num_nodes, N) % set current walkers and complement pivots _for i (0, set_len-1, 1) N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = 
(@urand)(set_len*N.num_rands); % get new random numbers for current set + + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] + = (@urand)(set_len*N.num_rands); % get new random numbers for current set _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data % set the walkers for node i this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1]+(set_handled[i]+len_offset)*N.num_rands]; + this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] + +(set_handled[i]+len_offset)*N.num_rands]; + _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 + () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], + length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], + length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands], set_len*N.num_rands, i, 2); % send random numbers with tag 2 + + () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] + +len_offset*N.num_rands], + set_len*N.num_rands, i, 2); % send random numbers with tag 2 } #endif } @@ -164,21 +182,26 @@ private define release_walkers_mpi_slave (node, num_nodes, N) #ifexists rcl_mpi_init variable set_len, len_offset; variable this_randoms; + if (1 == N.set) { - set_len = N.set1_per_node; + set_len = N.set1_per_node; len_offset = 0; } else 
if (2 == N.set) { - set_len = N.set2_per_node; + set_len = N.set2_per_node; len_offset = N.set1_per_node[node]; } variable j; this_randoms = Double_Type[set_len*N.num_rands]; _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) + () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], + length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], + length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(this_randoms, set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(this_randoms, + set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; #endif } @@ -196,27 +219,34 @@ private define catch_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init variable set_len, len_offset, set_per_node, set_handled; + if (1 == N.set) { set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; + set_handled = N.set1_handled; + len_offset = 0; } else if (2 == N.set) { set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; + set_handled = N.set2_handled; + len_offset = N.set1; } variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index + variable this_stat, this_update; % we have to use intermediate storage, + % slang creates a copy of an array when addressed by index + _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; + this_stat = Double_Type[set_per_node[i]]; this_update = Int_Type[set_per_node[i]]; + _for j (0, set_per_node[i]-1, 1) - () = 
rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], + length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); + N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; + N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; } #endif } @@ -225,17 +255,20 @@ private define catch_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init variable set_len, len_offset; + if (1 == N.set) { - set_len = N.set1_per_node[node]; + set_len = N.set1_per_node[node]; len_offset = 0; } else if (2 == N.set) { - set_len = N.set2_per_node[node]; + set_len = N.set2_per_node[node]; len_offset = N.set1_per_node[node]; } variable i; _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], + length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); #endif @@ -251,15 +284,16 @@ private define catch_walkers_mpi (node, num_nodes, N) private define emcee_mpi (walker_per_par, number_par, steps) { variable total_walkers = walker_per_par*number_par; - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); + + variable init = qualifier("init", NULL); + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = 
qualifier("upick", NULL); + variable cont = qualifier("continue", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); + variable load = qualifier("load", NULL); variable node, num_nodes; @@ -267,7 +301,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node = rcl_mpi_init(); num_nodes = rcl_mpi_numtasks(); num_nodes = (num_nodes<1) ? 1 : num_nodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... + rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! + % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... #else node = 0; num_nodes = 1; @@ -275,11 +310,13 @@ private define emcee_mpi (walker_per_par, number_par, steps) { move = emcee_call_setup_fun(move, "move"); variable this = setup_node(node, num_nodes, total_walkers, move.nrands); + % 'this' is the mpi_emcee handle for this node! 
+ % It contains all relevant data to do the calculation % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); + io = emcee_call_setup_fun(io, "io"); load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); + init = emcee_call_setup_fun(init, "init"); if (NULL == io || NULL == load_hook || NULL == init) return; ifnot (node) { % master only @@ -310,7 +347,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { walker_cycle = Array_Type[collector_length]; update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; + stat_cycle = Double_Type[collector_length]; if (NULL == urand || NULL == upick) throw InternalError, "Missing random number generator"; @@ -319,7 +356,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % and if not continuing a chain, write them out ifnot (node) { _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); this.update[j] = 1; } if (cont == NULL) @@ -333,22 +370,26 @@ private define emcee_mpi (walker_per_par, number_par, steps) { _for set (1, 2, 1) { this.set = set; - release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + release_walkers_mpi(node, + num_nodes, + this; upick=upick, urand=urand); % release walkers to freedom ... if (1 == set) { - set_len = this.set1_per_node[node]; + set_len = this.set1_per_node[node]; len_offset = 0; } else if (2 == set) { - set_len = this.set2_per_node[node]; + set_len = this.set2_per_node[node]; len_offset = this.set1_per_node[node]; } _for j (0, set_len-1, 1) { % ... let them move ... 
- (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], this.stat[j+len_offset]); + (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], + this.pivots[j+len_offset], + this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], + this.stat[j+len_offset]); this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; + this.update[j+len_offset] = update; + this.stat[j+len_offset] = stat; } catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! @@ -384,25 +425,28 @@ private define emcee_mpi (walker_per_par, number_par, steps) { define emcee_new (walkers_per_par, steps) { variable qs = struct { - move = "stretch", % defined move - urand = &rand_uniform, % double random generator - upick = &rand_int, % int random generator - init = "uniform", % initialization function - load = NULL, % initialize from file - read = "fits", % specifier for read - write = "fits", % specifier for write - output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file + move = "stretch", % defined move + urand = &rand_uniform, % double random generator + upick = &rand_int, % int random generator + init = "uniform", % initialization function + load = NULL, % initialize from file + read = "fits", % specifier for read + write = "fits", % specifier for write + output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file continue = NULL, % continue file }; if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; + if (NULL == all_data()) throw UsageError, "No data set loaded"; + ifnot (0 ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; -- GitLab From ed4a7217ba69c85262f3031e12ef38ff9c8b6c80 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 2 Feb 2021 11:13:25 +0100 Subject: [PATCH 34/89] Start emcee restructure ---
src/fitting/ensemble-samplers/emcee-mpi.sl | 683 +++++++++++++++++++++ 1 file changed, 683 insertions(+) create mode 100644 src/fitting/ensemble-samplers/emcee-mpi.sl diff --git a/src/fitting/ensemble-samplers/emcee-mpi.sl b/src/fitting/ensemble-samplers/emcee-mpi.sl new file mode 100644 index 00000000..a0381dac --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-mpi.sl @@ -0,0 +1,683 @@ +% -*- mode: slang; mode: fold; -*- % + +require("rand"); + +%%% START FROM SCRATCH... well sort of + +% Implementation of the emcee hammer () with the principle idea +% that multiple nodes (engines) are responsible for a part of the +% walkers. For efficiency the walkers are distributed equally to +% each engine. To keep the statistical properties the walkers are +% seperated in to two groups (see ref) where the next step of +% group one depends on the current position of group two and the +% next step of group two depends on the new position of group one. +% For most efficiency we try to reduce the required computations +% to the minimum possible such that the model evaluation plus +% the necessary communication is everything that happens in the +% main loop. +% +% To prevent any side effects from the PRNG we let the master +% calculate enough for each step and distribute them to the +% slaves. 
+ +private define emceeEvalFunction () %{{{ +{ + variable args = __pop_list(_NARGS); + variable handle = args[0]; + variable fun = handle.function; + + return @fun(__push_list(args[[1:]]), handle.info;; handle.options); +} +%}}} + +private define emceeAddFunction (name, function, list) %{{{ +{ + list[strtrim(name)] = struct { + function = function, + userdata = qualifier("userdata"), + data = qualifier("data"); + help = qualifier("help", name), + }; +} +%}}} + +private define emceeSetFunction (functionString, list, pointer) %{{{ +{ + variable s = strchop(functionString, ';', 0); + variable name = strtrim(s[0]); + variable options = NULL; + + if (length(s)>1) + options = eval(sprintf("struct { %s }", s[1])); + + ifnot (assoc_key_exists(list, name)) + throw UndefinedNameError, sprintf("'%s' is not registere"); + + variable function = struct { + eval = &emceeEvalFunction, + @(list[name]), + options = options, + functionString = functionString, + }; + + @pointer = function; +} +%}}} + +private define emceeGetFunction (pointer) %{{{ +{ + return pointer.functionString; +} +%}}} + +%{{{ move functions + +% access to the engine. Can set how many random numbers are required + +private variable Emcee_Move_List = Assoc_Type[Struct_Type]; +define add_emcee_move (name, function, nrand, list) %{{{ +{ + variable data = struct { nrand = nrand }; + variable userdata = qualifier("userdata"); + + emceeAddFunction(__push_list(args), Emcee_Move_List; + data=data, userdata=userdata); +} +%}}} + +private variable Emcee_Move; +define set_emcee_move (moveString) %{{{ +{ + emceeSetFunction(moveString, Emcee_Move_List, &Emcee_Move); +} +%}}} + +define get_emcee_move () %{{{ +{ + return emceeGetFunction(&emcee_Move); +} +%}}} + +%}}} + +%{{{ file functions + +% have acces to the engine. 
Gets the cycle number + +private variable Emcee_Write_List = Assoc_Type[Struct_Type]; +private variable Emcee_Read_List = Assoc_Type[Struct_Type]; +define add_emcee_write( + +%}}} + +private define emceeSetupEngine (id, numberEngines, totalNumberWalkers) %{{{ +{ + variable engine = struct { + id = id, % the engines id (0 is the master) + numberEngines = numberEngines, % total number of engines + + walkers, % array of current walker position for this engine + pivots, % array of current pivot positions, should not change for one loop! + rolls, % array of required random numbers + update, % array updater track + stat, % array step statistic + totalNumberWalkers = totalNumberWalkers, % number of all walkers + totalNumberSet1, % number walkers in set 1 numWalkers/2 + totalNumberSet2, % number walkers in set 2 numWalkers-numWalkers/2 + + numberWalkers, % number walkers handled by this engine + numberWalkersSet1, % number walkers in set 1 belonging to this engine + numberWalkersSet2, % number walkers in set 2 belonging to this engine + + firstWalkerSet1, % index of first walker in set 1 handle by this engine + firstWalkerSet2, % index of first walker in set 2 handle by this engine + + % set = 0, % indicates which set to update (1: first set, 2: second set) + fit = open_fit(), % the fit object, will fail if no model is loaded + numberParameters = num_free_params(), % number of fit parameters + numberWalkerRolls, % number of random numbers each walker uses + }; + + engine.totalNumberSet1 = totalNumberWalkers/2; + engine.totalNumberSet2 = totalNumberWalkers - engine.totalNumberSet1; + + % divide walkers evenly (remainders are given to highest ids) + engine.numberWalkers = engine.totalNumberWalkers/engine.numberEngines + + 1 - ((engine.totalNumberWalkers mod engine.numberEngines) <= engine.id); + engine.numberWalkersSet1 = engine.numberWalkers/2; + engine.numberWalkersSet2 = engine.numberWalkers-engine.numberWalkersSet1; + + % master stores all values + variable size = id ? 
engine.numberWalkers : engine.totalNumberWalkers; + variable j; + engine.walkers = Array_Type[size]; + engine.pivots = Array_Type[size]; + engine.rolls = Double_Type[size]; + engine.update = Int_Type[size]; + engine.stat = Double_Type[size]; + + _for j (0, size-1) { + engine.walkers[j] = Double_Type[engine.numberParameters]; + engine.pivots[j] = Double_Type[engine.numberParameters]; + } + + return engine; +} +%}}} + +private define emceeSetupGears (engine) %{{{ +{ + variable gears = struct { + initialize, % initialize function + step, % step function + random_uniform, % uniform random number generator (float) + random_pick, % uniform random number generator (int) + read, % read handle + write, % write handle (may be null, which means that read handle is used) + }; + + gears.move = NULL; + + if (0 == engine.id) { + % load or init, open file/use open file + } + + return gears; +} +%}}} + +private define emceeReleaseWalkers (engine) %{{{ +{ +} +%}}} + +private define emceeMoveWalkers (engine) %{{{ +{ +} +%}}} + +private define emceeCatchWalkers (engine) %{{{ +{ +} +%}}} + +%{{{ some helpers + +% get walkers per node +private define distributeWalkers (numNodes, numWalkers) %{{{ +{ + variable walkersPerNode = Int_Type[numNodes]; + variable n = numWalkers/numNodes + 1; + variable missing = numNodes - (numWalkers mod numNodes); + variable set1PerNode, set2PerNode; % TODO: does not have to be array + + walkersPerNode[[0:numNodes-missing-1]] = n; + walkersPerNode[[numNodes-missing:numNodes-1]] = n-1; + + set2PerNode = walkersPerNode >> 1; % half walkers handled by each node per set + set1PerNode = walkersPerNode - set2PerNode; + + return set1PerNode, set2PerNode; +} +%}}} + +% get walkers handled up to this node +private define getHandledNumberWalkers (set1PerNode, set2PerNode) %{{{ +{ + variable l = length(set1PerNode); + variable set1Handled = Int_Type[l]; + variable set2Handled = Int_Type[l]; + variable i, c1 = 0, c2 = 0; + + _for i (0, l-1, 1) { + set1Handled[i] = c1; + 
set2Handled[i] = c2; + c1 += set1Handled[i]; + c2 += set2Handled[i]; + } + + return set1Handled, set2Handled; +} +%}}} + +% string similar to statistic or fit method definitions "name; option1=foo, option2=bar" +private define emceeInterfaceSetup (execString, type) %{{{ +{ + if (NULL == execString || typeof(execString) != String_Type) + throw UsageError, sprintf("Unable to parse qualifier for %s", type); + + variable s = strchop(execString, ';', 0); + + if (length(s) > 2) + throw UsageError, sprintf("Failed parsing option '%s'", execString); + + variable fname = strtrim(s[0]); + variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + + if (NULL == f) + throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); + + variable opt = length(s) == 2 ? eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; + + return (@f)(;; opt); +} +%}}} + +private define emceeMPISetupNode (node, numNodes, numTotalWalkers, numRands) %{{{ +{ + variable set1PerNode, set2PerNode; + variable set1Handled, set2Handled; + variable set2Len = numTotalWwalkers >> 1; % split walker in two sets + variable set1Len = numTotalWalkers - set2Len; + variable j; + + (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); + (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); + + variable N = struct { + node = node, % this nodes number (0 is master) + num_nodes = num_nodes, % total number of nodes + walkers, % current walker position + pivots, % current pivot positions, should not change for one loop! 
+ randoms, % required randoms + update, % updater track + stat, % step statistic + total_walkers, % number of totals walkers + set1 = set1_len, % walkers for 1st update + set2 = set2_len, % walkers set for 2nd update + set1_per_node = set1_per_node, % walkers of set1 processed per node + set2_per_node = set2_per_node, % walkers of set2 processed per node + set1_handled = set1_handled, % walkers of set1 handled by all previous nodes + set2_handled = set2_handled, % walkers of set2 handled by all previous nodes + set = 0, % indicates which set to update (1: first set, 2: second set) + fit = open_fit(), % the fit object, will fail if no model is loaded + num_pars = num_free_params(), % number of fit parameters + num_rands = nrands, % number of random numbers per walker + }; + + if (node) { % setup for slaves + N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; + N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; + N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; + } else { % setup for master + N.walkers = Array_Type[total_walkers]; + N.pivots = Array_Type[total_walkers]; + N.update = Int_Type[total_walkers]; + N.stat = Double_Type[total_walkers]; + N.randoms = Double_Type[total_walkers*nrands]; + } + + _for j (0, length(N.walkers)-1, 1) { + N.walkers[j] = Double_Type[N.num_pars]; + N.pivots[j] = Double_Type[N.num_pars]; + } + + return N; +} +%}}} + +%}}} + +%{{{ mpi functions + +private define release_walkers_mpi_master (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable urand = qualifier("urand", NULL); + variable upick = qualifier("upick", NULL); + + if (NULL == urand) + throw InternalError, "Missing random generator"; + + if (NULL == upick) + throw InternalError, "Missing random generator"; + + % select pivot walkers for current set + variable set_len, complement_len, len_offset; 
+ variable set_per_node, set_handled; + variable pick; + + if (1 == N.set) { % + set_len = N.set1; + complement_len = N.set2; + len_offset = 0; + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + } else if (2 == N.set) { + set_len = N.set2; + complement_len = N.set1; + len_offset = N.set1; + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + } + pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set + + variable this_walkers, this_pivots, this_randoms; + variable i,j; + + % set current walkers and complement pivots + _for i (0, set_len-1, 1) + N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points + + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] + = (@urand)(set_len*N.num_rands); % get new random numbers for current set + + _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data + % set the walkers for node i + this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + % pick the pivots for node i + this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + % set the randoms for node i + this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] + +(set_handled[i]+len_offset)*N.num_rands]; + + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], + length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], + length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 + } + + () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] + +len_offset*N.num_rands], + set_len*N.num_rands, i, 2); % send random numbers with tag 2 + } +#endif +} + +private define release_walkers_mpi_slave (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable set_len, len_offset; + variable this_randoms; + + if (1 == N.set) { + set_len = N.set1_per_node; + len_offset = 0; + } else if (2 == N.set) { 
+ set_len = N.set2_per_node; + len_offset = N.set1_per_node[node]; + } + + variable j; + this_randoms = Double_Type[set_len*N.num_rands]; + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], + length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], + length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) + } + () = rcl_mpi_org_recv_double(this_randoms, + set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; +#endif +} + +private define release_walkers_mpi (node, num_nodes, N) { + % distribute walkers, pivots and random numbers + + if (node) + release_walkers_mpi_slave(node, num_nodes, N); + else + release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); +} + +private define catch_walkers_mpi_master (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable set_len, len_offset, set_per_node, set_handled; + + if (1 == N.set) { + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + len_offset = 0; + } else if (2 == N.set) { + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + len_offset = N.set1; + } + + variable i,j; + variable this_stat, this_update; % we have to use intermediate storage, + % slang creates a copy of an array when addressed by index + + _for i (1, num_nodes-1, 1) { + this_stat = Double_Type[set_per_node[i]]; + this_update = Int_Type[set_per_node[i]]; + + _for j (0, set_per_node[i]-1, 1) + () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], + length(N.walkers[0]), i, i); + + () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); + () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); + + N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; + N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; + } +#endif +} + +private define 
catch_walkers_mpi_slave (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable set_len, len_offset; + + if (1 == N.set) { + set_len = N.set1_per_node[node]; + len_offset = 0; + } else if (2 == N.set) { + set_len = N.set2_per_node[node]; + len_offset = N.set1_per_node[node]; + } + + variable i; + _for i (0, set_len-1, 1) + () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], + length(N.walkers[len_offset]), 0, node); + + () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); + () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); +#endif +} + +private define catch_walkers_mpi (node, num_nodes, N) +{ + if (node) + catch_walkers_mpi_slave(node, num_nodes, N); + else + catch_walkers_mpi_master(node, num_nodes, N); +} + +private define emceeMPIInitFile (emceeT) %{{{ +{ + variable init = qualifier("init", NULL); + variable cont = qualifier("continue", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); + variable load_hook = qualifier("read", NULL); + variable load = qualifier("load", NULL); + + % read the settings, if one of them is NULL help was called + io = emcee_call_setup_fun(io, "io"); + load_hook = emcee_call_setup_fun(load_hook, "io"); + init = emcee_call_setup_fun(init, "init"); + if (NULL == io || NULL == load_hook || NULL == init) return; + + ifnot (node) { % master only + if (cont != NULL) + io.__f_open(cont, emceeT.fit, emcee.walkers); + else if (load != NULL) { + load_hook.__f_read(load, emceeT.walkers); + load_hook.__f_close(); + io.__f_create(output, emceeT.fit, emceeT.numTotalWalkers); + } else { + io.__f_create(output, emceeT.fit, total_walkers); + init.__f(emceeT.walkers, emceeT.fit); + } + } +} +%}}} + +private define emceeMPIInit () %{{{ +{ + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = qualifier("upick", NULL); + + variable node, numNodes; +#ifexists rcl_mpi_init + node = 
rcl_mpi_init(); + numNodes = rcl_mpi_numtasks(); + numNodes = (numNodes<1) ? 1 : numNodes; + rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! + % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... +#else + node = 0; + num_nodes = 1; +#endif + + move = emceeInterfaceSetup(move, "move"); + + % This is the mpi_emcee handle for this node! + % It contains all relevant data to do the calculation + variable emceeHandle = setup_node(node, num_nodes, total_walkers, move.nrands); + +} +%}}} + +%}}}% + +private define emcee_mpi (walker_per_par, number_par, steps) { + variable total_walkers = walker_per_par*number_par; + + variable init = qualifier("init", NULL); + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = qualifier("upick", NULL); + variable cont = qualifier("continue", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); + variable load_hook = qualifier("read", NULL); + variable load = qualifier("load", NULL); + + variable node, num_nodes; + (node, num_nodes) = emceeMPIInit(); + + move = emcee_call_setup_fun(move, "move"); + variable this = setup_node(node, num_nodes, total_walkers, move.nrands); + % 'this' is the mpi_emcee handle for this node! 
+ % It contains all relevant data to do the calculation + + % read the settings, if one of them is NULL help was called + io = emcee_call_setup_fun(io, "io"); + load_hook = emcee_call_setup_fun(load_hook, "io"); + init = emcee_call_setup_fun(init, "init"); + if (NULL == io || NULL == load_hook || NULL == init) return; + + ifnot (node) { % master only + if (cont != NULL) + io.__f_open(cont, this.fit, this.walkers); + else if (load != NULL) { + load_hook.__f_read(load, this.walkers); + load_hook.__f_close(); + io.__f_create(output, this.fit, total_walkers); + } else { + io.__f_create(output, this.fit, total_walkers); + init.__f(this.walkers, this.fit); + } + } + + variable collector_length; % the collector so we can skip turns before writing to disk + variable walker_cycle; + variable update_cycle; + variable stat_cycle; + variable cycle_step = 0; + + % setup space + variable j; + if (node) % slave + collector_length = 0; + else % master + collector_length = length(this.walkers)*io.cycle; + + walker_cycle = Array_Type[collector_length]; + update_cycle = Int_Type[collector_length]; + stat_cycle = Double_Type[collector_length]; + + if (NULL == urand || NULL == upick) + throw InternalError, "Missing random number generator"; + + % evaluate the model at the walker positions to get the statistics + % and if not continuing a chain, write them out + ifnot (node) { + _for j (0, length(this.walkers)-1, 1) { + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.update[j] = 1; + } + if (cont == NULL) + io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); + } + + % the main loop where the magic happens + variable s, walker, update, stat, set, set_len, len_offset; + _for s (0, steps-1, 1) { + cycle_step = s mod io.cycle; + _for set (1, 2, 1) { + this.set = set; + + release_walkers_mpi(node, + num_nodes, + this; upick=upick, urand=urand); % release walkers to freedom ... 
+ + if (1 == set) { + set_len = this.set1_per_node[node]; + len_offset = 0; + } else if (2 == set) { + set_len = this.set2_per_node[node]; + len_offset = this.set1_per_node[node]; + } + + _for j (0, set_len-1, 1) { % ... let them move ... + (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], + this.pivots[j+len_offset], + this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], + this.stat[j+len_offset]); + this.walkers[j+len_offset] = walker; + this.update[j+len_offset] = update; + this.stat[j+len_offset] = stat; + } + + catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! + } + + % if cycle end is reached write the chain + ifnot (node) { % master only + if (not cycle_step && s > 0) + io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); + _for j (0, length(this.walkers)-1, 1) { + walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); + update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; + stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; + } + } + } + + % we might have unwritten steps left, so better write them here + cycle_step++; % the last step is never written in the loop, we have to deal with it here + ifnot (node) { + io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], + update_cycle[[:cycle_step*length(this.walkers)-1]], + stat_cycle[[:cycle_step*length(this.walkers)-1]]); + } + + % and finally call the finalizing function + ifnot (node) { % master only + io.__f_finalize(steps, walker_per_par, number_par, this.fit); + io.__f_close(); + } +} +%}}}% -- GitLab From 974c0bfe45ad6ec2d07f511e789ef20ceed5c4b8 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 02:11:56 +0100 Subject: [PATCH 35/89] Communication not working ... 
--- src/fitting/ensemble-samplers/emcee-init.sl | 95 - src/fitting/ensemble-samplers/emcee-io.sl | 290 --- src/fitting/ensemble-samplers/emcee-moves.sl | 78 - src/fitting/ensemble-samplers/emcee-mpi.sl | 683 -------- src/fitting/ensemble-samplers/emcee.sl | 1653 ++++++++++++++---- 5 files changed, 1305 insertions(+), 1494 deletions(-) delete mode 100644 src/fitting/ensemble-samplers/emcee-init.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-io.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-moves.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-mpi.sl diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl deleted file mode 100644 index 0895ceab..00000000 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ /dev/null @@ -1,95 +0,0 @@ -% -*- mode: slang; mode: fold; -*- - -require("rand"); - -% WALKER INIT FUNCTIONS -% Allow different functions for initializing walkers. Follows the same idea -% as for the move steps. -% -% Distribution is only done by the master process, so we don't have to care -% about the random numbers. 
-% -% Init function takes an initialized walker array, and the fit handle -% setup function must be called emcee_init_ - -%{{{% helpers -private define __init_globals () { return struct_combine( - struct { - name = "unspecified init", - __f = NULL, - }, - __qualifiers()); -} - -%}}}% - -%{{{% Uniform initialization function -% pick random parameter values within the boundaries -private define init_parameter_walker_uniform (init, walkers, fit_handle) { - variable i; - variable par = __parameters(fit_handle.object); - variable num_p = length(par.value); - - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges"; - - _for i (0, length(walkers)-1, 1) - walkers[i] = rand_uniform(num_p)*(par.max-par.min)+par.min; -} -public define emcee_init_uniform () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - - variable defaults = __init_globals(; - name="uniform init", - ); - return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_uniform }; -} -%}}}% - -%{{{% Sphere initialization function with exponential decresing probability -% pick random parameters from gauss((x-x0 -private define init_parameter_walker_gauss_sphere (init, walkers, fit_handle) { - variable i,j,w; - variable par = __parameters(fit_handle.object); - variable num_p = length(par.value); - variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p - variable relative = qualifier_exists("relative") || - not qualifier_exists("sigma") || - not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays - - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges"; - - variable s_par; - variable s_len = length(sigma); - if (Array_Type == 
typeof(sigma) && s_len != num_p) - throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); - _for i (0, length(walkers)-1, 1) { - if (relative) - s_par = _min(par.value-par.min, par.max-par.value)/sigma; - else - s_par = sigma; - walkers[i] = rand_gauss(1, num_p)*s_par+par.value; - w = where(walkers[i]par.max); - walkers[i][w] = par.max[w]; - } -} -public define emcee_init_gauss () { - if (qualifier_exists("help")) { - help("emcee_init_gauss"); - return NULL; - } - - variable defaults = __init_globals(; - name="gauss init", - ); - return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_gauss_sphere }; -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl deleted file mode 100644 index 0bd8a35b..00000000 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ /dev/null @@ -1,290 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -% EMCEE INPUT OUTPUT ROUTINES -% Same structure as moves. Routines for output and input are describeed -% here. - -%{{{% helpers -private define __io_globals () { - return struct { - name = "unspecified io", - __f_create = NULL, % create function, takes 'io-object', 'filename', 'fit_handle', 'total walkers' - __f_open = NULL, % open function, takes 'io-object', 'filename', 'initialized walkers'. 
- __f_read = NULL, % same as open but is used for seting the walkers, requires less consitency - __f_write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array' - __f_finalize = NULL, % finalizes output, takes 'io-object', 'current number steps', 'walker per parameter', 'number parameter' - __f_close = NULL, % close any handles if necessary, takes 'io-object' - handle = NULL, % io access (usually file pointer) - cycle = 1, % number of steps to perform before write - @__qualifiers() }; -} -%}}}% - -%{{{% FITS input output routines -private define emcee_init_chain_fits (io, filename, fit_handle, total_walkers) { - % write ensemble evolution to fits file - variable init_values; - variable data_info; - variable par_names; - variable i; - list_data(&data_info); - variable par = __parameters(fit_handle.object); - io.handle = fits_open_file(filename, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), "model function"); - fits_update_key(io.handle, "SLOPPY", 0, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)])) % sort here, so at least they are in index order - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], - ["D", "J", ["D"][par.index*0]], - [" fit statistics", " update indicator", [" parameter 
values"][par.index*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - % move back to chain table - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); - - io.num_steps = 0; - io.sloppy = 0; - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; - if (io.cycle < 1) - io.cycle = 1; -} - -private define emcee_open_chain_fits (io, filename, fit_handle, walkers) { - io.handle = fits_open_file(filename+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - throw IsisError, "Current model and chain model do not match"; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable par = __parameters(fit_handle.object); - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Free parameters and chain parameters differ"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - io.sloppy = (fsloppy > io.sloppy) ? 
fsloppy : io.sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", io.sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (io.sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); - if (file_nw != length(walkers)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, sprintf("Unable to continue chain with %d walkers with chain with %d walkers", file_nw, length(walkers)); - } - - tab = fits_read_table(io.handle); - variable i,j; - variable names = get_struct_field_names(tab); - variable l = length(names)-2; - _for j (0, length(walkers)-1, 1) - _for i (0, l-1, 1) - walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/length(walkers); - if (io.cycle < 1) - io.cycle = 1; - - io.num_steps = fits_get_num_rows(io.handle); -} - - -private define emcee_write_init_step (io, fit_handle, init_walkers, init_update, init_stat){ - variable par = __parameters(fit_handle.object); - fits_create_binary_table(io.handle, "INITWALKER", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], - ["D", "J", ["D"][par.index*0]], - [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "INITWALKER", 0); - variable 
tmp, update; - variable npar = length(par.index); - variable nwalkers = length(init_walkers); - fits_update_key(io.handle, "NFREEPAR", npar, " Number of free parameters"); - fits_update_key(io.handle, "NWALKERS", nwalkers/npar, " Number of walkers per free parameter"); - - variable i,j; - _for j (0, npar-1, 1) { - tmp = Double_Type[nwalkers]; - _for i (0, nwalkers-1, 1) - tmp[i] = init_walkers[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), 1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), 1, 1, init_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), 1, 1, init_update); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); -} - - -private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { - io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - throw IsisError, "Current model and chain model do not match"; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable par = __parameters(fit_handle.object); - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Free parameters and chain parameters differ"; - } - - variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); - if (file_nw != length(walkers)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, sprintf("Unable to set chain with %d walkers from file with %d walkers", file_nw, length(walkers)); - } - - tab = fits_read_table(io.handle); - variable i,j; - variable names = get_struct_field_names(tab); - variable l = 
length(names)-2; - _for j (0, length(walkers)-1, 1) - _for i (0, l-1, 1) - walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; -} - -private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cycle, stat_cycle) { - variable tmp, update; - variable par = __parameters(fit_handle.object); - variable npar = length(par.index); - variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle - variable i,j; - variable collen = fits_get_num_rows(io.handle); - _for j (0, npar-1, 1) { - tmp = Double_Type[steps_walkers]; - _for i (0, steps_walkers-1, 1) - tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), collen+1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); -} - -private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { - variable tmp, tmp2; - variable reread; - variable total_walkers = walker_per_parameter*number_parameter; - variable collen = fits_get_num_rows(io.handle); % length of chain - variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? 
steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); - fits_update_key(io.handle, "NWALKERS", walker_per_parameter); - fits_update_key(io.handle, "NFREEPAR", number_parameter); - - () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.num_steps+1, collen-io.num_steps, &reread); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); - collen = fits_get_num_rows(io.handle); % read length of chain summary - variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; - variable step_stat = reread[1]; - variable step_update = reread[0]; - - _for j (0, steps-1, 1) { - frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; - tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; - min_stat[j] = min(tmp2); - max_stat[j] = max(tmp2); - med_stat[j] = median(tmp2); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); -} - -private define emcee_close_chain_fits (io) { - fits_close_file(io.handle); - io.handle = NULL; -} - -public define emcee_io_fits () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - variable settings = (_NARGS==1) ? 
() : NULL; - variable defaults = __io_globals(; - name = "io fits", - num_steps = NULL, - sloppy = 0, - ); - return struct { @defaults, @__qualifiers(), - __f_create=&emcee_init_chain_fits, - __f_open=&emcee_open_chain_fits, - __f_write=&emcee_write_chain_fits, - __f_read=&emcee_read_chain_fits, - __f_finalize=&emcee_finalize_chain_fits, - __f_close=&emcee_close_chain_fits, - __f_initwrite=&emcee_write_init_step, - }; -} - -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl deleted file mode 100644 index 145c5027..00000000 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ /dev/null @@ -1,78 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -% DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER -% Walker moves should be defined as functions where the function returns a -% structure suitable to describe the step algorithm. For adjustment the -% function should combine the qualifiers with the default structure. The -% move function MUST evaluate the fit model by using the fit handle and -% return new position and statistics. -% -% Besides additional arguments given in the structure the algorithm must -% process the fit object, current position, pivot position and an array of -% random numbers. -% -% For constructing the structure the __move_globals function is convenient. -% The constructur function must be names emcee_move_. 
-% -% For an example see the STRETCH_MOVE -%{{{% Helpers for move steps -private define __move_globals () { - return struct_combine ( struct { - name = "unspecified move", % name of the step function to be used in output files - move = NULL, % the move function itself, NULL == Error - nrands = 0, % number of required random numbers for each step - }, __qualifiers); -} -%}}}% - -%{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY -% define inverse cumulative distribution function for generating -% random numbers following 1/z^2 when z in [1/a, a] -% TODO: should make this an adjustable thing -private define inverse_cdf (u, a) { - return (u*(a-1.)+1.)^2./a; -} - -% stretch move as of Goodman & Weare 2010 -% Move must evaluate the fit function -private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { - variable z = inverse_cdf(u[0], move.a); - variable ystat, ret_stat = prev_stat; - variable y; % step proposition - variable x_t1 = x; % resulting step - variable update = 0; % update indicator - - % calculate the new position (utilize array operations) - y = x_j + z*(x-x_j); - - % try evaluating, if out of bounds, does nothing TODO: This is biasing the result, check how to do this correct - try { - % evaluate fit function for 'y' - ystat = fit_object.eval_statistic(y;nocopy); - - % caluculate if we accept the step based on the statistics of the - % model. We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { - ret_stat = ystat; - x_t1 = y; - update = 1; - } - } catch IsisError; - - % return new walker position, update, new statistic - return (x_t1, update, ret_stat); -} - -public define emcee_move_stretch () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - variable defaults = __move_globals(; - name="stretch move", - nrands=2, - a=2. 
% move scaling - ); - return struct { @defaults, @__qualifiers(), __f=&stretch_move }; % combine settings -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-mpi.sl b/src/fitting/ensemble-samplers/emcee-mpi.sl deleted file mode 100644 index a0381dac..00000000 --- a/src/fitting/ensemble-samplers/emcee-mpi.sl +++ /dev/null @@ -1,683 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -require("rand"); - -%%% START FROM SCRATCH... well sort of - -% Implementation of the emcee hammer () with the principle idea -% that multiple nodes (engines) are responsible for a part of the -% walkers. For efficiency the walkers are distributed equally to -% each engine. To keep the statistical properties the walkers are -% seperated in to two groups (see ref) where the next step of -% group one depends on the current position of group two and the -% next step of group two depends on the new position of group one. -% For most efficiency we try to reduce the required computations -% to the minimum possible such that the model evaluation plus -% the necessary communication is everything that happens in the -% main loop. -% -% To prevent any side effects from the PRNG we let the master -% calculate enough for each step and distribute them to the -% slaves. 
- -private define emceeEvalFunction () %{{{ -{ - variable args = __pop_list(_NARGS); - variable handle = args[0]; - variable fun = handle.function; - - return @fun(__push_list(args[[1:]]), handle.info;; handle.options); -} -%}}} - -private define emceeAddFunction (name, function, list) %{{{ -{ - list[strtrim(name)] = struct { - function = function, - userdata = qualifier("userdata"), - data = qualifier("data"); - help = qualifier("help", name), - }; -} -%}}} - -private define emceeSetFunction (functionString, list, pointer) %{{{ -{ - variable s = strchop(functionString, ';', 0); - variable name = strtrim(s[0]); - variable options = NULL; - - if (length(s)>1) - options = eval(sprintf("struct { %s }", s[1])); - - ifnot (assoc_key_exists(list, name)) - throw UndefinedNameError, sprintf("'%s' is not registere"); - - variable function = struct { - eval = &emceeEvalFunction, - @(list[name]), - options = options, - functionString = functionString, - }; - - @pointer = function; -} -%}}} - -private define emceeGetFunction (pointer) %{{{ -{ - return pointer.functionString; -} -%}}} - -%{{{ move functions - -% access to the engine. Can set how many random numbers are required - -private variable Emcee_Move_List = Assoc_Type[Struct_Type]; -define add_emcee_move (name, function, nrand, list) %{{{ -{ - variable data = struct { nrand = nrand }; - variable userdata = qualifier("userdata"); - - emceeAddFunction(__push_list(args), Emcee_Move_List; - data=data, userdata=userdata); -} -%}}} - -private variable Emcee_Move; -define set_emcee_move (moveString) %{{{ -{ - emceeSetFunction(moveString, Emcee_Move_List, &Emcee_Move); -} -%}}} - -define get_emcee_move () %{{{ -{ - return emceeGetFunction(&emcee_Move); -} -%}}} - -%}}} - -%{{{ file functions - -% have acces to the engine. 
Gets the cycle number - -private variable Emcee_Write_List = Assoc_Type[Struct_Type]; -private variable Emcee_Read_List = Assoc_Type[Struct_Type]; -define add_emcee_write( - -%}}} - -private define emceeSetupEngine (id, numberEngines, totalNumberWalkers) %{{{ -{ - variable engine = struct { - id = id, % the engines id (0 is the master) - numberEngines = numberEngines, % total number of engines - - walkers, % array of current walker position for this engine - pivots, % array of current pivot positions, should not change for one loop! - rolls, % array of required random numbers - update, % array updater track - stat, % array step statistic - totalNumberWalkers = totalNumberWalkers, % number of all walkers - totalNumberSet1, % number walkers in set 1 numWalkers/2 - totalNumberSet2, % number walkers in set 2 numWalkers-numWalkers/2 - - numberWalkers, % number walkers handled by this engine - numberWalkersSet1, % number walkers in set 1 belonging to this engine - numberWalkersSet2, % number walkers in set 2 belonging to this engine - - firstWalkerSet1, % index of first walker in set 1 handle by this engine - firstWalkerSet2, % index of first walker in set 2 handle by this engine - - % set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - numberParameters = num_free_params(), % number of fit parameters - numberWalkerRolls, % number of random numbers each walker uses - }; - - engine.totalNumberSet1 = totalNumberWalkers/2; - engine.totalNumberSet2 = totalNumberWalkers - engine.totalNumberSet1; - - % divide walkers evenly (remainders are given to highest ids) - engine.numberWalkers = engine.totalNumberWalkers/engine.numberEngines - + 1 - ((engine.totalNumberWalkers mod engine.numberEngines) <= engine.id); - engine.numberWalkersSet1 = engine.numberWalkers/2; - engine.numberWalkersSet2 = engine.numberWalkers-engine.numberWalkersSet1; - - % master stores all values - variable size = id ? 
engine.numberWalkers : engine.totalNumberWalkers; - variable j; - engine.walkers = Array_Type[size]; - engine.pivots = Array_Type[size]; - engine.rolls = Double_Type[size]; - engine.update = Int_Type[size]; - engine.stat = Double_Type[size]; - - _for j (0, size-1) { - engine.walkers[j] = Double_Type[engine.numberParameters]; - engine.pivots[j] = Double_Type[engine.numberParameters]; - } - - return engine; -} -%}}} - -private define emceeSetupGears (engine) %{{{ -{ - variable gears = struct { - initialize, % initialize function - step, % step function - random_uniform, % uniform random number generator (float) - random_pick, % uniform random number generator (int) - read, % read handle - write, % write handle (may be null, which means that read handle is used) - }; - - gears.move = NULL; - - if (0 == engine.id) { - % load or init, open file/use open file - } - - return gears; -} -%}}} - -private define emceeReleaseWalkers (engine) %{{{ -{ -} -%}}} - -private define emceeMoveWalkers (engine) %{{{ -{ -} -%}}} - -private define emceeCatchWalkers (engine) %{{{ -{ -} -%}}} - -%{{{ some helpers - -% get walkers per node -private define distributeWalkers (numNodes, numWalkers) %{{{ -{ - variable walkersPerNode = Int_Type[numNodes]; - variable n = numWalkers/numNodes + 1; - variable missing = numNodes - (numWalkers mod numNodes); - variable set1PerNode, set2PerNode; % TODO: does not have to be array - - walkersPerNode[[0:numNodes-missing-1]] = n; - walkersPerNode[[numNodes-missing:numNodes-1]] = n-1; - - set2PerNode = walkersPerNode >> 1; % half walkers handled by each node per set - set1PerNode = walkersPerNode - set2PerNode; - - return set1PerNode, set2PerNode; -} -%}}} - -% get walkers handled up to this node -private define getHandledNumberWalkers (set1PerNode, set2PerNode) %{{{ -{ - variable l = length(set1PerNode); - variable set1Handled = Int_Type[l]; - variable set2Handled = Int_Type[l]; - variable i, c1 = 0, c2 = 0; - - _for i (0, l-1, 1) { - set1Handled[i] = c1; - 
set2Handled[i] = c2; - c1 += set1Handled[i]; - c2 += set2Handled[i]; - } - - return set1Handled, set2Handled; -} -%}}} - -% string similar to statistic or fit method definitions "name; option1=foo, option2=bar" -private define emceeInterfaceSetup (execString, type) %{{{ -{ - if (NULL == execString || typeof(execString) != String_Type) - throw UsageError, sprintf("Unable to parse qualifier for %s", type); - - variable s = strchop(execString, ';', 0); - - if (length(s) > 2) - throw UsageError, sprintf("Failed parsing option '%s'", execString); - - variable fname = strtrim(s[0]); - variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); - - if (NULL == f) - throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); - - variable opt = length(s) == 2 ? eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; - - return (@f)(;; opt); -} -%}}} - -private define emceeMPISetupNode (node, numNodes, numTotalWalkers, numRands) %{{{ -{ - variable set1PerNode, set2PerNode; - variable set1Handled, set2Handled; - variable set2Len = numTotalWwalkers >> 1; % split walker in two sets - variable set1Len = numTotalWalkers - set2Len; - variable j; - - (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); - (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); - - variable N = struct { - node = node, % this nodes number (0 is master) - num_nodes = num_nodes, % total number of nodes - walkers, % current walker position - pivots, % current pivot positions, should not change for one loop! 
- randoms, % required randoms - update, % updater track - stat, % step statistic - total_walkers, % number of totals walkers - set1 = set1_len, % walkers for 1st update - set2 = set2_len, % walkers set for 2nd update - set1_per_node = set1_per_node, % walkers of set1 processed per node - set2_per_node = set2_per_node, % walkers of set2 processed per node - set1_handled = set1_handled, % walkers of set1 handled by all previous nodes - set2_handled = set2_handled, % walkers of set2 handled by all previous nodes - set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - num_pars = num_free_params(), % number of fit parameters - num_rands = nrands, % number of random numbers per walker - }; - - if (node) { % setup for slaves - N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; - N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; - N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; - } else { % setup for master - N.walkers = Array_Type[total_walkers]; - N.pivots = Array_Type[total_walkers]; - N.update = Int_Type[total_walkers]; - N.stat = Double_Type[total_walkers]; - N.randoms = Double_Type[total_walkers*nrands]; - } - - _for j (0, length(N.walkers)-1, 1) { - N.walkers[j] = Double_Type[N.num_pars]; - N.pivots[j] = Double_Type[N.num_pars]; - } - - return N; -} -%}}} - -%}}} - -%{{{ mpi functions - -private define release_walkers_mpi_master (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - if (NULL == urand) - throw InternalError, "Missing random generator"; - - if (NULL == upick) - throw InternalError, "Missing random generator"; - - % select pivot walkers for current set - variable set_len, complement_len, len_offset; 
- variable set_per_node, set_handled; - variable pick; - - if (1 == N.set) { % - set_len = N.set1; - complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - } else if (2 == N.set) { - set_len = N.set2; - complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - } - pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - - variable this_walkers, this_pivots, this_randoms; - variable i,j; - - % set current walkers and complement pivots - _for i (0, set_len-1, 1) - N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points - - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] - = (@urand)(set_len*N.num_rands); % get new random numbers for current set - - _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data - % set the walkers for node i - this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; - % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; - % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] - +(set_handled[i]+len_offset)*N.num_rands]; - - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], - length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], - length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 - } - - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] - +len_offset*N.num_rands], - set_len*N.num_rands, i, 2); % send random numbers with tag 2 - } -#endif -} - -private define release_walkers_mpi_slave (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset; - variable this_randoms; - - if (1 == N.set) { - set_len = N.set1_per_node; - len_offset = 0; - } else if (2 == N.set) { 
- set_len = N.set2_per_node; - len_offset = N.set1_per_node[node]; - } - - variable j; - this_randoms = Double_Type[set_len*N.num_rands]; - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], - length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], - length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) - } - () = rcl_mpi_org_recv_double(this_randoms, - set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) - - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; -#endif -} - -private define release_walkers_mpi (node, num_nodes, N) { - % distribute walkers, pivots and random numbers - - if (node) - release_walkers_mpi_slave(node, num_nodes, N); - else - release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); -} - -private define catch_walkers_mpi_master (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset, set_per_node, set_handled; - - if (1 == N.set) { - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; - } else if (2 == N.set) { - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; - } - - variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, - % slang creates a copy of an array when addressed by index - - _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; - this_update = Int_Type[set_per_node[i]]; - - _for j (0, set_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], - length(N.walkers[0]), i, i); - - () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); - () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); - - N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; - } -#endif -} - -private define 
catch_walkers_mpi_slave (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset; - - if (1 == N.set) { - set_len = N.set1_per_node[node]; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node[node]; - len_offset = N.set1_per_node[node]; - } - - variable i; - _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], - length(N.walkers[len_offset]), 0, node); - - () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); - () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); -#endif -} - -private define catch_walkers_mpi (node, num_nodes, N) -{ - if (node) - catch_walkers_mpi_slave(node, num_nodes, N); - else - catch_walkers_mpi_master(node, num_nodes, N); -} - -private define emceeMPIInitFile (emceeT) %{{{ -{ - variable init = qualifier("init", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, emceeT.fit, emcee.walkers); - else if (load != NULL) { - load_hook.__f_read(load, emceeT.walkers); - load_hook.__f_close(); - io.__f_create(output, emceeT.fit, emceeT.numTotalWalkers); - } else { - io.__f_create(output, emceeT.fit, total_walkers); - init.__f(emceeT.walkers, emceeT.fit); - } - } -} -%}}} - -private define emceeMPIInit () %{{{ -{ - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - variable node, numNodes; -#ifexists rcl_mpi_init - node = 
rcl_mpi_init(); - numNodes = rcl_mpi_numtasks(); - numNodes = (numNodes<1) ? 1 : numNodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! - % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... -#else - node = 0; - num_nodes = 1; -#endif - - move = emceeInterfaceSetup(move, "move"); - - % This is the mpi_emcee handle for this node! - % It contains all relevant data to do the calculation - variable emceeHandle = setup_node(node, num_nodes, total_walkers, move.nrands); - -} -%}}} - -%}}}% - -private define emcee_mpi (walker_per_par, number_par, steps) { - variable total_walkers = walker_per_par*number_par; - - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); - - variable node, num_nodes; - (node, num_nodes) = emceeMPIInit(); - - move = emcee_call_setup_fun(move, "move"); - variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - % 'this' is the mpi_emcee handle for this node! 
- % It contains all relevant data to do the calculation - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, this.fit, this.walkers); - else if (load != NULL) { - load_hook.__f_read(load, this.walkers); - load_hook.__f_close(); - io.__f_create(output, this.fit, total_walkers); - } else { - io.__f_create(output, this.fit, total_walkers); - init.__f(this.walkers, this.fit); - } - } - - variable collector_length; % the collector so we can skip turns before writing to disk - variable walker_cycle; - variable update_cycle; - variable stat_cycle; - variable cycle_step = 0; - - % setup space - variable j; - if (node) % slave - collector_length = 0; - else % master - collector_length = length(this.walkers)*io.cycle; - - walker_cycle = Array_Type[collector_length]; - update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; - - if (NULL == urand || NULL == upick) - throw InternalError, "Missing random number generator"; - - % evaluate the model at the walker positions to get the statistics - % and if not continuing a chain, write them out - ifnot (node) { - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; - } - if (cont == NULL) - io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); - } - - % the main loop where the magic happens - variable s, walker, update, stat, set, set_len, len_offset; - _for s (0, steps-1, 1) { - cycle_step = s mod io.cycle; - _for set (1, 2, 1) { - this.set = set; - - release_walkers_mpi(node, - num_nodes, - this; upick=upick, urand=urand); % release walkers to freedom ... 
- - if (1 == set) { - set_len = this.set1_per_node[node]; - len_offset = 0; - } else if (2 == set) { - set_len = this.set2_per_node[node]; - len_offset = this.set1_per_node[node]; - } - - _for j (0, set_len-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], - this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], - this.stat[j+len_offset]); - this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; - } - - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! - } - - % if cycle end is reached write the chain - ifnot (node) { % master only - if (not cycle_step && s > 0) - io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); - _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; - stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; - } - } - } - - % we might have unwritten steps left, so better write them here - cycle_step++; % the last step is never written in the loop, we have to deal with it here - ifnot (node) { - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); - } - - % and finally call the finalizing function - ifnot (node) { % master only - io.__f_finalize(steps, walker_per_par, number_par, this.fit); - io.__f_close(); - } -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 17bd8e27..42acc4a4 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,457 +1,1414 @@ % -*- mode: slang; mode: fold; -*- % require("rand"); - -%{{{% some helpers ... 
-% get walkers per node -private define distribute_walkers (nodes, number_walkers) { - variable walkers_per_node = Int_Type[nodes]; - variable n = number_walkers/nodes + 1; - variable missing = nodes - (number_walkers mod nodes); - variable set1_per_node, set2_per_node; +require("fork"); +require("socket"); + +% Implementation of the emcee hammer (Foreman-Mackey et al. 2013) with the principal idea +% that multiple nodes (engines) are each responsible for a part of the +% walkers. For efficiency the walkers are distributed equally to +% each engine. To keep the statistical properties the walkers are +% separated into two groups (see the reference above) where the next step of +% group one depends on the current position of group two and the +% next step of group two depends on the new position of group one. +% For maximum efficiency we try to reduce the required computations +% to the minimum possible such that the model evaluation plus +% the necessary communication is everything that happens in the +% main loop. +% +% To prevent any side effects from the PRNG we let the master +% draw enough random numbers for each step and distribute them to the +% slaves. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +private variable EmceeInitRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeStepRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeFileRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeShipRegister = Assoc_Type[Ref_Type, &NULL]; + +% Engine and Leader %{{{ + +private variable EmceeEngine = struct { + % engine numbers + id, % engine id (master is 0) + numberEngines, % total number of engines + numberSteps, % total number of steps + + % arrays and length + walkers, % walker array for this engine (master has all) + pivots, % pivot array for this engine (master has all) + rolls, % random numbers required (master has all) + update, % update indicator array (master has all) + stat, % statistic array (master has all) + totalNumberWalkers, % total number of walkers + totalNumberSet1, % total number of walkers in set 1 + totalNumberSet2, % total number of walkers in set 2 + + % per engine walkers + numberWalkers, % number of walkers this engine handles + numberWalkersSet1, % number of walkers this engine handles in set 1 + numberWalkersSet2, % number of walkers this engine handles in set 2 + + % selected set + setOffset, % current set offset (for access in walker array) + setLength, % current set length + + % fit + fit, % fit object + numberParameters, % number of (free) parameters + + gears = NULL, % step, random generators + leader = NULL, % write buffers etc. 
+}; + +private variable EmceeLeader = struct { + walkersPerEngine, % number of walkers for each engine id + walkersPerSet1, % number of walkers for each engine id in set 1 + walkersPerSet2, % number of walkers for each engine id in set 2 + walkersPerSet, % selected set walkers + totalOffset, % start of ALL walkers in set + writeBuffer, % total write buffer array + inFile, % input file handle + outFile, % output file handle +}; + +private variable EmceeGears = struct { + upick, % function + urand, % function + + step, % step interface +}; + +private define emceeDrawSet (engine, set) %{{{ +{ + variable urand = engine.gears.urand; + variable upick = engine.gears.upick; + + variable totalNumberSet, + totalNumberComplement, + totalOffset; + + if (1 == set) { + totalNumberSet = engine.totalNumberSet1; + totalNumberComplement = engine.totalNumberSet2; + totalOffset = 0; + engine.setOffset = 0; + engine.setLength = engine.numberWalkersSet1; + if (0 == engine.id) { + engine.leader.walkersPerSet = engine.leader.walkersPerSet1; + engine.leader.totalOffset = totalOffset; + } + } else if (2 == set) { + totalNumberSet = engine.totalNumberSet2; + totalNumberComplement = engine.totalNumberSet1; + totalOffset = engine.totalNumberSet1; + engine.setOffset = engine.numberWalkersSet1; + engine.setLength = engine.numberWalkersSet2; + if (0 == engine.id) { + engine.leader.walkersPerSet = engine.leader.walkersPerSet2; + engine.leader.totalOffset = totalOffset; + } + } + + % master picks new pivots & randoms + if (0 == engine.id) { + variable pick = @upick(0, totalNumberComplement-1, totalNumberSet)+totalOffset; + variable i; + _for i (0, totalNumberSet-1) + engine.pivots[i+totalOffset] = @(engine.walkers[pick[i]]); + + variable numberRandoms = engine.gears.step.numberRandoms; + engine.rolls[[0:totalNumberSet*numberRandoms-1]+totalOffset*numberRandoms] + = @urand(totalNumberSet*numberRandoms); + } +} +%}}} - walkers_per_node[[0:nodes-missing-1]] = n; - 
walkers_per_node[[nodes-missing:nodes-1]] = n-1; +private define emceeSetupGears (engine, urand, upick, step) %{{{ +{ + variable gears = struct { @EmceeGears }; + gears.urand = urand; + gears.upick = upick; + gears.step = step; + + % here we can set the rolls + engine.rolls = Double_Type[length(engine.walkers)*step.numberRandoms]; + + engine.gears = gears; +} +%}}} + +private define emceeSetupWriteBuffer (leader, numberWalkers, numberSteps) %{{{ +{ + % buffer size should ideally be as large as the write routine wants + % but is limited by the maximum array size and must be at least + % as large as one iteration requires + variable size = min([[leader.outFile.cycle, numberSteps]*numberWalkers, INT_MAX-(INT_MAX mod numberWalkers)]); + variable writeBuffer = struct { + size = size, + cycle = size/numberWalkers, + walkers = Array_Type[size], + update = Double_Type[size], + stat = Double_Type[size], + }; - set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set - set1_per_node = walkers_per_node - set2_per_node; + variable i; + _for i (0, size-1) + writeBuffer.walkers[i] = Double_Type[num_free_params()]; - return set1_per_node, set2_per_node; + leader.writeBuffer = writeBuffer; } +%}}} -% get walkers handled up to this node -private define previous_number_walkers (set1_per_node, set2_per_node) { - variable l = length(set1_per_node); - variable set1_handled = Int_Type[l]; - variable set2_handled = Int_Type[l]; - variable i, c1 = 0, c2 = 0; +private define emceeSetupLeader (engine, inFile, outFile) %{{{ +{ + if (0 == engine.id) { + variable nEngines = engine.numberEngines; + variable id; + variable leader = @EmceeLeader; + leader.walkersPerEngine = Int_Type[nEngines]; + leader.walkersPerSet1 = Int_Type[nEngines]; + leader.walkersPerSet2 = Int_Type[nEngines]; + leader.inFile = inFile; + leader.outFile = outFile; + + emceeSetupWriteBuffer(leader, engine.totalNumberWalkers, engine.numberSteps); + + variable set1 = engine.totalNumberSet1; + variable 
set2 = engine.totalNumberSet2; + + _for id (0, engine.numberEngines-1) { + leader.walkersPerSet1[id] = set1/nEngines + ((set1 mod nEngines) > (nEngines-id-1)); + leader.walkersPerSet2[id] = set2/nEngines + ((set2 mod nEngines) > (nEngines-id-1)); + } - _for i (0, l-1, 1) { - set1_handled[i] = c1; - set2_handled[i] = c2; - c1 += set1_handled[i]; - c2 += set2_handled[i]; + engine.leader = leader; } +} +%}}} - return set1_handled, set2_handled; +private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ +{ + variable engine = @EmceeEngine; + ship.engine = engine; + + variable set1 = totalNumberWalkers/2; + variable set2 = totalNumberWalkers - set1; + engine.totalNumberSet1 = set1; + engine.totalNumberSet2 = set2; + engine.totalNumberWalkers = totalNumberWalkers; + engine.numberSteps = totalSteps; + + % set sail (get number of engines and set id) + ship.setSail(); + + % divide walkers evenly (remainders are given to highest ids) + engine.numberWalkersSet1 = set1/engine.numberEngines + + ((set1 mod engine.numberEngines) > (engine.numberEngines-engine.id-1)); + engine.numberWalkersSet2 = set2/engine.numberEngines + + ((set2 mod engine.numberEngines) > (engine.numberEngines-engine.id-1)); + engine.numberWalkers = engine.numberWalkersSet1 + engine.numberWalkersSet2; + engine.numberParameters = num_free_params(); + engine.fit = open_fit(); + + % master stores all values + variable size = engine.id ? 
engine.numberWalkers : totalNumberWalkers; + + engine.walkers = Array_Type[size]; + engine.pivots = Array_Type[size]; + % rolls can only be set up after step is known + % engine.rolls = Double_Type[size*engine.gears.step.numberRandoms]; + engine.update = Int_Type[size]; + engine.stat = Double_Type[size] + DOUBLE_MAX; + + variable j; + _for j (0, size-1) { + engine.walkers[j] = Double_Type[engine.numberParameters]; + engine.pivots[j] = Double_Type[engine.numberParameters]; + } } +%}}} +%}}} + +%{{{ Init interface +%!%+ +%\function{emcee--init} +%\synopsis{Set emcee initialization function} +%\usage{init="method;parameters";} +%\description +% The initialization method can be set with the function string +% "method;parameter" +% +% Available methods: +% uniform : Draw initial walker positions from a uniform distribution +% within the parameter ranges. +% +% gauss : Draw initial walker positions from a gaussian distribution +% within parameter ranges. +% ; sigma : [=10.] Sigma of the gauss function in terms of the +% parameter range. 
+% +% file : Load initial walkers from a valid chain file created by the +% emcee method +% ; filename : The file to load +% +%!%- +% 1: pick - get walkers from parameters and distribution or file +private variable EmceeInit = struct { + pick, % function + + filename, % the filename (if any) + + % private data +}; + +%{{{ Uniform initialization function +% pick random parameter values within the boundaries +private define emceeInitUniformPick (init, engine) %{{{ +{ + variable i; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); -private define emcee_call_setup_fun (exec_string, type) % string similar to statistic or fit method definitions "name; option1=foo, option2=bar" + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set inital walkers"; + + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_uniform(numParameter)*(par.max-par.min)+par.min; +} +%}}} +%}}} +private define emceeInitUniform () %{{{ { - if (NULL == exec_string || typeof(exec_string) != String_Type) - throw UsageError, sprintf("Unable to parse qualifier for %s", type); + variable init = struct { @EmceeInit }; + init.pick = &emceeInitUniformPick; + init.filename = NULL; - variable s = strchop(exec_string, ';', 0); + return init; +} +%}}} +EmceeInitRegister["uniform"] = &emceeInitUniform; - if (length(s) > 2) - throw UsageError, sprintf("Failed parsing option '%s'", exec_string); +%{{{ Gauss initialization function +private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ +{ + variable upper = Real(cerf((bmax-v)/sqrt(2.)/sigma)); + variable lower = Real(cerf((bmin-v)/sqrt(2.)/sigma)); - variable fname = strtrim(s[0]); - variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + return sqrt(2)*erfinv(rand_uniform(length(v))*(upper-lower)+lower)*sigma+v; +} +%}}} - if (NULL == f) - throw UsageError, 
sprintf("Unknown function 'emcee_%s_%s'", type, fname); - - variable opt = length(s) == 2 ? eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; +private define emceeInitGaussPick (init, engine) %{{{ +{ + variable i,w; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); - return (@f)(;; opt); + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set initial walkers"; + + variable sigma = (par.max-par.min)/init.sigma; + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_gauss_cut(sigma, par.value, par.min, par.max); } +%}}} +%}}} +private define emceeInitGauss () %{{{ +{ + variable init = struct { @EmceeInit, sigma }; + init.pick = &emceeInitGaussPick; + init.filename = NULL; + init.sigma = qualifier("sigma", 10); -private define setup_node (node, num_nodes, total_walkers, nrands) { - variable set1_per_node, set2_per_node; - variable set1_handled, set2_handled; - variable set2_len = total_walkers >> 1; % split walker in two sets - variable set1_len = total_walkers - set2_len; - variable j; + return init; +} +%}}} +EmceeInitRegister["gauss"] = &emceeInitGauss; - (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); - (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); - - variable N = struct { - walkers, % current walker position - pivots, % current pivot positions, should not change for one loop! 
- randoms, % required randoms - update, % updater track - stat, % step statistic - total_walkers, % number of totals walkers - set1 = set1_len, % walkers for 1st update - set2 = set2_len, % walkers set for 2nd update - set1_per_node = set1_per_node, % walkers of set1 processed per node - set2_per_node = set2_per_node, % walkers of set2 processed per node - set1_handled = set1_handled, % walkers of set1 handled by all previous nodes - set2_handled = set2_handled, % walkers of set2 handled by all previous nodes - set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - num_pars = num_free_params(), % number of fit parameters - num_rands = nrands, % number of random numbers per walker - }; +%{{{ file initialization function +% fisher_yates: return n indices into 'a'. If n exceeds length(a) the +% indices are drawn with replacement (bootstrap); otherwise the last n +% indices of 'a' are returned in shuffled order. +private define fisher_yates (a, n) %{{{ +{ + if (n>length(a)) + return rand_int(0, length(a)-1, n); - if (node) { % setup for slaves - N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; - N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; - N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; - } else { % setup for master - N.walkers = Array_Type[total_walkers]; - N.pivots = Array_Type[total_walkers]; - N.update = Int_Type[total_walkers]; - N.stat = Double_Type[total_walkers]; - N.randoms = Double_Type[total_walkers*nrands]; + variable p = [length(a)-n:length(a)-1]; + variable j; + _for j (0, n-2) + array_swap(p, j, rand_int(j, n-1)); % swap partner must stay inside p: pick from [j, n-1] + return p; +} +%}}} + +private define emceeInitFilePick (init, engine) %{{{ +{ + variable file = engine.leader.inFile; + variable walkerDistribution, readNumber; + (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); + variable i,j; + % randomize (and bootstrap if necessary) + variable randomize = 
fisher_yates(walkerDistribution, readNumber); + variable parV = Double_Type[engine.numberParameters]; + _for i (0, length(engine.walkers)-1) { + _for j (0, length(parV)-1) + parV[j] = walkerDistribution[randomize[i]][j]; + engine.walkers[i] = @parV; } +} +%}}} +%}}} +private define emceeInitFile () %{{{ +{ + variable init = struct { @EmceeInit }; + init.pick = &emceeInitFilePick; + init.filename = qualifier("filename"); - _for j (0, length(N.walkers)-1, 1) { - N.walkers[j] = Double_Type[N.num_pars]; - N.pivots[j] = Double_Type[N.num_pars]; + return init; +} +%}}} +EmceeInitRegister["file"] = &emceeInitFile; +%}}} + +%{{{ Ship interface: +%!%+ +%\function{emcee--driver} +%\synopsis{Set emcee parallel computation method} +%\usage{driver="method;options"} +%\description +% The driver method can be set with the function string +% "method;parameter" +% +% Available methods: +% mpi : The mpi parallel driver using as many nodes as registered +% in an mpi environment +%!%- +% 1: setSail - set id for engines and how many there are +% 2: leader_send - leader sends to members +% 3: member_send - members send to leader +% 4: leader_receive - leader receives members +% 5: member_receive - member receive leader +% 6: enterHarbor - cleanup if necessary +private variable EmceeShip = struct { + setSail, % function + leaderSend, % function + memberSend, % function + leaderReceive, % function + memberReceive, % function + enterHarbor, % function + + engine, % the working horse + + % private data +}; + +%{{{ fork ship functions +private define elementType (t) %{{{ +{ + switch (t) + { case Char_Type: "c"; } + { case UChar_Type: "C"; } + { case Short_Type: "h"; } + { case UShort_Type: "H"; } + { case Int_Type: "i"; } + { case UInt_Type: "I"; } + { case Long_Type: "l"; } + { case ULong_Type: "L"; } + { case LLong_Type: "m"; } + { case ULLong_Type: "M"; } + { case Int16_Type: "j"; } + { case UInt16_Type: "J"; } + { case Int32_Type: "k"; } + { case UInt32_Type: "K"; } + { case Int64_Type: 
"q"; } + { case UInt64_Type: "Q"; } + { case Float_Type: "f"; } + { case Double_Type: "d"; } + { case Float32_Type: "F"; } + { case Float64_Type: "D"; } + { case String_Type: "s"; } + { case Null_Type: "x"; } +} +%}}} + +private define writeArray (fp, array) %{{{ +{ + variable bytes, msg, fmt; + fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); + msg = pack(fmt, array); + bytes = write(fp, msg); + return bstrlen(msg)-bytes; +} +%}}} + +private define readArray (fp, array) %{{{ +{ + variable i, bytes, msg, fmt; + fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); + bytes = read(fp, &msg, sizeof_pack(fmt)); + array[*] = unpack(fmt, msg); + return bstrlen(msg)-bytes; +} +%}}} + +private define emceeForkSetSail (ship) %{{{ +{ + variable sockRead, sockWrite; + variable pid, cid=0, t; + variable flags; + + _for cid (1, ship.tasks-1) { + (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); + pid = fork(); + if (pid == -1) + throw InternalError, sprintf("Unable to fork process %d", cid); + else if (pid == 0) { + close(sockWrite); + ship.socket = sockRead; + break; + } else { % set master pipes + if (NULL == ship.socket) + ship.socket = FD_Type[ship.tasks]; + flags = fcntl_getfd(sockWrite); +% fcntl_setfd(sockWrite, flags | O_NONBLOCK); + ship.socket[cid] = sockWrite; + close(sockRead); + } } - return N; + ship.engine.id = (pid == 0) ? 
cid : 0; + ship.engine.numberEngines = ship.tasks; +} +%}}} + +private define emceeForkLeaderSend (ship) %{{{ +{ + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; + _for i (1, engine.numberEngines-1) { + % set the walkers for node i + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % pick the pivots for node i + pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % set the randoms for node i + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = writeArray(ship.socket[i], walkers[j]); + () = writeArray(ship.socket[i], pivots[j]); + } + () = writeArray(ship.socket[i], rolls); + firstIndex += walkersPerSet[i]; + } } +%}}} -%}}}% +private define emceeForkMemberSend (ship) %{{{ +{ + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; -%{{{% mpi functions + variable i; + _for i (0, setLength-1, 1) + () = writeArray(ship.socket, engine.walkers[i+setOffset]); -private define release_walkers_mpi_master (node, num_nodes, N) + () = writeArray(ship.socket, engine.update[[0:setLength-1]+setOffset]); + () = writeArray(ship.socket, engine.stat[[0:setLength-1]+setOffset]); +} +%}}} + +private define emceeForkLeaderReceive (ship) %{{{ { -#ifexists rcl_mpi_init - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - if (NULL == urand) - throw InternalError, "Missing random generator"; - - if (NULL == upick) - throw InternalError, "Missing random generator"; - - % select pivot walkers for current set - variable set_len, complement_len, len_offset; - variable set_per_node, set_handled; - variable pick; - - if (1 == N.set) { 
% - set_len = N.set1; - complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - } else if (2 == N.set) { - set_len = N.set2; - complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - } - pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - - variable this_walkers, this_pivots, this_randoms; + variable walker, + stat, + update; + + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = readArray(ship.socket[i], walker); + engine.walkers[j+firstIndex+totalOffset][*] = walker; + } - % set current walkers and complement pivots - _for i (0, set_len-1, 1) - N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points + () = readArray(ship.socket[i], update); + () = readArray(ship.socket[i], stat); + + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[i]; % advance by the share of engine i (as the send side does), not by the leader's share + } +} +%}}} - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] - = (@urand)(set_len*N.num_rands); % get new random numbers for current set +private define emceeForkMemberReceive (ship) %{{{ +{ + variable rolls, + param; + + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; + + variable j; + rolls = Double_Type[setLength*nRolls]; + param = Double_Type[engine.numberParameters]; + _for j (0, setLength-1, 1) { + () = readArray(ship.socket, param); + 
engine.walkers[j+setOffset][*] = param; + () = readArray(ship.socket, param); + engine.pivots[j+setOffset][*] = param; + } + () = readArray(ship.socket, rolls); + + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; +} +%}}} - _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data +private define emceeForkEnterHarbor (ship) %{{{ +{ + variable id; + if (ship.engine.id == 0) { + _for id (1, ship.engine.numberEngines-1) + close(ship.socket[id]); + } else { + close(ship.socket); + exit(); + } +} +%}}} +%}}} +private define emceeShipFork () %{{{ +{ + variable ship = struct { @EmceeShip, socket, tasks }; + ship.setSail = &emceeForkSetSail; + ship.leaderSend = &emceeForkLeaderSend; + ship.memberSend = &emceeForkMemberSend; + ship.leaderReceive = &emceeForkLeaderReceive; + ship.memberReceive = &emceeForkMemberReceive; + ship.enterHarbor = &emceeForkEnterHarbor; + ship.tasks = qualifier("tasks", _num_cpus()); + + return ship; +} +%}}} +EmceeShipRegister["fork"] = &emceeShipFork; + +%{{{ MPI Ship functions +private define emceeMPISetSail (ship) %{{{ +{ + variable engine = ship.engine; + engine.id = rcl_mpi_init(); + engine.numberEngines = rcl_mpi_numtasks(); + rcl_init_mpi_request(engine.numberEngines); +} +%}}} + +private define emceeMPILeaderSend (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + _for i (1, engine.numberEngines-1) { % loop over the slave nodes and send relevant data % set the walkers for node i - this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + pivots = 
engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] - +(set_handled[i]+len_offset)*N.num_rands]; - - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], - length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], - length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_isend_double(walkers[j], length(walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(pivots[j], length(pivots[j]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] - +len_offset*N.num_rands], - set_len*N.num_rands, i, 2); % send random numbers with tag 2 + () = rcl_mpi_org_isend_double(rolls, length(rolls), i, 2); % send random numbers with tag 2 + firstIndex += walkersPerSet[i]; } #endif } +%}}} -private define release_walkers_mpi_slave (node, num_nodes, N) +private define emceeMPILeaderReceive (ship) %{{{ { #ifexists rcl_mpi_init - variable set_len, len_offset; - variable this_randoms; + variable walker, + stat, + update; - if (1 == N.set) { - set_len = N.set1_per_node; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node; - len_offset = N.set1_per_node[node]; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; 
+ + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_recv_double(walker, length(walker), i, i); + engine.walkers[j+firstIndex+totalOffset][*] = walker; +% vmessage("recv %d: (%g, %g)", j+firstIndex+totalOffset, +% engine.walkers[j+firstIndex+totalOffset][0], engine.walkers[j+firstIndex+totalOffset][1]); + } + + () = rcl_mpi_org_recv_int(update, length(update), i, i); + () = rcl_mpi_org_recv_double(stat, length(stat), i, i); + + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[i]; + } +#endif +} +%}}} + +private define emceeMPIMemberSend (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + + variable i; + _for i (0, setLength-1, 1) { +% vmessage("send: (%g, %g)", engine.walkers[i+setOffset][0], engine.walkers[i+setOffset][1]); + () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], + length(engine.walkers[0]), 0, engine.id); } + () = rcl_mpi_org_isend_int(engine.update[[0:setLength-1]+setOffset], setLength, 0, engine.id); + () = rcl_mpi_org_isend_double(engine.stat[[0:setLength-1]+setOffset], setLength, 0, engine.id); +#endif +} +%}}} + +private define emceeMPIMemberReceive (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable rolls; + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; + variable j; - this_randoms = Double_Type[set_len*N.num_rands]; - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], - length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], - length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) + rolls = Double_Type[setLength*nRolls]; + _for j (0, setLength-1, 1) { + () = 
rcl_mpi_org_recv_double(engine.walkers[j+setOffset], + length(engine.walkers[j+setOffset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(engine.pivots[j+setOffset], + length(engine.pivots[j+setOffset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(this_randoms, - set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(rolls, + setLength*engine.gears.step.numberRandoms, 0, 2); % receive random numbers (tag 2) - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; #endif } +%}}} + +private define emceeMPIEnterHarbor (ship) %{{{ +{ + +} +%}}} +%}}} +private define emceeShipMPI () %{{{ +{ + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeMPISetSail; + ship.leaderSend = &emceeMPILeaderSend; + ship.memberSend = &emceeMPIMemberSend; + ship.leaderReceive = &emceeMPILeaderReceive; + ship.memberReceive = &emceeMPIMemberReceive; + ship.enterHarbor = &emceeMPIEnterHarbor; + + return ship; +} +%}}} +EmceeShipRegister["mpi"] = &emceeShipMPI; +%}}} + +%{{{ File interface: +%!%+ +%\function{emcee--file} +%\synopsis{Set emcee file input and output methods} +%\usage{input="method;options" +% \altusage{output="method;options"}} +%\description +% The file inpu/output methods can be set with the function string +% "method;parameter" +% +% Available methods: +% fit : Fits file interface to write the chain as fits table extension +%!%- +% 1: create - open new file pointer and write necessary intial values +% 2: open - open existing file for read/write +% 3: read - open file and return n walkers and how many walkers were used +% 4: write - write cycle steps to the file (n) +% 5: close - close open file at end +private variable EmceeFile = struct { + create, % function + open, % function + read, % function + write, % function + close, % function + + mode, % 0 read, 1 write, 2 read | write + handle, % file 
handle + filename, % full file name + cycle, % number of steps before file gets written + + % additional private data +}; + +%{{{ Fits file functions + +% Create function %{{{ +private define __emceeFitsWriteT1(handle, engine) %{{{ +{ + variable dataInfo; + list_data(&dataInfo); + + variable par = __parameters(engine.fit.object); + variable params = get_params(); + variable numberTotalParams = length(params); + + %variable parNames = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; + +% fits_create_binary_table(handle, "PARAMETERS", num_free_params(), +% ["FREE_PAR", "FREE_PAR_NAME"], +% ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parNames)))], +% [" parameter indices", " parameter names"]); + variable paramsTable = struct { + name=String_Type[numberTotalParams], + index=Int_Type[numberTotalParams], + value=Double_Type[numberTotalParams], + min=Double_Type[numberTotalParams], + max=Double_Type[numberTotalParams], + hard_min=Double_Type[numberTotalParams], + hard_max=Double_Type[numberTotalParams], + freeze=Int_Type[numberTotalParams], + tie=String_Type[numberTotalParams], + units=String_Type[numberTotalParams], + fun=String_Type[numberTotalParams], + free=Int_Type[numberTotalParams], % combines freeze, fun and tie + }; + variable j; + _for j (0, numberTotalParams-1) { + paramsTable.name[j] = params[j].name; + paramsTable.index[j] = params[j].index; + paramsTable.value[j] = params[j].value; + paramsTable.min[j] = params[j].min; + paramsTable.max[j] = params[j].max; + paramsTable.hard_min[j] = params[j].hard_min; + paramsTable.hard_max[j] = params[j].hard_max; + paramsTable.freeze[j] = params[j].freeze; + paramsTable.tie[j] = (params[j].tie == NULL) ? "" : params[j].tie; + paramsTable.units[j] = params[j].units; + paramsTable.fun[j] = (params[j].fun == NULL) ? 
"" : params[j].fun; + paramsTable.free[j] = (not params[j].freeze) and (params[j].fun == NULL) and (params[j].tie == NULL); + } + fits_write_binary_table(handle, "PARAMETERS", paramsTable); + + fits_update_key(handle, "MODEL", get_fit_fun(), "model function"); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + fits_update_key(handle, "SLOPPY", 0, " sloppy level"); + + array_map(&fits_write_comment, handle, strchop(dataInfo, '\n', 0)); + + % sort to index order here +% if (_fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)]) +% && _fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR_NAME"), 1, 1, parNames)) +% throw IOError; +} +%}}} -private define release_walkers_mpi (node, num_nodes, N) { - % distribute walkers, pivots and random numbers +private define __emceeFitsWriteT2(handle, engine) %{{{ +{ + variable par = __parameters(engine.fit.object); + + fits_create_binary_table(handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + fits_update_key(handle, "NWALKERS", engine.totalNumberWalkers/engine.numberParameters, " Number of walkers per free parameter"); + fits_update_key(handle, "NFREEPAR", engine.numberParameters, " Number of free parameters"); + fits_update_key(handle, "NSTEPS", engine.numberSteps, " Numer of iteration steps done"); +} +%}}} - if (node) - release_walkers_mpi_slave(node, num_nodes, N); - else - release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); +private define __emceeFitsWriteT3(handle, engine) %{{{ +{ + fits_create_binary_table(handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); } 
+%}}} -private define catch_walkers_mpi_master (node, num_nodes, N) +private define emceeFileFitsCreate (file, engine) %{{{ { -#ifexists rcl_mpi_init - variable set_len, len_offset, set_per_node, set_handled; + file.mode = 1; + + % Create fits file and write headers + file.handle = fits_open_file(file.filename, "c"); + + % write first table + __emceeFitsWriteT1(file.handle, engine); + + % write second table + __emceeFitsWriteT2(file.handle, engine); + + % write third table + %__emceeFitsWriteT3(file.handle, engine); + + % move back to chain table + () = _fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); - if (1 == N.set) { - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; - } else if (2 == N.set) { - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; + % set write cycle + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/engine.totalNumberWalkers; + if (file.cycle < 1) + file.cycle = 1; + + % fits routine customs + file.numberSteps = 0; + file.sloppy = 0; +} +%}}} +%}}} + +% Open function %{{{ + +private define __emceeFitsReadChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; } - variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, - % slang creates a copy of an array when addressed by index + if (fits_read_key(handle, "MODEL") != get_fit_fun()) { + fits_close_file(handle); + handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } - _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; - this_update = Int_Type[set_per_node[i]]; + variable tab = fits_read_table(handle); + ifnot (struct_field_exists(tab, "free") + || struct_field_exists(tab, "value")) { + fits_close_file(handle); + handle = NULL; + throw 
IOError, "Not a emcee chain file"; + } - _for j (0, set_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], - length(N.walkers[0]), i, i); + variable par = __parameters(engine.fit.object); + if ((length(where(tab.free)) != num_free_params()) + || any(tab.index[where(tab.free)] != par.index[array_sort(par.index)])) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } +} +%}}} - () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); - () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); +private define __emceeFitsWriteChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } - N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; + if ((fits_read_key(handle, "STATISTIC") != get_fit_statistic()) && (file.sloppy<2)) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level (at least 2) to continue anyway"; + } + fits_update_key(handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(handle, "NWALKERS")*fits_read_key(handle, "NFREEPAR")) != length(engine.walkers) + && (handle.sloppy<1)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Number of walkers differs from number used in chain file, increase sloppy level to continue"; } -#endif } +%}}} -private define catch_walkers_mpi_slave (node, num_nodes, N) +private define emceeFileFitsOpen (file, engine) %{{{ { -#ifexists rcl_mpi_init - variable set_len, len_offset; + file.mode = 2; 
+ + file.handle = fits_open_file(file.filename, "w"); - if (1 == N.set) { - set_len = N.set1_per_node[node]; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node[node]; - len_offset = N.set1_per_node[node]; + __emceeFitsReadChecks(file, engine); + __emceeFitsWriteChecks(file, engine); + + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; } - variable i; - _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], - length(N.walkers[len_offset]), 0, node); + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/length(engine.walkers); + if (file.cycle < 1) + file.cycle = 1; - () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); - () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); -#endif + file.numberSteps = fits_get_num_rows(file.handle); } +%}}} -private define catch_walkers_mpi (node, num_nodes, N) +%}}} + +% Read function %{{{ +private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ { - if (node) - catch_walkers_mpi_slave(node, num_nodes, N); - else - catch_walkers_mpi_master(node, num_nodes, N); + file.mode = 0; + + file.handle = fits_open_file(file.filename, "r"); + + __emceeFitsReadChecks(file, engine); + + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable totalNumberWalkers = fits_read_key(file.handle, "NWALKERS") + *fits_read_key(file.handle, "NFREEPAR"); + variable numberParameters = fits_get_num_cols(file.handle); + variable totalNumberRecords = fits_get_num_rows(file.handle); + variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each + () = _fits_read_cols(file.handle, + [2:numberParameters], + max([0, 
totalNumberRecords-numberWalkers]), + numberWalkers, + &walkerDistribution); + + fits_close_file(file.handle); + + return walkerDistribution, totalNumberWalkers; } +%}}} +%}}} -private define emcee_mpi (walker_per_par, number_par, steps) { - variable total_walkers = walker_per_par*number_par; +% Write function %{{{ +private define emceeFileFitsWrite (file, engine, numberWalkersSteps) %{{{ +{ + if (numberWalkersSteps > engine.leader.writeBuffer.size) + throw InternalError, "Trying to write more than accessible"; - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); + variable par = __parameters(engine.fit.object); + variable npar = engine.numberParameters; - variable node, num_nodes; + %variable walkersPerCycle = engine.leader.writeBuffer.size; % total_walkers*steps_per_cycle + variable i,j; + variable firstIndex = fits_get_num_rows(file.handle)+1; % first index of this cycle + variable parCycle = Double_Type[numberWalkersSteps]; + + _for j (0, npar-1, 1) { + _for i (0, numberWalkersSteps-1, 1) + parCycle[i] = engine.leader.writeBuffer.walkers[i][j]; + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, sprintf("CHAINS%d", par.index[j])), + firstIndex, + 1, + parCycle); + } + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "FITSTAT"), + firstIndex, + 1, + engine.leader.writeBuffer.stat[[:numberWalkersSteps-1]]); + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "UPDATE"), + firstIndex, + 1, + engine.leader.writeBuffer.update[[:numberWalkersSteps-1]]); +} +%}}} +%}}} -#ifexists rcl_mpi_init - node = rcl_mpi_init(); - num_nodes = rcl_mpi_numtasks(); - num_nodes = (num_nodes<1) ? 
1 : num_nodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! - % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... -#else - node = 0; - num_nodes = 1; -#endif +% Close function %{{{ +private define emceeFileFitsClose (file, engine) %{{{ +{ + % todo: write fitstat table + variable nHDUs = fits_get_num_hdus(file.handle); + variable i; - move = emcee_call_setup_fun(move, "move"); - variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - % 'this' is the mpi_emcee handle for this node! - % It contains all relevant data to do the calculation - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, this.fit, this.walkers); - else if (load != NULL) { - load_hook.__f_read(load, this.walkers); - load_hook.__f_close(); - io.__f_create(output, this.fit, total_walkers); - } else { - io.__f_create(output, this.fit, total_walkers); - init.__f(this.walkers, this.fit); + if (file.mode) { + _for i (1, nHDUs) { + () = _fits_movabs_hdu(file.handle, i); + fits_write_chksum(file.handle); } } - variable collector_length; % the collector so we can skip turns before writing to disk - variable walker_cycle; - variable update_cycle; - variable stat_cycle; - variable cycle_step = 0; + fits_close_file(file.handle); +} +%}}} +%}}} +%}}} +private define emceeFileFits () %{{{ +{ + variable file = struct { @EmceeFile, numberSteps, sloppy }; + file.create = &emceeFileFitsCreate; + file.open = &emceeFileFitsOpen; + file.read = &emceeFileFitsRead; + file.write = &emceeFileFitsWrite; + file.close = &emceeFileFitsClose; - % setup space + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + file.cycle = 1; + + 
file.numberSteps = 0; + file.sloppy = 0; + + return file; +} +%}}} +EmceeFileRegister["fits"] = &emceeFileFits; +%}}} + +%{{{ Step interface: +%!%+ +%\function{emcee--step} +%\synopsis{Set emcee step algorithm} +%\usage{step="method;options"} +%\description +% The step algorithm can be set with the function string +% "method;parameter" +% +% Available algorithms: +% stretch : The stretch move as described in Goodman & Weare 2010 +% ; scale : [=2] Scale for the range of possible moves +%!%- +% 1: move - loop over walkers and update +private variable EmceeStep = struct { + move, % function + + numberRandoms, % random number required per step + + % private data +}; + +%{{{ Stretch move functions (Foreman & Mackey) +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define stretchInverseCDF (u, a) %{{{ +{ + return (u*(a-1.)+1.)^2./a; +} +%}}} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the fit function +private define emceeStepStretchMove (step, engine) %{{{ +{ variable j; - if (node) % slave - collector_length = 0; - else % master - collector_length = length(this.walkers)*io.cycle; - - walker_cycle = Array_Type[collector_length]; - update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; - - if (NULL == urand || NULL == upick) - throw InternalError, "Missing random number generator"; - - % evaluate the model at the walker positions to get the statistics - % and if not continuing a chain, write them out - ifnot (node) { - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; - } - if (cont == NULL) - io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); - } - - % the main loop where the magic happens - variable s, walker, update, stat, set, set_len, len_offset; - _for s (0, steps-1, 1) { - cycle_step = s mod io.cycle; 
- _for set (1, 2, 1) { - this.set = set; - - release_walkers_mpi(node, - num_nodes, - this; upick=upick, urand=urand); % release walkers to freedom ... - - if (1 == set) { - set_len = this.set1_per_node[node]; - len_offset = 0; - } else if (2 == set) { - set_len = this.set2_per_node[node]; - len_offset = this.set1_per_node[node]; + variable z; + variable proposed; + variable newStat; + variable startIndex = engine.setOffset; + variable setLength = engine.setLength; + + _for j (startIndex, startIndex+setLength-1) { + z = stretchInverseCDF(engine.rolls[j*step.numberRandoms], step.scale); + proposed = engine.pivots[j] + z*(engine.walkers[j]-engine.pivots[j]); + + engine.update[j] = 0; + + try { + newStat = engine.fit.eval_statistic(proposed; nocopy); + + % accept or reject dimansionally normalized. Assuming statistic is -2 log likelihood + if (log(engine.rolls[j*step.numberRandoms+1]) + <= (log(z)*(engine.fit.num_vary-1)+(engine.stat[j]-newStat)/2.)) { + engine.stat[j] = newStat; + engine.walkers[j][*] = proposed; + engine.update[j] = 1; } + } catch IsisError; + } +} +%}}} +%}}} +private define emceeStepStretch () %{{{ +{ + variable step = struct { @EmceeStep, scale }; + step.move = &emceeStepStretchMove; + step.numberRandoms = 2; + step.scale = qualifier("scale", 2); - _for j (0, set_len-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], - this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], - this.stat[j+len_offset]); - this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; - } + return step; +} +%}}} +EmceeStepRegister["stretch"] = &emceeStepStretch; +%}}} + +%%% emcee call +private define emceeOption (str) %{{{ +{ + variable s = strchop(str, ';', 0); + return strtrim(s[0]), length(s)>1 ? 
eval(sprintf("struct {%s}", s[1])) : NULL; +} +%}}} + +private define emceeLoop (ship, step, output) %{{{ +{ + variable engine = ship.engine; + variable s, j, set, cycle, leader, size; + + variable k; + if (engine.id==0) { + _for k (0, length(engine.walkers)-1) + writecol(stdout, Int_Type[engine.numberParameters]-1, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); + vmessage(""); + } + + _for s (0, engine.numberSteps-1) { + _for set (1, 2) { + emceeDrawSet(engine, set); + + if (0 == engine.id) + ship.leaderSend(); + else + ship.memberReceive(); - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! + step.move(engine); + + if (0 == engine.id) + ship.leaderReceive(); + else + ship.memberSend(); + } + + _for k (0, engine.numberWalkers-1) { + writecol(stdout, Int_Type[engine.numberParameters]+engine.id, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); } - % if cycle end is reached write the chain - ifnot (node) { % master only - if (not cycle_step && s > 0) - io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); - _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; - stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; + if (engine.id==0) { + _for k (0, length(engine.walkers)-1) + writecol(stdout, Int_Type[engine.numberParameters]-1, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); + vmessage(""); + } + + if (0 == engine.id) { + leader = engine.leader; + size = leader.writeBuffer.size; + cycle = s mod leader.writeBuffer.cycle; + % write to buffer + _for j (0, engine.totalNumberWalkers-1) { + leader.writeBuffer.walkers[j+cycle*engine.totalNumberWalkers][*] = @(engine.walkers[j]); + leader.writeBuffer.stat[j+cycle*engine.totalNumberWalkers] = engine.stat[j]; + leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; } + + if (cycle == 
(leader.writeBuffer.cycle-1)) + output.write(engine, size); } } - % we might have unwritten steps left, so better write them here - cycle_step++; % the last step is never written in the loop, we have to deal with it here - ifnot (node) { - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); + % write remaining steps + if (0 == engine.id) { + if (cycle < (leader.writeBuffer.cycle-1)) + output.write(engine, (cycle+1)*engine.totalNumberWalkers); + output.close(engine); } +} +%}}} - % and finally call the finalizing function - ifnot (node) { % master only - io.__f_finalize(steps, walker_per_par, number_par, this.fit); - io.__f_close(); +private define emceeSetup (ship, steps, options) %{{{ +{ + variable leader, size, engine; + variable j, set; + + engine = ship.engine; + if (0 == engine.id) { + emceeSetupLeader(engine, options.input, options.output); + options.init.pick(engine); } -} -%}}}% -define emcee_new (walkers_per_par, steps) { - variable qs = struct { - move = "stretch", % defined move - urand = &rand_uniform, % double random generator - upick = &rand_int, % int random generator - init = "uniform", % initialization function - load = NULL, % initialize from file - read = "fits", % specifier for read - write = "fits", % specifier for write - output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file - continue = NULL, % continue file - }; + % set walkers and eval once + _for set (1, 2) { + emceeDrawSet(engine, set); - if (NULL == get_fit_fun()) - throw UsageError, "No fit function loaded"; + if (0 == engine.id) + ship.leaderSend(); + else + ship.memberReceive(); + } - if (NULL == all_data()) - throw UsageError, "No data set loaded"; + _for j (0, length(engine.walkers)-1) + engine.stat[j] = engine.fit.eval_statistic(engine.walkers[j]; nocopy); - ifnot (0 ((1<<29)-1)) - throw UsageError, "Unable to create ensemble for this large 
number of walkers"; + % write initial walkers to buffer + _for j (0, length(engine.walkers)-1) { + leader.writeBuffer.walkers[j] = @(engine.walkers[j]); + leader.writeBuffer.stat[j] = engine.stat[j]; + leader.writeBuffer.update[j] = 1; + } - emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); -#ifexists rcl_mpi_init -% rcl_mpi_finalize(); -#endif + % if we create new file write initial walkers to it + ifnot (options.continue) + options.output.write(engine, engine.totalNumberWalkers); + } +} +%}}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%% +define emcee_hammer (steps) +%%%%%%%%%%%%%%%%%%%%%%%%%%% +%!%+ +%\function{emcee_hammer} +%\synopsis{Explore parameter space with MCMC method} +%\usage{emcee_hammer (Int_Type);} +%#c%{{{ +%\qualifiers{ +% Basic +% \qualifier{walkers}{[=10]: Number of walkers per parameter} +% \qualifier{continue}{: If given (and possible set to a file) continue chain from this file} +% \qualifier{infile}{: Set the input file name for reading and continuing} +% \qualifier{outfile}{: Set the output file name} +% Advanced +% \qualifier{init}{[="uniform" or "file"]: The walker initialization method} +% \qualifier{driver}{[="mpi"]: The parallelization method} +% \qualifier{step}{[="stretch"]: The walker step algorithm} +% \qualifier{input}{[="fits"]: The file reading method} +% \qualifier{output}{[="fits"]: The file writing method} +% \qualifier{urand}{[=&rand_uniform]: PRNG for uniform numbers (Double_Type[] = urand(Int_Type))} +% \qualifier{upick}{[=&rand_int]: PRNG to chose complement walker (Int_Type[] = upick(Int_Type, Int_Type, Int_Type))} +%} +% +%\description +% The MCMC parameter space exploration algorithm as described by +% Foreman-Mackey et al. The function expects that data and a model is loaded. +% The only input parameter gives the number of iterations the algorithm +% performs. The resulting walker positions are written to a file which can +% be set with the "outfile" qualifier. 
+% +% The function allows to choose other algorithms for the step proposition, +% the read and write routines and how the walker ensamble is initialized. +% To get more information about the methods read 'help emcee_'. +% +% Per default a new chain is started when the function is called. To continue +% a chain use the "continue" qualifier. +% +%\seealso{emcee--init, emcee--step, emcee--driver, emcee--input, emcee--output} +%!%- +{ + % options + variable oContinue = qualifier("continue"); + variable oInfile = qualifier("infile", oContinue); + variable oOutfile = qualifier("outfile", oContinue); + + % advanced options + variable initHandle, initOption; + if (NULL != oInfile) + (initHandle, initOption) = emceeOption(qualifier("init", "file")); + else + (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); + + variable shipHandle, shipOption; + (shipHandle, shipOption) = emceeOption(qualifier("driver", "mpi")); + + variable stepHandle, stepOption; + (stepHandle, stepOption) = emceeOption(qualifier("step", "stretch")); + + variable inputHandle, inputOption; + (inputHandle, inputOption) = emceeOption(qualifier("input", "fits")); + if (NULL != oInfile) inputOption = struct { @inputOption, filename=oInfile }; + + variable outputHandle, outputOption; + (outputHandle, outputOption) = emceeOption(qualifier("output", "fits")); + if (NULL != oOutfile) outputOption = struct { @outputOption, filename=oOutfile }; + + variable totalNumberWalkers = qualifier("walkers", 10)*num_free_params(); + variable ship = @(EmceeShipRegister[shipHandle])(;;shipOption); + emceeSetupEngine(ship, totalNumberWalkers, steps); + + variable options = struct { + init = @(EmceeInitRegister[initHandle])(;;initOption), + step = @(EmceeStepRegister[stepHandle])(;;stepOption), + output = NULL, + input = NULL, + urand = qualifier("urand", &rand_uniform), + upick = qualifier("upick", &rand_int), + continue = qualifier_exists("continue"), + }; + emceeSetupGears(ship.engine, options.urand, 
options.upick, options.step); + + if (0 == ship.engine.id) { + options.input = @(EmceeFileRegister[inputHandle])(;;inputOption); + options.output = @(EmceeFileRegister[outputHandle])(;;outputOption); + } + + emceeSetup(ship, steps, options); + + emceeLoop(ship, options.step, options.output); + + ship.enterHarbor(); } +%}}} -- GitLab From 926b6783c797237f82d1a77afa90b1bd84b084de Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 11:22:37 +0100 Subject: [PATCH 36/89] Communication is working Uniform init working, others need testing. Things to change: Add function initialization method, add approx_inv initialization from valid chain file. Maybe change fork sockets to non-blocking. --- src/fitting/ensemble-samplers/emcee.sl | 79 +++++++++++++++----------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 42acc4a4..48117f64 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -3,6 +3,7 @@ require("rand"); require("fork"); require("socket"); +%require("select"); % Implementation of the emcee hammer () with the principle idea % that multiple nodes (engines) are responsible for a part of the @@ -67,7 +68,7 @@ private variable EmceeLeader = struct { walkersPerSet1, % number of walkers for each engine id in set 1 walkersPerSet2, % number of walkers for each engine id in set 2 walkersPerSet, % selected set walkers - totalOffset, % start of ALL walkers in set + totalOffset, % start of set in walker array writeBuffer, % total write buffer array inFile, % input file handle outFile, % output file handle @@ -92,22 +93,23 @@ private define emceeDrawSet (engine, set) %{{{ if (1 == set) { totalNumberSet = engine.totalNumberSet1; totalNumberComplement = engine.totalNumberSet2; - totalOffset = 0; engine.setOffset = 0; engine.setLength = engine.numberWalkersSet1; + totalOffset = 0; if (0 == engine.id) { - 
engine.leader.walkersPerSet = engine.leader.walkersPerSet1; engine.leader.totalOffset = totalOffset; + engine.leader.walkersPerSet = engine.leader.walkersPerSet1; } } else if (2 == set) { totalNumberSet = engine.totalNumberSet2; totalNumberComplement = engine.totalNumberSet1; - totalOffset = engine.totalNumberSet1; engine.setOffset = engine.numberWalkersSet1; engine.setLength = engine.numberWalkersSet2; + totalOffset = engine.totalNumberSet1; if (0 == engine.id) { - engine.leader.walkersPerSet = engine.leader.walkersPerSet2; engine.leader.totalOffset = totalOffset; + engine.leader.walkersPerSet = engine.leader.walkersPerSet2; + engine.setOffset = totalOffset; } } @@ -382,6 +384,12 @@ EmceeInitRegister["file"] = &emceeInitFile; % "method;parameter" % % Available methods: +% serial : The serial driver. No parallelization at all +% +% fork : The fork (& socket) parallel driver. Per default uses +% _num_cpus many tasks. +% ; tasks : [=_num_cpus] Number of total processes used +% % mpi : The mpi parallel driver using as many nodes as registered % in an mpi environment %!%- @@ -404,6 +412,35 @@ private variable EmceeShip = struct { % private data }; +%{{{ serial ship functions +private define void () %{{{ +{ + variable args = __pop_list(_NARGS); +} +%}}} + +private define emceeSerialInit (ship) %{{{ +{ + ship.engine.numberEngines = 1; + ship.engine.id = 1; +} +%}}} +%}}} +private define emceeShipSerial () %{{{ +{ + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeSerialInit; + ship.leaderSend = &void; + ship.memberSend = &void; + ship.leaderReceive = &void; + ship.memberReceive = &void; + ship.enterHarbor = &void; + + return ship; +} +%}}} +EmceeShipRegister["serial"] = &emceeShipSerial; + %{{{ fork ship functions private define elementType (t) %{{{ { @@ -471,7 +508,7 @@ private define emceeForkSetSail (ship) %{{{ } else { % set master pipes if (NULL == ship.socket) ship.socket = FD_Type[ship.tasks]; - flags = fcntl_getfd(sockWrite); +% flags = 
fcntl_getfd(sockWrite); % fcntl_setfd(sockWrite, flags | O_NONBLOCK); ship.socket[cid] = sockWrite; close(sockRead); @@ -557,7 +594,7 @@ private define emceeForkLeaderReceive (ship) %{{{ engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; - firstIndex += walkersPerSet[0]; + firstIndex += walkersPerSet[i]; } } %}}} @@ -682,8 +719,6 @@ private define emceeMPILeaderReceive (ship) %{{{ _for j (0, walkersPerSet[i]-1, 1) { () = rcl_mpi_org_recv_double(walker, length(walker), i, i); engine.walkers[j+firstIndex+totalOffset][*] = walker; -% vmessage("recv %d: (%g, %g)", j+firstIndex+totalOffset, -% engine.walkers[j+firstIndex+totalOffset][0], engine.walkers[j+firstIndex+totalOffset][1]); } () = rcl_mpi_org_recv_int(update, length(update), i, i); @@ -707,7 +742,6 @@ private define emceeMPIMemberSend (ship) %{{{ variable i; _for i (0, setLength-1, 1) { -% vmessage("send: (%g, %g)", engine.walkers[i+setOffset][0], engine.walkers[i+setOffset][1]); () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], length(engine.walkers[0]), 0, engine.id); } @@ -1207,15 +1241,6 @@ private define emceeLoop (ship, step, output) %{{{ variable engine = ship.engine; variable s, j, set, cycle, leader, size; - variable k; - if (engine.id==0) { - _for k (0, length(engine.walkers)-1) - writecol(stdout, Int_Type[engine.numberParameters]-1, - Int_Type[engine.numberParameters]+k, - engine.walkers[k]); - vmessage(""); - } - _for s (0, engine.numberSteps-1) { _for set (1, 2) { emceeDrawSet(engine, set); @@ -1233,20 +1258,6 @@ private define emceeLoop (ship, step, output) %{{{ ship.memberSend(); } - _for k (0, engine.numberWalkers-1) { - writecol(stdout, Int_Type[engine.numberParameters]+engine.id, - Int_Type[engine.numberParameters]+k, - engine.walkers[k]); - } - - if (engine.id==0) { - _for k (0, length(engine.walkers)-1) - writecol(stdout, Int_Type[engine.numberParameters]-1, - Int_Type[engine.numberParameters]+k, 
- engine.walkers[k]); - vmessage(""); - } - if (0 == engine.id) { leader = engine.leader; size = leader.writeBuffer.size; @@ -1372,7 +1383,7 @@ define emcee_hammer (steps) (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); variable shipHandle, shipOption; - (shipHandle, shipOption) = emceeOption(qualifier("driver", "mpi")); + (shipHandle, shipOption) = emceeOption(qualifier("driver", "serial")); variable stepHandle, stepOption; (stepHandle, stepOption) = emceeOption(qualifier("step", "stretch")); -- GitLab From 1647e13029959ec535b3e4924af1dfaf70147c54 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 12:32:10 +0100 Subject: [PATCH 37/89] Serial driver was setting ID wrong There was no node responsible for writing... --- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 48117f64..be2acd26 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -422,7 +422,7 @@ private define void () %{{{ private define emceeSerialInit (ship) %{{{ { ship.engine.numberEngines = 1; - ship.engine.id = 1; + ship.engine.id = 0; } %}}} %}}} -- GitLab From 278797635eaf9348e039efac2a109d123213f237 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 23 Feb 2021 15:15:17 +0100 Subject: [PATCH 38/89] Emcee communication seems to work Added init methods for files Checked communication (hopefully) Remaining tasks: Check performance, add function initialization (how?) 
--- src/fitting/ensemble-samplers/emcee.sl | 87 +++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index be2acd26..77b075a0 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -241,6 +241,8 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ %\description % The initialization method can be set with the function string % "method;parameter" +% Initialization methods that read from file use the defined input +% method (default: fits). % % Available methods: % uniform : Draw initial walker positions from a uniform distribution @@ -253,15 +255,18 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ % % file : Load initial walkers from a valid chain file created by the % emcee method -% ; filename : The file to load +% +% chain : Draw initial walkers from an approximated CDF of an existing +% chain file. 
+% ; steps : [=10] The number of steps to concider for constructing the CDF +% (from the end of the chain) +% ; rng : [=&rand_uniform] uniform random number generator % %!%- % 1: pick - get walkers from parameters and distribution or file private variable EmceeInit = struct { pick, % function - filename, % the filename (if any) - % private data }; @@ -286,7 +291,6 @@ private define emceeInitUniform () %{{{ { variable init = struct { @EmceeInit }; init.pick = &emceeInitUniformPick; - init.filename = NULL; return init; } @@ -323,7 +327,6 @@ private define emceeInitGauss () %{{{ { variable init = struct { @EmceeInit, sigma }; init.pick = &emceeInitGaussPick; - init.filename = NULL; init.sigma = qualifier("sigma", 10); return init; @@ -331,7 +334,7 @@ private define emceeInitGauss () %{{{ %}}} EmceeInitRegister["gauss"] = &emceeInitGauss; -%{{{ file initialization function +%{{{ File initialization function private define fisher_yates (a, n) %{{{ { if (n>length(a)) @@ -350,6 +353,7 @@ private define emceeInitFilePick (init, engine) %{{{ variable file = engine.leader.inFile; variable walkerDistribution, readNumber; (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); + variable i,j; % randomize (and bootstrap if necessary) variable randomize = fisher_yates(walkerDistribution, readNumber); @@ -366,13 +370,77 @@ private define emceeInitFile () %{{{ { variable init = struct { @EmceeInit }; init.pick = &emceeInitFilePick; - init.filename = qualifier("filename"); return init; } %}}} EmceeInitRegister["file"] = &emceeInitFile; + +%{{{ Chain initialization function +define empiric_cdf_inverse (p, a, amin, amax) %{{{ +{ + variable s = array_sort(p); + + if (p[s][0]<0 || p[s][-1]>=1) + throw DomainError, "not in range 0<=p<1"; + + a = a[array_sort(a)]; + a = a[where(amin<=a<=amax)]; % restrict to cdf in range + + variable u = unique(a); + variable ecdf = [u/1./length(a), 1.]; + variable lo = [amin, a[u]]; + variable hi = [a[u], amax]; + + variable r = 
Double_Type[length(p)]; + variable k, i = 0; + variable m = .5*([(ecdf[[1:]]-ecdf[[:-2]])/(lo[[1:]]-lo[[:-2]]), 0.] + +[0., (ecdf[[1:]]-ecdf[[:-2]])/(hi[[1:]]-hi[[:-2]])]); + + variable hitsmin = (amin == a[0]); % gives NaN if true and p == 0 + _for k (0, length(p)-1) { + while (p[s[k]] > ecdf[i+1]) i++; + if (hitsmin && p[s[k]]==0) + r[s[k]] = amin; + else + r[s[k]] = (p[s[k]]-ecdf[i])/m[i]+lo[i]; + } + + return r; +} +%}}} + +private define emceeInitChainPick (init, engine) %{{{ +{ + variable file = engine.leader.inFile; + variable walkerDistribution, numberSteps; + (walkerDistribution, numberSteps) = file.read(engine, init.steps); + variable par = get_params(); + + variable parRand; + variable i,j; + _for i (0, engine.numberParameters-1) { + parRand = empiric_cdf_inverse(@(init.rng)(engine.totalNumberWalkers), + walkerDistribution[i], + par[i].min, + par[i].max); + _for j (0, engine.totalNumberWalkers-1) + engine.walkers[j][i] = parRand[j]; + } +} +%}}} +%}}} +private define emceeInitChain () %{{{ +{ + variable init = struct { @EmceeInit, rng, steps }; + init.pick = &emceeInitChainPick; + init.rng = qualifier("rng", &rand_uniform); + init.steps = qualifier("steps", 10); + + return init; +} %}}} +EmceeInitRegister["chain"] = &emceeInitChain; %{{{ Ship interface: %!%+ @@ -1062,7 +1130,7 @@ private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ variable totalNumberRecords = fits_get_num_rows(file.handle); variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each () = _fits_read_cols(file.handle, - [2:numberParameters], + [3:numberParameters], max([0, totalNumberRecords-numberWalkers]), numberWalkers, &walkerDistribution); @@ -1340,7 +1408,8 @@ define emcee_hammer (steps) %\qualifiers{ % Basic % \qualifier{walkers}{[=10]: Number of walkers per parameter} -% \qualifier{continue}{: If given (and possible set to a file) continue chain from this file} +% \qualifier{continue}{: If given (and possible set to a 
file) continue chain from this file +% (using init="file", file="fits" per default)} % \qualifier{infile}{: Set the input file name for reading and continuing} % \qualifier{outfile}{: Set the output file name} % Advanced -- GitLab From 4b2f50af91c092f5ddc00ccb70a7ee560b1d4c09 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 24 Feb 2021 00:36:02 +0100 Subject: [PATCH 39/89] Fix fork driver issue close return value not catched, and setup was not done correctly --- src/fitting/ensemble-samplers/emcee.sl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 77b075a0..56a8c9cb 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -441,6 +441,7 @@ private define emceeInitChain () %{{{ } %}}} EmceeInitRegister["chain"] = &emceeInitChain; +%}}} %{{{ Ship interface: %!%+ @@ -564,13 +565,14 @@ private define emceeForkSetSail (ship) %{{{ variable pid, cid=0, t; variable flags; - _for cid (1, ship.tasks-1) { + _for t (1, ship.tasks-1) { (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); + cid++; pid = fork(); if (pid == -1) throw InternalError, sprintf("Unable to fork process %d", cid); else if (pid == 0) { - close(sockWrite); + () = close(sockWrite); ship.socket = sockRead; break; } else { % set master pipes @@ -579,7 +581,7 @@ private define emceeForkSetSail (ship) %{{{ % flags = fcntl_getfd(sockWrite); % fcntl_setfd(sockWrite, flags | O_NONBLOCK); ship.socket[cid] = sockWrite; - close(sockRead); + () = close(sockRead); } } @@ -697,9 +699,9 @@ private define emceeForkEnterHarbor (ship) %{{{ variable id; if (ship.engine.id == 0) { _for id (1, ship.engine.numberEngines-1) - close(ship.socket[id]); + () = close(ship.socket[id]); } else { - close(ship.socket); + () = close(ship.socket); exit(); } } @@ -1377,7 +1379,7 @@ private define emceeSetup (ship, steps, options) %{{{ if (0 == ship.engine.id) { if 
(options.continue) - options.output.open(engine); + options.output.open(engine); else options.output.create(engine); -- GitLab From 6efc6a7c5c6a37126679fcd01ce04bd7bc5f2525 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Fri, 8 Nov 2019 19:39:43 +0100 Subject: [PATCH 40/89] Revamp emcee code due to bugs Current state has bugs which are not easy to debug due to complex code structure. Renew with clean structure, more comments, etc. --- src/fitting/ensemble-samplers/emcee.sl | 95 ++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/fitting/ensemble-samplers/emcee.sl diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl new file mode 100644 index 00000000..04c182be --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -0,0 +1,95 @@ + + +%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% + +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define inverse_cdf (u, a) { + return (u*(a-1)+1)^2/a; +} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the functions +private define stretch_move (fit_object, x, x_j, u, a) { + variable z = inverse_cdf(u[0], a); + variable xstat = 1e32, ystat = -1e32; % from mike, why doing it that way? + variable y; % step proposition + variable x_t1 = x; % resulting step + + % evaluate fit function for 'x' + xstat = fit_object.eval_statistic(x;nocopy); + + % calculate the new position (utilize array operations) + y = x_j + z*(x-x_j); + + % try evaluating, if out of bounds, does nothing + try { + % evaluate fit function for 'y' + ystat = fit_object.eval_statistic(y;nocopy); + + % caluculate if we accept the step based on the statistics of the + % model. 
We assume that the statistic is given as -2 log likelihood + if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) + x_t1 = y; + } catch IsisError; + + % return new walker position, proposed position, xstat, ystat + return (x_t1, y, xstat, ystat); +} + +private define get_move_args (m) { + return m.args; +} + +private define set_move_args () { + variable args, m; + if (_NARGS>2) { + args = __pop_list(_NARGS-1); + m = (); + } else + (m,args) = (); + if (length(args) != m.nargs) + throw UsageError: sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); + m.args = args; +} + +variable STRETCH_MOVE = struct { + name = "stretch move", + move = &stretch_move, % the function + nrands = 2, % the randoms needed + args = {2}, % additional arguments + nargs = 1; % number additional arguments + get = &get_move_args, + set = &set_move_args, +} + +%}}}% + +% calculate the move for the fit 'fit_object' based on the current walker position x, +% a randomly choosen walker x_j and an array of uniform random numbers enough to +% calculate the next step. 
move is the struct encapsulating the move function +private define __move (fit_object, x, x_j, u, move) { + % for alternative step functions, must be symmetric (that is, + % Pr(x -> y) = Pr(y -> x)) otherwise no detailed balance + + % push standard arguments & arguments for specified move + return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); +} + +% start emcee walkers +% nwalkers is number walkers per parameter, nsteps is number of iterations +define emcee (nwalkers, nsteps) { + % setup user interaction + variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) + variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps + variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps + + if (get_fit_fun() == NULL) + throw UsageError: "No fit function defined"; + if (all_data() == NULL) + throw UsageError: "No data is loaded"; + + % setup walkers + variable total_walkers = num_free_params()*nwalkers; + variable urands = \ No newline at end of file -- GitLab From ffbcf034655307b37ec3b34fbf56858a78746679 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 9 Nov 2019 01:47:36 +0100 Subject: [PATCH 41/89] More of new emcee --- src/fitting/ensemble-samplers/emcee.sl | 71 ++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 04c182be..94645397 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -75,21 +75,84 @@ private define __move (fit_object, x, x_j, u, move) { % push standard arguments & arguments for specified move return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); +} + +% get all free param values, index and min max +private define free_par_sets () { + 
variable all = get_params(); + variable i, ind = {}, v = {}, mi = {}, ma = {}; + _for i (0, length(all)-1, 1) { + ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) + continue; + list_append(ind, all[i].index); + list_append(v, all[i].value); + list_append(mi, all[i].min); + list_append(ma, all[i].ma); + } + + return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); } +% pick random parameter values within the boundaries +private define init_parameter_walker_uniform (n) { + variable walkers = Array_Type[n]; + variable i; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + + _for i (0, n-1, 1) + walkers[i] = rand_uniform(num_p)*(p_max-p_min)+p_min; + + return walkers; +} + +% get walkers per node +private define distribute_walkers (nodes, number_walkers) { + variable walkers_per_node = Int_Type[nodes]; + variable n = number_walkers/nodes + 1; + variable missing = nodes - (number_walkers mod nodes); + + walkers_per_node[[0:nodes-missing-1]] = n; + walkers_per_node[[nodes-missing:nodes-1]] = n-1; + return walkers_per_node; +} + +% this function is the core and may be evaluated by multiple tasks +private define iterate_emcee (walkers, set1, set2, u) { + + % start emcee walkers % nwalkers is number walkers per parameter, nsteps is number of iterations -define emcee (nwalkers, nsteps) { +public define emcee (nwalkers, nsteps) { % setup user interaction variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps + variable cycle = qualifier("cycle", 50); % number of evaluations per cycle (recalculates some numbers after each cycle) + variable init = qualifier("init", 
&init_parameter_walker_uniform); % init function, takes number of total walkers and returns initialized array of parameter arrays if (get_fit_fun() == NULL) throw UsageError: "No fit function defined"; if (all_data() == NULL) throw UsageError: "No data is loaded"; - + % setup walkers - variable total_walkers = num_free_params()*nwalkers; - variable urands = \ No newline at end of file + variable total_walkers = int(num_free_params()*nwalkers); + variable u = urand(cycle*total_walkers*move.nrands); + % to ensure independence between walker ensembles, roll dice + % and pick sets accordingly + variable sort = array_sort(urand(total_walkers)); + variable set1 = sort[0:total_walkers:2]; + variable set2 = sort[1:total_walkers:2]; + variable len_set1 = length(set1); + variable len_set2 = length(set2); + variable walkers = init(total_walkers); + + % setup for parallelization + variable nodes = 1; % number of parallel jobs + variable walkers_per_node = distribute_walkers(nodes, total_walkers); + + % loop over the walkers and update them + variable + \ No newline at end of file -- GitLab From b6ecd905b354d717787a52610c7309a480b7d165 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 9 Nov 2019 16:27:17 +0100 Subject: [PATCH 42/89] More of emcee_new --- src/fitting/ensemble-samplers/emcee.sl | 42 +++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 94645397..c600de75 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -13,7 +13,7 @@ private define inverse_cdf (u, a) { % Move must evaluate the functions private define stretch_move (fit_object, x, x_j, u, a) { variable z = inverse_cdf(u[0], a); - variable xstat = 1e32, ystat = -1e32; % from mike, why doing it that way? 
+ variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step @@ -57,7 +57,7 @@ private define set_move_args () { variable STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function - nrands = 2, % the randoms needed + nrands = 3, % the randoms needed args = {2}, % additional arguments nargs = 1; % number additional arguments get = &get_move_args, @@ -118,9 +118,29 @@ private define distribute_walkers (nodes, number_walkers) { return walkers_per_node; } +% get walkers per handled up to this node +private define previous_number_walkers (walkers_per_node) { + variable l = length(walkers_per_node); + variable handled_walkers = Int_Type[l]; + variable i, c = 0; + + _for i (0, l-1, 1) { + handled_walkers[i] = c; + if (i Date: Sat, 9 Nov 2019 22:30:50 +0100 Subject: [PATCH 43/89] Fix some bugs in emcee_new --- src/fitting/ensemble-samplers/emcee.sl | 57 +++++++------------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index c600de75..799f79c9 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,4 +1,4 @@ - +require("rand"); %%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% @@ -50,7 +50,7 @@ private define set_move_args () { } else (m,args) = (); if (length(args) != m.nargs) - throw UsageError: sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); + throw UsageError, sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); m.args = args; } @@ -59,10 +59,10 @@ variable STRETCH_MOVE = struct { move = &stretch_move, % the function nrands = 3, % the randoms needed args = {2}, % additional arguments - nargs = 1; % number additional arguments + nargs = 1, % number additional arguments get = &get_move_args, set = &set_move_args, -} +}; %}}}% @@ -144,47 +144,18 @@ private 
define iterate_emcee (walkers, set1, set2, u) {} % start emcee walkers % nwalkers is number walkers per parameter, nsteps is number of iterations -public define emcee (nwalkers, nsteps) { +define emcee_new (nwalkers, nsteps) { % setup user interaction - variable urand = qualifier("urand", &random_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) - variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps - variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps - variable cycle = qualifier("cycle", 50); % number of evaluations per cycle (recalculates some numbers after each cycle) - variable init = qualifier("init", &init_parameter_walker_uniform); % init function, takes number of total walkers and returns initialized array of parameter arrays + variable urand = qualifier("urand", &rand_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) + variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps + variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps if (get_fit_fun() == NULL) - throw UsageError: "No fit function defined"; + throw UsageError, "No fit function defined"; if (all_data() == NULL) - throw UsageError: "No data is loaded"; - + throw UsageError, "No data is loaded"; + % setup walkers - variable total_walkers = int(num_free_params()*nwalkers); - variable u = urand(cycle*total_walkers*move.nrands); - % to ensure independence between walker ensembles, roll dice - % and pick sets accordingly - variable sort = array_sort(urand(total_walkers)); - variable set1 = sort[0:total_walkers:2]; - variable set2 = sort[1:total_walkers:2]; - variable len_set1 = length(set1); - variable len_set2 = length(set2); - variable walkers = 
init(total_walkers); - variable u_set1 = rand_int(0, len_set1-1, cycle*total_walkers); - variable u_set2 = rand_int(0, len_set2-1, cycle*total_walkers); - - % setup for parallelization - variable nodes = 1; % number of parallel jobs - variable walkers_per_node = distribute_walkers(nodes, total_walkers); - variable handled_walkers = previous_number_walkers (walkers_per_node); - - % set the local variables for each node - variable node_set; % index of walkers the node handles - variable node_u; % random numbers used by the node - variable node_u_set1; % random number to select from set 1 - variable node_u_set2; % random number to select from set 2 - - % loop over the walkers and update them - variable c, steps = 0; - while (steps < nsteps) { - _for c (0, cycle-1, 1) { - - \ No newline at end of file + variable total_walkers = num_free_params()*nwalkers; + variable urands ; +} -- GitLab From 66e8659957fb0232a3bccfceb085cf35de22e23e Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 13 Nov 2019 16:22:45 +0100 Subject: [PATCH 44/89] Reimplement emcee_hammer New implementation is much more compact and hopefully simple to understand. Plus, it utilizes direct calls to the fit function instead of using eval_counts. This has the benefit of performance (at least the documentation claims this) and that the model evaluation takes the free parameters directly (so no calls to get_par or equiv. required). Currently the implementation uses the rcl-mpi module, which is not a clean implementation of the mpi routines. 
A future version should fix this if possible --- src/fitting/ensemble-samplers/emcee.sl | 171 +++++++++++++++++++++---- 1 file changed, 146 insertions(+), 25 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 799f79c9..316c5314 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -57,7 +57,7 @@ private define set_move_args () { variable STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function - nrands = 3, % the randoms needed + nrands = 2, % the randoms needed args = {2}, % additional arguments nargs = 1, % number additional arguments get = &get_move_args, @@ -127,35 +127,156 @@ private define previous_number_walkers (walkers_per_node) { _for i (0, l-1, 1) { handled_walkers[i] = c; if (i>1; % number of walkers in set 2 + variable set1_len = total_walkers-set2_len; % number of walkers in set 1 + variable all_u = (@urand)(total_walkers*nrand); % all random numbers for the next step + variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 + variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 + variable this_walkers, this_pivots, this_randoms; + _for i (0, nodes-1, 1) { + this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; + this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; + this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; + if (i == 0) { % set master locals, we utilize that the walkers the master handles are the first in the array + (@node_pivots_ref) = this_pivots; + (@node_randoms_ref) = this_randoms; + } +#ifexists rcl_mpi_init + else { + _for j (0, length(this_walkers)-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots 
from other set with tag 1 + } + () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 + } + } + } else { + _for j (0, length(node_walkers)-1, 1) { + () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); + () = rcl_mpi_org_recv_double(node_pivots[j], length(node_pivots[j]), 0, 1); + } + () = rcl_mpi_org_recv_double(node_randoms, length(node_randoms), 0, 2); +#endif + } +} + +private define catch_walkers_mpi (node, nodes, walkers_ref, + walkers_per_node, handled_walkers) { % <--- only relevant for master + variable walkers = @walkers_ref; + variable i,j; + variable npar = length(walkers[0]); % walkers are all equal + +#ifexists rcl_mpi_init + if (node == 0) { % master, collect all walkers + _for i (1, nodes-1, 1) { + _for j (0, walkers_per_node[i]-1, 1) + () = rcl_mpi_org_irecv_double(walkers[handled_walkers[i]+j], npar, i, i); + } + } else { + _for j (0, length(walkers)-1, 1) + () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); + } +#endif +} + +private define emcee_mpi (total_walkers, steps) { + variable init = qualifier("init", NULL); + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = qualifier("upick", NULL); +#ifexists rcl_mpi_init + variable node = rcl_mpi_init(); + variable nodes = rcl_mpi_numtasks(); + rcl_init_mpi_request(nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... +#else + variable node = 0; + variable nodes = 1; +#endif + if (NULL == init || NULL == move || NULL == urand || NULL == upick) + throw InternalError, "Initialization failed"; + + % master only variables + variable walkers_per_node; + variable handled_walkers; + variable sort; + + variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 
1 : 0); + variable npar = num_free_params(); + variable node_walkers = Array_Type[node_walkers_len]; + variable node_pivots = Array_Type[node_walkers_len]; + variable node_randoms = Double_Type[node_walkers_len*move.nrands]; + variable new_pos, prop_pos, prev_stat, new_stat; + variable tmp; + + _for tmp (0, node_walkers_len-1, 1) { + node_walkers[tmp] = Double_Type[npar]; + node_pivots[tmp] = Double_Type[npar]; + } + + if (node == 0) { % setup master things + node_walkers = (@init)(total_walkers); % initialize the walkers + sort = array_sort(rand_uniform(total_walkers)); + node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias + walkers_per_node = distribute_walkers(nodes, total_walkers); + handled_walkers = previous_number_walkers(walkers_per_node); + } else { + walkers_per_node = NULL; + handled_walkers = NULL; + } + + variable s; + variable fit_handle = open_fit(); % if no model is loaded this will crash + + _for s (0, steps-1, 1) { + if (node == 0) { _for tmp (0, total_walkers-1, 1) { () = printf("%d: ", s); print_array(node_walkers[tmp]);}} + release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... + &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); + _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... + (new_pos, prop_pos, prev_stat, new_stat) = + __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); + node_walkers[tmp] = new_pos; + } +#ifexists rcl_mpi_init + catch_walkers_mpi(node, nodes, &node_walkers, walkers_per_node, handled_walkers); % ... and catch 'em! 
+ () = rcl_mpi_barrier(); % keep the walkers in sync +#endif + } } -% this function is the core and may be evaluated by multiple tasks -private define iterate_emcee (walkers, set1, set2, u) {} - -% start emcee walkers -% nwalkers is number walkers per parameter, nsteps is number of iterations -define emcee_new (nwalkers, nsteps) { - % setup user interaction - variable urand = qualifier("urand", &rand_uniform); % uniform random number generator function (taking one argument, the number of urands to generate) - variable contin = qualifier("continue", NULL); % file that contains generated chain, start from last ensemble and append new steps - variable move = qualifier("move", STRETCH_MOVE); % move function type for individual steps - - if (get_fit_fun() == NULL) - throw UsageError, "No fit function defined"; - if (all_data() == NULL) - throw UsageError, "No data is loaded"; - - % setup walkers - variable total_walkers = num_free_params()*nwalkers; - variable urands ; +define emcee (walkers_per_par, steps) { + variable move = qualifier("move", STRETCH_MOVE); % defined move + variable urand = qualifier("urand", &rand_uniform); % double random generator + variable upick = qualifier("upick", &rand_int); % int random generator + variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function + + variable total_walkers = num_free_params()*walkers_per_par; + if (total_walkers > ((1<<29)-1)) + throw UsageError, "Unable to create ensemble for this large number of walkers"; + + () = printf("--- START ---\n"); + emcee_mpi(total_walkers, steps; move=move, urand=urand, upick=upick, init=init); + () = printf("--- END ---\n"); + rcl_mpi_finalize(); } -- GitLab From 7096a8c90ec20869f576cfb0f07c2a182c5ac2ea Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 16 Nov 2019 10:17:22 +0100 Subject: [PATCH 45/89] MPI mcmc ensamble sempler New implementation of the emcee hammer. Should be simpler to change move algorithm and output routines. 
TODO: Help, load_qualifier --- src/fitting/ensemble-samplers/emcee.sl | 367 ++++++++++++++++++++++--- 1 file changed, 332 insertions(+), 35 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 316c5314..aaad0c67 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,6 +1,9 @@ +% -*- mode: slang; mode: fold; -*- % + require("rand"); -%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY %{{{% +%{{{% defined moves for the ensemble walkers +%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY % define inverse cumulative distribution function for generating % random numbers following 1/z^2 when z in [1/a, a] @@ -54,7 +57,7 @@ private define set_move_args () { m.args = args; } -variable STRETCH_MOVE = struct { +variable EMCEE_STRETCH_MOVE = struct { name = "stretch move", move = &stretch_move, % the function nrands = 2, % the randoms needed @@ -64,8 +67,6 @@ variable STRETCH_MOVE = struct { set = &set_move_args, }; -%}}}% - % calculate the move for the fit 'fit_object' based on the current walker position x, % a randomly choosen walker x_j and an array of uniform random numbers enough to % calculate the next step. move is the struct encapsulating the move function @@ -77,6 +78,10 @@ private define __move (fit_object, x, x_j, u, move) { return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); } +%}}}% + +%{{{% initialization of the parameters + % get all free param values, index and min max private define free_par_sets () { variable all = get_params(); @@ -107,6 +112,184 @@ private define init_parameter_walker_uniform (n) { return walkers; } +%}}}% + +%{{{% write and read functions +private define emcee_write_chain_fits_init (io, filename, total_walkers, create, sloppy) { + % write ensemble evolution to fits file +#ifexists rcl_mpi_init + variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; +#else + variable modified_name = filename; +#endif + variable data_info; + list_data(&data_info); + variable freep = freeParameters(); + if (create) { % create the file(s) initially + io.handle = fits_open_file(modified_name, "c"); + + % write first table + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], ["J", "A"], [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); + fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, + array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1])) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + } else { % if loading, do some sanity checks + io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + 
fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and loaded chain model differ, unable to continue chain"; + return; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab.free_par)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return; + } + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and chain model have different free parameters"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; + return; + } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + } + + % IMPORTANT: stay on mcmcchain table + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "No a mcmc chain file"; + return; + } + + io.storage = fits_get_num_rows(io.handle); + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; + io.msg = NULL; +} + +private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle, prev_stat_cycle) { + variable tmp, update; + variable freep = freeParameters(); + variable npar = length(freep); + variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle + variable i,j; + variable collen = fits_get_num_rows(io.handle); + _for 
j (0, npar-1, 1) { + tmp = Double_Type[steps_walkers]; + _for i (0, steps_walkers-1, 1) + tmp[i] = walkers_cycle[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); + tmp = Int_Type[steps_walkers]; + if (NULL == prop_cycle) { + tmp = Int_Type[steps_walkers]+1; + } else { + _for j (0, steps_walkers-1, 1) + tmp[j] = any(walkers_cycle[j] == prop_cycle[j]); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, tmp); +} + +private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { + variable tmp, tmp2; + variable total_walkers = walker_per_parameter*number_parameter; + variable reread; + variable collen = fits_get_num_rows(io.handle); % length of chain + variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); + fits_update_key(io.handle, "NWALKERS", walker_per_parameter); + fits_update_key(io.handle, "NFREEPAR", number_parameter); + + () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], + io.storage+1, collen-io.storage, &reread); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); + collen = fits_get_num_rows(io.handle); % read length of chain summary + variable j; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; + + _for j (0, steps-1, 1) { + frac_update[j] = sum(reread[0][[0:total_walkers-1]+j*total_walkers])/total_walkers; + tmp2 = reread[1][[0:total_walkers-1]+j*total_walkers]; + min_stat[j] = min(tmp2); + max_stat[j] = max(tmp2); + med_stat[j] = median(tmp2); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, 
"FRAC_UPDATE"), collen+1, 1, frac_update); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); + + fits_close_file(io.handle); + io.handle = NULL; + io.storage = NULL; + io.msg = NULL; +} + +variable EMCEE_IO_FITS = struct { + open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level + write = &emcee_write_chain_fits, % write function, takes +? arguments + close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time + handle = NULL, % output handle, usually file pointer, if null after open() indicates error + all = 0, % if 1, all nodes will execute the function CARE: IF THIS IS THE CASE, THE WRITE FUNCTION MUST BE AWARE OF THE OTHER PROCESSES! + name = "emcee_io_fits", + storage = NULL, + msg = NULL, % error message + cycle = 1, % write every steps +}; + +%}}}% + +%{{{% some helpers ... 
% get walkers per node private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; @@ -133,6 +316,10 @@ private define previous_number_walkers (walkers_per_node) { return handled_walkers; } +%}}}% + +%{{{% mpi functions + private define release_walkers_mpi (node, nodes, total_walkers, nrand, walkers_per_node, handled_walkers, % <--- those are only relevant for master process node_walkers_ref, node_pivots_ref, node_randoms_ref) { @@ -155,13 +342,16 @@ private define release_walkers_mpi (node, nodes, total_walkers, variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 variable this_walkers, this_pivots, this_randoms; + _for i (0, nodes-1, 1) { this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; if (i == 0) { % set master locals, we utilize that the walkers the master handles are the first in the array - (@node_pivots_ref) = this_pivots; - (@node_randoms_ref) = this_randoms; + _for j (0, walkers_per_node[i]-1, 1) { + node_randoms[j] = this_randoms[j]; + node_pivots[j] = this_pivots[j]; + } } #ifexists rcl_mpi_init else { @@ -182,30 +372,47 @@ private define release_walkers_mpi (node, nodes, total_walkers, } } -private define catch_walkers_mpi (node, nodes, walkers_ref, +private define catch_walkers_mpi (node, nodes, walkers_ref, prop_walkers_ref, stat_ref, prev_stat_ref, walkers_per_node, handled_walkers) { % <--- only relevant for master variable walkers = @walkers_ref; + variable prop_walkers = @prop_walkers_ref; + variable stat = @stat_ref; + variable prev_stat = @prev_stat_ref; variable i,j; variable npar = length(walkers[0]); % walkers are all equal #ifexists rcl_mpi_init if (node == 0) { % master, collect all walkers _for 
i (1, nodes-1, 1) { - _for j (0, walkers_per_node[i]-1, 1) - () = rcl_mpi_org_irecv_double(walkers[handled_walkers[i]+j], npar, i, i); + _for j (0, walkers_per_node[i]-1, 1) { + () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); + () = rcl_mpi_org_recv_double(prop_walkers[handled_walkers[i]+j], npar, i, i); + } + () = rcl_mpi_org_recv_double(stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); + () = rcl_mpi_org_recv_double(prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); } } else { - _for j (0, length(walkers)-1, 1) + _for j (0, length(walkers)-1, 1) { () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); + () = rcl_mpi_org_isend_double(prop_walkers[j], npar, 0, node); + } + () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); + () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); } #endif } -private define emcee_mpi (total_walkers, steps) { +private define emcee_mpi (walker_per_par, number_par, steps) { + variable total_walkers = walker_per_par*number_par; variable init = qualifier("init", NULL); variable move = qualifier("move", NULL); variable urand = qualifier("urand", NULL); variable upick = qualifier("upick", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write_hook", NULL); + variable sloppy = qualifier("sloppy", NULL); + variable cont = qualifier("continue", NULL); + #ifexists rcl_mpi_init variable node = rcl_mpi_init(); variable nodes = rcl_mpi_numtasks(); @@ -217,26 +424,52 @@ private define emcee_mpi (total_walkers, steps) { if (NULL == init || NULL == move || NULL == urand || NULL == upick) throw InternalError, "Initialization failed"; + if (io.all || node == 0) { + io.open(output, total_walkers, cont == NULL, sloppy); + if (NULL == io.handle) + throw IOError, (NULL == io.msg) ? 
"" : io.msg; + } + % master only variables variable walkers_per_node; variable handled_walkers; variable sort; - - variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); - variable npar = num_free_params(); - variable node_walkers = Array_Type[node_walkers_len]; - variable node_pivots = Array_Type[node_walkers_len]; - variable node_randoms = Double_Type[node_walkers_len*move.nrands]; variable new_pos, prop_pos, prev_stat, new_stat; variable tmp; + variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); + variable node_walker_array_len = (node == 0) ? total_walkers : node_walkers_len; + variable npar = num_free_params(); - _for tmp (0, node_walkers_len-1, 1) { - node_walkers[tmp] = Double_Type[npar]; + variable node_walkers; % send & recv (differ between master and slaves) + variable node_prop_walkers; % recv (differ between master and slaves) + variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) + variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) + variable node_stat; % recv (differ between master and slaves) + variable node_prev_stat; % recv (differ between master and slaves) + + variable s; + variable fit_handle = open_fit(); % if no model is loaded this will crash + variable collector_len = 0; + if (io.all || node == 0) + collector_len = node_walkers_len*io.cycle; + variable walker_cycle; % enough to collect the walkers for one cycle + variable prop_cycle; + variable stat_cycle; + variable prev_stat_cycle; + variable cycle_step = 0; + + _for tmp (0, node_walkers_len-1, 1) node_pivots[tmp] = Double_Type[npar]; - } if (node == 0) { % setup master things - node_walkers = (@init)(total_walkers); % initialize the walkers + node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_prop_walkers = Array_Type[total_walkers]; + node_stat = Double_Type[total_walkers]+_Inf; + node_prev_stat = 
Double_Type[total_walkers]+_Inf; + + _for tmp (0, total_walkers-1, 1) + node_prop_walkers[tmp] = Double_Type[npar]; + sort = array_sort(rand_uniform(total_walkers)); node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); @@ -244,39 +477,103 @@ private define emcee_mpi (total_walkers, steps) { } else { walkers_per_node = NULL; handled_walkers = NULL; + + node_walkers = Array_Type[node_walkers_len]; + node_prop_walkers = Array_Type[node_walkers_len]; + node_stat = Double_Type[node_walkers_len]+_Inf; + node_prev_stat = Double_Type[node_walkers_len]+_Inf; + + _for tmp (0, node_walkers_len-1, 1) { + node_walkers[tmp] = Double_Type[npar]; + node_prop_walkers[tmp] = Double_Type[npar]; + } } - variable s; - variable fit_handle = open_fit(); % if no model is loaded this will crash + if (io.all || node==0) { % setup collector and write initial + walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle + prop_cycle = Array_Type[collector_len]; + stat_cycle = Double_Type[collector_len]+_Inf; + prev_stat_cycle = Double_Type[collector_len]+_Inf; + + _for tmp (0, collector_len-1, 1) { + walker_cycle[tmp] = Double_Type[npar]; + prop_cycle[tmp] = Double_Type[npar]; + } + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp] = node_walkers[tmp]; + prop_cycle[tmp] = node_prop_walkers[tmp]; + } + } - _for s (0, steps-1, 1) { - if (node == 0) { _for tmp (0, total_walkers-1, 1) { () = printf("%d: ", s); print_array(node_walkers[tmp]);}} + % the main loop where the magic happens + _for s (1, steps, 1) { + cycle_step = s mod io.cycle; release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
(new_pos, prop_pos, prev_stat, new_stat) = - __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); + __move(fit_handle, + node_walkers[tmp], + node_pivots[tmp], + node_randoms[[0:move.nrands-1]+move.nrands*tmp], + move); node_walkers[tmp] = new_pos; + node_prop_walkers[tmp] = prop_pos; + node_prev_stat[tmp] = prev_stat; + node_stat[tmp] = new_stat; + } + catch_walkers_mpi(node, nodes, &node_walkers, &node_prop_walkers, &node_stat, &node_prev_stat, + walkers_per_node, handled_walkers); % ... and catch 'em! + + if (io.all || node == 0) { + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + prop_cycle[tmp+cycle_step*node_walker_array_len] = node_prop_walkers[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } + + ifnot (cycle_step) + io.write(walker_cycle, prop_cycle, stat_cycle, prev_stat_cycle); } -#ifexists rcl_mpi_init - catch_walkers_mpi(node, nodes, &node_walkers, walkers_per_node, handled_walkers); % ... and catch 'em! 
- () = rcl_mpi_barrier(); % keep the walkers in sync -#endif + } + + % write missing pieces + variable write_initial = (steps < io.cycle); % if we have not written out any cycle, we have to treat the init walkers special + if (io.all || node == 0) { + if (cycle_step) + io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + prop_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); + + io.close(steps, walker_per_par, number_par); } } -define emcee (walkers_per_par, steps) { - variable move = qualifier("move", STRETCH_MOVE); % defined move +%}}}% + +define emcee_new (walkers_per_par, steps) { + variable move = qualifier("move", EMCEE_STRETCH_MOVE); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function + variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine + variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); + variable sloppy = qualifier("sloppy", 0); + + if (NULL == get_fit_fun()) + throw UsageError, "No fit function loaded"; + if (NULL == all_data()) + throw UsageError, "No data set loaded"; + ifnot (0 ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; - () = printf("--- START ---\n"); - emcee_mpi(total_walkers, steps; move=move, urand=urand, upick=upick, init=init); - () = printf("--- END ---\n"); + emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, + init=init, write_hook=write_hook, output=output, sloppy=sloppy); rcl_mpi_finalize(); } -- GitLab From f1dc9381fd1d3cf283f1b53dc8f6e22fd3316f28 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 30 
Nov 2019 20:13:24 +0100 Subject: [PATCH 46/89] Fix bugs and add continue qualifier --- src/fitting/ensemble-samplers/emcee.sl | 155 +++++++++++++++---------- 1 file changed, 93 insertions(+), 62 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index aaad0c67..c299b140 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -19,6 +19,7 @@ private define stretch_move (fit_object, x, x_j, u, a) { variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step + variable update = 0; % update indicator % evaluate fit function for 'x' xstat = fit_object.eval_statistic(x;nocopy); @@ -33,12 +34,14 @@ private define stretch_move (fit_object, x, x_j, u, a) { % caluculate if we accept the step based on the statistics of the % model. We assume that the statistic is given as -2 log likelihood - if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) + if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { x_t1 = y; + update = 1; + } } catch IsisError; % return new walker position, proposed position, xstat, ystat - return (x_t1, y, xstat, ystat); + return (x_t1, update, xstat, ystat); } private define get_move_args (m) { @@ -122,7 +125,9 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, #else variable modified_name = filename; #endif + variable init_values; variable data_info; + variable i; list_data(&data_info); variable freep = freeParameters(); if (create) { % create the file(s) initially @@ -152,6 +157,8 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + init_values = NULL; } else { % if loading, do some sanity 
checks io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); @@ -159,15 +166,15 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return; + return NULL; } variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab.free_par)) { + ifnot (struct_field_exists(tab, "free_par")) { fits_close_file(io.handle); io.handle = NULL; io.msg = "Not a mcmc chain file"; - return; + return NULL; } if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { fits_close_file(io.handle); @@ -183,16 +190,35 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "Not a mcmc chain file"; - return; + return NULL; } if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { fits_close_file(io.handle); io.handle = NULL; io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return; + return NULL; } fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + init_values = Array_Type[total_walkers]; + tab = fits_read_table(io.handle); + variable j; + variable names = get_struct_field_names(tab); + print(names); + variable l = length(names)-2; + _for j (0, total_walkers-1, 1) { + init_values[j] = Double_Type[l]; + _for i (0, l-1, 1) + init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; + } } % IMPORTANT: stay on mcmcchain table @@ -200,7 +226,7 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, fits_close_file(io.handle); io.handle = NULL; io.msg = "No a mcmc chain file"; - return; + 
return NULL; } io.storage = fits_get_num_rows(io.handle); @@ -210,9 +236,11 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, if (io.cycle < 1) io.cycle = 1; io.msg = NULL; + + return init_values; } -private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle, prev_stat_cycle) { +private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { variable tmp, update; variable freep = freeParameters(); variable npar = length(freep); @@ -227,13 +255,8 @@ private define emcee_write_chain_fits (io, walkers_cycle, prop_cycle, stat_cycle } () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); tmp = Int_Type[steps_walkers]; - if (NULL == prop_cycle) { - tmp = Int_Type[steps_walkers]+1; - } else { - _for j (0, steps_walkers-1, 1) - tmp[j] = any(walkers_cycle[j] == prop_cycle[j]); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, tmp); + + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { @@ -276,7 +299,7 @@ private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, numbe } variable EMCEE_IO_FITS = struct { - open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level + open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level, returns null or last walkers if in open mode write = &emcee_write_chain_fits, % write function, takes +? 
arguments close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time handle = NULL, % output handle, usually file pointer, if null after open() indicates error @@ -372,30 +395,36 @@ private define release_walkers_mpi (node, nodes, total_walkers, } } -private define catch_walkers_mpi (node, nodes, walkers_ref, prop_walkers_ref, stat_ref, prev_stat_ref, +private define catch_walkers_mpi (node, nodes, walkers_ref, update_ref, stat_ref, prev_stat_ref, walkers_per_node, handled_walkers) { % <--- only relevant for master variable walkers = @walkers_ref; - variable prop_walkers = @prop_walkers_ref; + variable update = @update_ref; variable stat = @stat_ref; variable prev_stat = @prev_stat_ref; variable i,j; variable npar = length(walkers[0]); % walkers are all equal + variable stat_part; + variable update_part; + #ifexists rcl_mpi_init if (node == 0) { % master, collect all walkers _for i (1, nodes-1, 1) { - _for j (0, walkers_per_node[i]-1, 1) { + stat_part = Double_Type[walkers_per_node[i]]; + update_part = Int_Type[walkers_per_node[i]]; + _for j (0, walkers_per_node[i]-1, 1) () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); - () = rcl_mpi_org_recv_double(prop_walkers[handled_walkers[i]+j], npar, i, i); - } - () = rcl_mpi_org_recv_double(stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); - () = rcl_mpi_org_recv_double(prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]], walkers_per_node[i], i, i); + () = rcl_mpi_org_recv_int(update_part, walkers_per_node[i], i, i); + update[[0:walkers_per_node[i]-1]+handled_walkers[i]] = update_part; + () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); + stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; + () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); + prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; } } else { - _for j (0, length(walkers)-1, 
1) { + _for j (0, length(walkers)-1, 1) () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); - () = rcl_mpi_org_isend_double(prop_walkers[j], npar, 0, node); - } + () = rcl_mpi_org_isend_int(update, length(update), 0, node); () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); } @@ -412,6 +441,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable io = qualifier("write_hook", NULL); variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); + variable init_walkers; + if (cont != NULL) + output = cont; #ifexists rcl_mpi_init variable node = rcl_mpi_init(); @@ -425,7 +457,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { throw InternalError, "Initialization failed"; if (io.all || node == 0) { - io.open(output, total_walkers, cont == NULL, sloppy); + init_walkers = io.open(output, total_walkers, cont == NULL, sloppy); if (NULL == io.handle) throw IOError, (NULL == io.msg) ? "" : io.msg; } @@ -434,14 +466,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable walkers_per_node; variable handled_walkers; variable sort; - variable new_pos, prop_pos, prev_stat, new_stat; + variable new_pos, update_pos, prev_stat, new_stat; variable tmp; variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; variable npar = num_free_params(); variable node_walkers; % send & recv (differ between master and slaves) - variable node_prop_walkers; % recv (differ between master and slaves) + variable node_update; % recv (differ between master and slaves) variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) variable node_stat; % recv (differ between master and slaves) @@ -451,9 +483,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable fit_handle = open_fit(); % if no model is loaded this will crash variable collector_len = 0; if (io.all || node == 0) - collector_len = node_walkers_len*io.cycle; + collector_len = node_walker_array_len*io.cycle; variable walker_cycle; % enough to collect the walkers for one cycle - variable prop_cycle; + variable update_cycle; variable stat_cycle; variable prev_stat_cycle; variable cycle_step = 0; @@ -462,14 +494,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node_pivots[tmp] = Double_Type[npar]; if (node == 0) { % setup master things - node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors - node_prop_walkers = Array_Type[total_walkers]; + if (cont != NULL) + node_walkers = init_walkers; + else + node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_update = Int_Type[total_walkers]; node_stat = Double_Type[total_walkers]+_Inf; node_prev_stat = Double_Type[total_walkers]+_Inf; - _for tmp (0, total_walkers-1, 1) - node_prop_walkers[tmp] = Double_Type[npar]; - sort = array_sort(rand_uniform(total_walkers)); node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); @@ -479,30 +511,22 @@ private define emcee_mpi (walker_per_par, number_par, steps) { handled_walkers = NULL; node_walkers = 
Array_Type[node_walkers_len]; - node_prop_walkers = Array_Type[node_walkers_len]; + node_update = Int_Type[node_walkers_len]; node_stat = Double_Type[node_walkers_len]+_Inf; node_prev_stat = Double_Type[node_walkers_len]+_Inf; - _for tmp (0, node_walkers_len-1, 1) { + _for tmp (0, node_walkers_len-1, 1) node_walkers[tmp] = Double_Type[npar]; - node_prop_walkers[tmp] = Double_Type[npar]; - } } if (io.all || node==0) { % setup collector and write initial walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle - prop_cycle = Array_Type[collector_len]; + update_cycle = Int_Type[collector_len]; stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - _for tmp (0, collector_len-1, 1) { + _for tmp (0, collector_len-1, 1) walker_cycle[tmp] = Double_Type[npar]; - prop_cycle[tmp] = Double_Type[npar]; - } - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp] = node_walkers[tmp]; - prop_cycle[tmp] = node_prop_walkers[tmp]; - } } % the main loop where the magic happens @@ -511,30 +535,36 @@ private define emcee_mpi (walker_per_par, number_par, steps) { release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
- (new_pos, prop_pos, prev_stat, new_stat) = + (new_pos, update_pos, prev_stat, new_stat) = __move(fit_handle, node_walkers[tmp], node_pivots[tmp], node_randoms[[0:move.nrands-1]+move.nrands*tmp], move); node_walkers[tmp] = new_pos; - node_prop_walkers[tmp] = prop_pos; + node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; node_stat[tmp] = new_stat; } - catch_walkers_mpi(node, nodes, &node_walkers, &node_prop_walkers, &node_stat, &node_prev_stat, + catch_walkers_mpi(node, nodes, &node_walkers, &node_update, &node_stat, &node_prev_stat, walkers_per_node, handled_walkers); % ... and catch 'em! if (io.all || node == 0) { - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - prop_cycle[tmp+cycle_step*node_walker_array_len] = node_prop_walkers[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; - } + if (cont != NULL) { + % if we continue chain, do not write initial walkers + cont = NULL; + s -= 1; + } else { + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } - ifnot (cycle_step) - io.write(walker_cycle, prop_cycle, stat_cycle, prev_stat_cycle); + ifnot (cycle_step) + io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); + } } } @@ -543,7 +573,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (io.all || node == 0) { if (cycle_step) io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - prop_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], + update_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], 
stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); @@ -561,6 +591,7 @@ define emcee_new (walkers_per_par, steps) { variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); variable sloppy = qualifier("sloppy", 0); + variable cont = qualifier("continue", NULL); if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; @@ -574,6 +605,6 @@ define emcee_new (walkers_per_par, steps) { throw UsageError, "Unable to create ensemble for this large number of walkers"; emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, write_hook=write_hook, output=output, sloppy=sloppy); + init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); rcl_mpi_finalize(); } -- GitLab From 2d208f43708072b96a3b81e8a9630339600a468f Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 5 Feb 2020 16:35:45 +0100 Subject: [PATCH 47/89] Fix incomplete parameter names in fits write routine The table attached to the fits file specified the column for the parameter names as 'A'. As a result, only the first character of each string was stored. Now the table is initialized as 'nA' where n is the maximum length of the parameter names. 
--- src/fitting/ensemble-samplers/emcee.sl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index c299b140..16775a13 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -13,7 +13,7 @@ private define inverse_cdf (u, a) { } % stretch move as of Goodman & Weare 2010 -% Move must evaluate the functions +% Move must evaluate the fit function private define stretch_move (fit_object, x, x_j, u, a) { variable z = inverse_cdf(u[0], a); variable xstat = 1e32, ystat = -1e32; % from mikes code @@ -127,6 +127,7 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, #endif variable init_values; variable data_info; + variable par_names; variable i; list_data(&data_info); variable freep = freeParameters(); @@ -134,20 +135,24 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, io.handle = fits_open_file(modified_name, "c"); % write first table - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], ["J", "A"], [" parameter indices", " parameter names"]); + parnames = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parnames)))], + [" parameter indices", " parameter names"]); fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, - array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1])) + if 
(_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, parnames)) throw IOError; % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); @@ -503,7 +508,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node_prev_stat = Double_Type[total_walkers]+_Inf; sort = array_sort(rand_uniform(total_walkers)); - node_walkers = node_walkers[sort]; % randomize them to be in the safe side, init may introduce bias + node_walkers = node_walkers[sort]; % randomize them to be on the safe side, init may introduce bias walkers_per_node = distribute_walkers(nodes, total_walkers); handled_walkers = previous_number_walkers(walkers_per_node); } else { @@ -606,5 +611,7 @@ define emcee_new (walkers_per_par, steps) { emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); +#ifexists rcl_mpi_init rcl_mpi_finalize(); +#endif } -- GitLab From 49a26d741567fdbf8543ea42591fcaa5a3d07894 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 5 Feb 2020 18:38:39 +0100 Subject: [PATCH 48/89] Fix emcee fits write routine when all nodes write results Finalize function failed to write result because it tried to always write 'total_walkers' entries. 
This does not work when each node is supposed to write the handled walkers. --- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 16775a13..e67005e0 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -264,10 +264,10 @@ private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cyc () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } -private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter) { +private define emcee_finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { variable tmp, tmp2; - variable total_walkers = walker_per_parameter*number_parameter; variable reread; + variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); all_steps = (all_steps<0) ? 
steps : all_steps + steps; @@ -286,8 +286,8 @@ private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, numbe variable max_stat = Double_Type[steps]; _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:total_walkers-1]+j*total_walkers])/total_walkers; - tmp2 = reread[1][[0:total_walkers-1]+j*total_walkers]; + frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); @@ -582,7 +582,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); - io.close(steps, walker_per_par, number_par); + io.close(steps, node_walker_array_len, walker_per_par, number_par); } } -- GitLab From e64d310b68a5f69442478ebcd96614eff227694c Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Fri, 7 Feb 2020 14:39:29 +0100 Subject: [PATCH 49/89] Fix proposal test Step rejection was compared with a linear random number although the probability was given as log likelihood. This caused many steps to get rejected and resulted in weird "static" walkers. --- src/fitting/ensemble-samplers/emcee.sl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e67005e0..a7b881f4 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -34,7 +34,7 @@ private define stretch_move (fit_object, x, x_j, u, a) { % caluculate if we accept the step based on the statistics of the % model. 
We assume that the statistic is given as -2 log likelihood - if(u[1] <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { x_t1 = y; update = 1; } @@ -135,17 +135,17 @@ private define emcee_write_chain_fits_init (io, filename, total_walkers, create, io.handle = fits_open_file(modified_name, "c"); % write first table - parnames = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parnames)))], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], [" parameter indices", " parameter names"]); fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, parnames)) + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) throw IOError; % write second table -- GitLab From b04bdb8f13d1dd0013968e33c17eaab2e6aee7a8 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 20 Feb 2020 15:41:38 +0100 Subject: [PATCH 50/89] Clean new emcee code for better overview Code structure should allow for simpler implementation of additional move algorithms and other functions. 
Still missing: Description and help --- src/fitting/ensemble-samplers/emcee-init.sl | 68 ++++ src/fitting/ensemble-samplers/emcee-io.sl | 226 ++++++++++++ src/fitting/ensemble-samplers/emcee-moves.sl | 89 +++++ src/fitting/ensemble-samplers/emcee.sl | 348 ++----------------- 4 files changed, 409 insertions(+), 322 deletions(-) create mode 100644 src/fitting/ensemble-samplers/emcee-init.sl create mode 100644 src/fitting/ensemble-samplers/emcee-io.sl create mode 100644 src/fitting/ensemble-samplers/emcee-moves.sl diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl new file mode 100644 index 00000000..cb265ec7 --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -0,0 +1,68 @@ +% -*- mode: slang; mode: fold; -*- + +require("rand"); + +% WALKER INIT FUNCTIONS +% Allow different functions for initializing walkers. Follows the same idea +% as for the move steps. +% +% Distribution is only done by the master process, so we don't have to care +% about the random numbers. 
+% +% Init function takes one argument directly which is the number of walkers + +%{{{% helpers +private define emcee_init_uniform(); +private variable EMCEE_INIT = Assoc_Type[Ref_Type, &emcee_init_uniform]; +private define __init_globals () { return struct_combine( + struct { + name = "unspecified init", + init = NULL, + }, + __qualifiers()); +} +define emcee_get_init(key) { return (@EMCEE_INIT[key])(;; __qualifiers); } +define emcee_get_inits () { return assoc_get_keys(EMCEE_INIT); } + +% get all free param values, index and min max +private define free_par_sets () { + variable all = get_params(); + variable i, ind = {}, v = {}, mi = {}, ma = {}; + _for i (0, length(all)-1, 1) { + ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) + continue; + list_append(ind, all[i].index); + list_append(v, all[i].value); + list_append(mi, all[i].min); + list_append(ma, all[i].max); + } + + return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); +} +%}}}% + +%{{{% Uniform initialization function +% pick random parameter values within the boundaries +private define init_parameter_walker_uniform (n) { + variable walkers = Array_Type[n]; + variable i; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + + _for i (0, n-1, 1) + walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; + + return walkers; +} +private define emcee_init_uniform () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __init_globals(; + name="uniform init", + init=&init_parameter_walker_uniform + ); + return struct_combine(struct_combine(defaults, __qualifiers()), settings); +} +%}}}% + +EMCEE_INIT["uniform"] = &emcee_init_uniform; diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl new file mode 100644 index 00000000..433206ea --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -0,0 +1,226 @@ +% -*- mode: slang; mode: fold; -*- % + +% EMCEE INPUT OUTPUT ROUTINES +% Same structure as moves. Routines for output and input are describeed +% here. + +%{{{% helpers +private define emcee_io_fits(); +private variable EMCEE_IO = Assoc_Type[Ref_Type, &emcee_io_fits]; +define emcee_get_io (key) { return (@EMCEE_IO[key])(;; __qualifiers); } +define emcee_get_ios () { return assoc_get_keys(EMCEE_IO); } +private define __io_globals () { + return struct_combine(struct { + name = "unspecified io", + open = NULL, % open function, takes 'io-object', 'filename', 'number walkers', 'create flag', 'sloppy flag' + write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array', 'pre statistisc array' + close = NULL, % finalizes output, takes 'io-object', 'current number steps', 'number walkers', 'walker per parameter', 'number parameter' + handle = NULL, % io access (usually file pointer) + all = 0, % flag indicating if all nodes execute io or just master + msg = NULL, % current io message (used only for feedback) + cycle = 1, % number of steps to perform before write + }, __qualifiers()); +} +%}}}% +%{{{% FITS input output routines +private define write_chain_fits_init (io, filename, total_walkers, create, sloppy) { + % write ensemble evolution to fits file +#ifexists rcl_mpi_init + variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; +#else + variable modified_name = filename; +#endif + variable init_values; + variable data_info; + variable par_names; + variable i; + list_data(&data_info); + variable freep = freeParameters(); + if (create) { % create the file(s) initially + io.handle = fits_open_file(modified_name, "c"); + + % write first table + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], + [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); + fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], + ["D", "J", ["D"][freep*0]], + [" fit statistics", " update indicator", [" parameter values"][freep*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + init_values = NULL; + } else { % if 
loading, do some sanity checks + io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and loaded chain model differ, unable to continue chain"; + return NULL; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current model and chain model have different free parameters"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; + return NULL; + } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "Not a mcmc chain file"; + return NULL; + } + + init_values = Array_Type[total_walkers]; + tab = fits_read_table(io.handle); + variable j; + variable names = get_struct_field_names(tab); + print(names); + variable l = length(names)-2; + _for j (0, total_walkers-1, 1) { + init_values[j] = Double_Type[l]; + _for i (0, l-1, 1) + init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; + } + } + + % IMPORTANT: stay on mcmcchain 
table + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + io.msg = "No a mcmc chain file"; + return NULL; + } + + io.storage = fits_get_num_rows(io.handle); + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; + io.msg = NULL; + + return init_values; +} + +private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { + variable tmp, update; + variable freep = freeParameters(); + variable npar = length(freep); + variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle + variable i,j; + variable collen = fits_get_num_rows(io.handle); + _for j (0, npar-1, 1) { + tmp = Double_Type[steps_walkers]; + _for i (0, steps_walkers-1, 1) + tmp[i] = walkers_cycle[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); + tmp = Int_Type[steps_walkers]; + + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); +} + +private define finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { + variable tmp, tmp2; + variable reread; + variable total_walkers = walker_per_parameter*number_parameter; + variable collen = fits_get_num_rows(io.handle); % length of chain + variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? 
steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); + fits_update_key(io.handle, "NWALKERS", walker_per_parameter); + fits_update_key(io.handle, "NFREEPAR", number_parameter); + + () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], + io.storage+1, collen-io.storage, &reread); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); + collen = fits_get_num_rows(io.handle); % read length of chain summary + variable j; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; + + _for j (0, steps-1, 1) { + frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; + min_stat[j] = min(tmp2); + max_stat[j] = max(tmp2); + med_stat[j] = median(tmp2); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); + + fits_close_file(io.handle); + io.handle = NULL; + io.storage = NULL; + io.msg = NULL; +} + +private define emcee_io_fits () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __io_globals(; + name = "io fits", + open = &write_chain_fits_init, + write = &write_chain_fits, + close = &finalize_chain_fits, + storage = NULL, + ); + return struct_combine(struct_combine(defaults, __qualifiers()), settings); +} + +%}}}% + +% Provide io functions +EMCEE_IO["fits"] = &emcee_io_fits; diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl new file mode 100644 index 00000000..81bbd91d --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -0,0 +1,89 @@ +% -*- mode: slang; mode: fold; -*- % + +% DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER +% Walker moves should be defined as functions where the function returns a +% structure suitable to describe the step algorithm. For adjustment the +% function should combine the qualifiers with the default structure and +% should handle an argument that may be NULL or a structure with the same +% properties, where the argument should overwrite any qualifiers given to +% the function. The move function MUST evaluate the fit model (in the +% simplest case by using the fit object) and return new position and +% statistics. +% +% Besides additional arguments given in the structure the algorithm must +% process the fit object, current position, pivot position and an array of +% random numbers. +% +% Functions should be private and only accessible through the access function +% 'get_emcee' or 'get_emcee_move'. +% Required default parameters can be retrieved with '__move_globals'. 
+% +% For an example see the STRETCH_MOVE +%{{{% Helpers for move steps +private define __move_globals () { + return struct_combine ( struct { + name = "unspecified move", % name of the step function to be used in output files + move = NULL, % the move function itself, NULL == Error + nrands = 0, % number of required random numbers for each step + }, __qualifiers); +} +private define emcee_move_stretch(); % for default value +private variable EMCEE_MOVES = Assoc_Type[Ref_Type, &emcee_move_stretch]; +define emcee_get_move (key) { return (@EMCEE_MOVES[key])(;;__qualifiers); } +define emcee_get_moves () { return assoc_get_keys(EMCEE_MOVES); } +%}}}% + +%{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define inverse_cdf (u, a) { + return (u*(a-1.)+1.)^2./a; +} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the fit function +private define stretch_move (move, fit_object, x, x_j, u) { + variable z = inverse_cdf(u[0], move.a); + variable xstat = 1e32, ystat = -1e32; % from mikes code + variable y; % step proposition + variable x_t1 = x; % resulting step + variable update = 0; % update indicator + + % evaluate fit function for 'x' + xstat = fit_object.eval_statistic(x;nocopy); + + % calculate the new position (utilize array operations) + y = x_j + z*(x-x_j); + + % try evaluating, if out of bounds, does nothing + try { + % evaluate fit function for 'y' + ystat = fit_object.eval_statistic(y;nocopy); + + % caluculate if we accept the step based on the statistics of the + % model. 
We assume that the statistic is given as -2 log likelihood + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + x_t1 = y; + update = 1; + } + } catch IsisError; + + % return new walker position, proposed position, xstat, ystat + return (x_t1, update, xstat, ystat); +} + +private define emcee_move_stretch () { + variable settings = (_NARGS==1)? () : NULL; + variable defaults = __move_globals(; + name="stretch move", + move=&stretch_move, + nrands=2, + a=2. % move scaling + ); + return struct_combine(struct_combine(defaults, __qualifiers), settings); % combine settings +} +%}}}% + +% Provide moves: +EMCEE_MOVES["stretch"] = &emcee_move_stretch; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index a7b881f4..3c5bb5b2 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,321 +1,6 @@ % -*- mode: slang; mode: fold; -*- % require("rand"); - -%{{{% defined moves for the ensemble walkers -%%% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY - -% define inverse cumulative distribution function for generating -% random numbers following 1/z^2 when z in [1/a, a] -% TODO: should make this an adjustable thing -private define inverse_cdf (u, a) { - return (u*(a-1)+1)^2/a; -} - -% stretch move as of Goodman & Weare 2010 -% Move must evaluate the fit function -private define stretch_move (fit_object, x, x_j, u, a) { - variable z = inverse_cdf(u[0], a); - variable xstat = 1e32, ystat = -1e32; % from mikes code - variable y; % step proposition - variable x_t1 = x; % resulting step - variable update = 0; % update indicator - - % evaluate fit function for 'x' - xstat = fit_object.eval_statistic(x;nocopy); - - % calculate the new position (utilize array operations) - y = x_j + z*(x-x_j); - - % try evaluating, if out of bounds, does nothing - try { - % evaluate fit function for 'y' - ystat = fit_object.eval_statistic(y;nocopy); - - % caluculate if we accept the step based on 
the statistics of the - % model. We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { - x_t1 = y; - update = 1; - } - } catch IsisError; - - % return new walker position, proposed position, xstat, ystat - return (x_t1, update, xstat, ystat); -} - -private define get_move_args (m) { - return m.args; -} - -private define set_move_args () { - variable args, m; - if (_NARGS>2) { - args = __pop_list(_NARGS-1); - m = (); - } else - (m,args) = (); - if (length(args) != m.nargs) - throw UsageError, sprintf("%s expects %d arguments, set it with .set_move_args(...)!", m.name, m.nargs); - m.args = args; -} - -variable EMCEE_STRETCH_MOVE = struct { - name = "stretch move", - move = &stretch_move, % the function - nrands = 2, % the randoms needed - args = {2}, % additional arguments - nargs = 1, % number additional arguments - get = &get_move_args, - set = &set_move_args, -}; - -% calculate the move for the fit 'fit_object' based on the current walker position x, -% a randomly choosen walker x_j and an array of uniform random numbers enough to -% calculate the next step. 
move is the struct encapsulating the move function -private define __move (fit_object, x, x_j, u, move) { - % for alternative step functions, must be symmetric (that is, - % Pr(x -> y) = Pr(y -> x)) otherwise no detailed balance - - % push standard arguments & arguments for specified move - return move(__push_list(list_concat({fit_object, x, x_j, u}, move.get))); -} - -%}}}% - -%{{{% initialization of the parameters - -% get all free param values, index and min max -private define free_par_sets () { - variable all = get_params(); - variable i, ind = {}, v = {}, mi = {}, ma = {}; - _for i (0, length(all)-1, 1) { - ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) - continue; - list_append(ind, all[i].index); - list_append(v, all[i].value); - list_append(mi, all[i].min); - list_append(ma, all[i].ma); - } - - return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); -} - -% pick random parameter values within the boundaries -private define init_parameter_walker_uniform (n) { - variable walkers = Array_Type[n]; - variable i; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); - - _for i (0, n-1, 1) - walkers[i] = rand_uniform(num_p)*(p_max-p_min)+p_min; - - return walkers; -} - -%}}}% - -%{{{% write and read functions -private define emcee_write_chain_fits_init (io, filename, total_walkers, create, sloppy) { - % write ensemble evolution to fits file -#ifexists rcl_mpi_init - variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; -#else - variable modified_name = filename; -#endif - variable init_values; - variable data_info; - variable par_names; - variable i; - list_data(&data_info); - variable freep = freeParameters(); - if (create) { % create the file(s) initially - io.handle = fits_open_file(modified_name, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); - fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - init_values = NULL; - } else { % if 
loading, do some sanity checks - io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return NULL; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and chain model have different free parameters"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return NULL; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - init_values = Array_Type[total_walkers]; - tab = fits_read_table(io.handle); - variable j; - variable names = get_struct_field_names(tab); - print(names); - variable l = length(names)-2; - _for j (0, total_walkers-1, 1) { - init_values[j] = Double_Type[l]; - _for i (0, l-1, 1) - init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; - } - } - - % IMPORTANT: stay on mcmcchain 
table - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "No a mcmc chain file"; - return NULL; - } - - io.storage = fits_get_num_rows(io.handle); - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; - if (io.cycle < 1) - io.cycle = 1; - io.msg = NULL; - - return init_values; -} - -private define emcee_write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { - variable tmp, update; - variable freep = freeParameters(); - variable npar = length(freep); - variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle - variable i,j; - variable collen = fits_get_num_rows(io.handle); - _for j (0, npar-1, 1) { - tmp = Double_Type[steps_walkers]; - _for i (0, steps_walkers-1, 1) - tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - tmp = Int_Type[steps_walkers]; - - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); -} - -private define emcee_finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { - variable tmp, tmp2; - variable reread; - variable total_walkers = walker_per_parameter*number_parameter; - variable collen = fits_get_num_rows(io.handle); % length of chain - variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? 
steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); - fits_update_key(io.handle, "NWALKERS", walker_per_parameter); - fits_update_key(io.handle, "NFREEPAR", number_parameter); - - () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.storage+1, collen-io.storage, &reread); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); - collen = fits_get_num_rows(io.handle); % read length of chain summary - variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; - - _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; - min_stat[j] = min(tmp2); - max_stat[j] = max(tmp2); - med_stat[j] = median(tmp2); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); - - fits_close_file(io.handle); - io.handle = NULL; - io.storage = NULL; - io.msg = NULL; -} - -variable EMCEE_IO_FITS = struct { - open = &emcee_write_chain_fits_init, % open file for reading or appending, takes +4 arguments: 1: usually filename, 2: num walkers, 3: indicator if create or read, 4: sloppy level, returns null or last walkers if in open mode - write = &emcee_write_chain_fits, % write function, takes +? 
arguments - close = &emcee_finalize_chain_fits, % takes +1 argument, can be information only available after finish, i.e., calculation time - handle = NULL, % output handle, usually file pointer, if null after open() indicates error - all = 0, % if 1, all nodes will execute the function CARE: IF THIS IS THE CASE, THE WRITE FUNCTION MUST BE AWARE OF THE OTHER PROCESSES! - name = "emcee_io_fits", - storage = NULL, - msg = NULL, % error message - cycle = 1, % write every steps -}; - -%}}}% %{{{% some helpers ... % get walkers per node @@ -502,7 +187,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (cont != NULL) node_walkers = init_walkers; else - node_walkers = (@init)(total_walkers); % initialize the walkers & other collectors + node_walkers = init.init(total_walkers); % initialize the walkers & other collectors node_update = Int_Type[total_walkers]; node_stat = Double_Type[total_walkers]+_Inf; node_prev_stat = Double_Type[total_walkers]+_Inf; @@ -541,11 +226,10 @@ private define emcee_mpi (walker_per_par, number_par, steps) { &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... 
(new_pos, update_pos, prev_stat, new_stat) = - __move(fit_handle, + move.move(fit_handle, node_walkers[tmp], node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp], - move); + node_randoms[[0:move.nrands-1]+move.nrands*tmp]); node_walkers[tmp] = new_pos; node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; @@ -588,12 +272,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { %}}}% +define emcee_get (s) { + variable split = strchop(s, '/', 0); + if (split[0] == "move" || split[0] == "moves") { + if (length(split)==1) return emcee_get_moves(); + else if (any(emcee_get_moves() == split[1])) return emcee_get_move(split[1];; __qualifiers()); + else vmessage("*** unknown move: '%s'", split[1]); + } else if(split[0] == "init" || split[0] == "inits") { + if (length(split)==1) return emcee_get_inits(); + else if (any(emcee_get_inits() == split[1])) return emcee_get_init(split[1];; __qualifiers()); + else vmessage("*** unknown init: '%s'", split[1]); + } else if (split[0] == "io" || split[0] == "ios") { + if (length(split)==1) return emcee_get_ios(); + else if (any(emcee_get_moves() == split[1])) return emcee_get_io(split[1];; __qualifiers()); + else vmessage("*** unknown io: '%s'", split[1]); + } else { + vmessage("*** unknown target: '%s'", s); + } + return NULL; +} + define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", EMCEE_STRETCH_MOVE); % defined move + variable move = qualifier("move", emcee_get_move("stretch")); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", &init_parameter_walker_uniform); % initialization function - variable write_hook = qualifier("write_hook", EMCEE_IO_FITS); % output routine + variable init = qualifier("init", emcee_get_init("uniform")); % initialization function + variable write_hook = qualifier("io", emcee_get_io("fits")); % 
output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); variable sloppy = qualifier("sloppy", 0); variable cont = qualifier("continue", NULL); -- GitLab From d24f06e98a232308c14b195658938364d73a8233 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 20:08:47 +0200 Subject: [PATCH 51/89] Fix objective code bug Uniform init function did not respect object oriented slang syntax and was defined with one missing argument --- src/fitting/ensemble-samplers/emcee-init.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index cb265ec7..b0cb80fe 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -43,7 +43,7 @@ private define free_par_sets () { %{{{% Uniform initialization function % pick random parameter values within the boundaries -private define init_parameter_walker_uniform (n) { +private define init_parameter_walker_uniform (init, n) { variable walkers = Array_Type[n]; variable i; variable ind, p, pmin, pmax; -- GitLab From 10c81dd0be7b3b08e0a110b433f879a6a583aa2c Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 20:19:07 +0200 Subject: [PATCH 52/89] Add Emcee init for gaussian sphere Initial walker parameters are drawn around the current parameter set with gaussian distribution. Sigma defaults to 1/10 of the parameter range. 
--- src/fitting/ensemble-samplers/emcee-init.sl | 40 ++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index b0cb80fe..1b9e8c32 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -61,8 +61,46 @@ private define emcee_init_uniform () { name="uniform init", init=&init_parameter_walker_uniform ); - return struct_combine(struct_combine(defaults, __qualifiers()), settings); + return struct { @defaults, @__qualifiers(), @settings }; +} +%}}}% + +%{{{% Sphere initialization function with exponential decresing probability +% pick random parameters from gauss((x-x0 +private define init_parameter_walker_gauss_sphere (init, n) { + variable walkers = Array_Type[n]; + variable i,j; + variable ind, p, pmin, pmax; + (ind, p, pmin, pmax) = free_par_sets(); + variable num_p = length(ind); + variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p + variable relative = qualifier_exists("relative") || + not qualifier_exists("sigma") || + not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays + + variable s_par; + variable s_len = length(sigma); + if (Array_Type == typeof(sigma) && s_len != num_p) + throw UsageError, "Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p; + _for i (0, n-1, 1) { + if (relative) + s_par = _min(p-pmin, pmax-p)/sigma; + else + s_par = sigma; + walkers[i] = rand_gauss(1, num_par)*s_par+p; + } + + return walkers; +} +private define emcee_init_gauss_sphere () { + variable settings = (_NARGS==1) ? 
() : NULL; + variable defaults = __init_globals(; + name="gauss sphere init", + init=&init_parameter_walker_gauss_sphere + ); + return struct { @defaults, @__qualifier(), @settings }; } %}}}% EMCEE_INIT["uniform"] = &emcee_init_uniform; +EMCEE_INIT["gauss-sphere"] = &emcee_init_gauss_sphere; -- GitLab From 5274874a515ae92bf649282d90f49ead0ca83ae3 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 22:47:51 +0200 Subject: [PATCH 53/89] Fix EMCEE initial chain output The write routine was called after the first collector entry was already replaced. Now we write first if necessary and then update the cycle buffer. Also removed useless symbol s-=1. The write routine did not properly handle a load chain. Now we check if we have load one and if so we do not write initialized walkers. --- src/fitting/ensemble-samplers/emcee.sl | 39 +++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 3c5bb5b2..885fdc92 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -215,7 +215,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - _for tmp (0, collector_len-1, 1) + walker_cycle[0] = node_walkers; + _for tmp (1, collector_len-1, 1) walker_cycle[tmp] = Double_Type[npar]; } @@ -239,32 +240,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { walkers_per_node, handled_walkers); % ... and catch 'em! 
if (io.all || node == 0) { - if (cont != NULL) { - % if we continue chain, do not write initial walkers - cont = NULL; - s -= 1; - } else { - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; - } - - ifnot (cycle_step) + ifnot (cycle_step) { + if (cont != NULL) { % if we continue do not write the initial step as they will be doubled + io.write(walker_cycle[[1:]], update_cycle[[1:]], stat_cycle[[1:]], prev_stat_cycle[[1:]]); + cont = NULL; + } else io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); } + + _for tmp (0, node_walker_array_len-1, 1) { + walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; + update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; + stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; + prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + } } } % write missing pieces - variable write_initial = (steps < io.cycle); % if we have not written out any cycle, we have to treat the init walkers special + variable write_initial = (steps < io.cycle) && (cont == NULL); % if we have not written out any cycle, we have to treat the init walkers special + variable skip_load = (cont != NULL); if (io.all || node == 0) { if (cycle_step) - io.write(walker_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[0:(cycle_step+write_initial)*node_walker_array_len-1]]); + io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + 
update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[skip:load:(cycle_step+write_initial)*node_walker_array_len-1]]); io.close(steps, node_walker_array_len, walker_per_par, number_par); } -- GitLab From f8be3b582b1589737931c8cb8727af11e79c50c8 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 9 Apr 2020 22:57:56 +0200 Subject: [PATCH 54/89] Fix typos in emcee and emcee-init --- src/fitting/ensemble-samplers/emcee-init.sl | 6 +++--- src/fitting/ensemble-samplers/emcee.sl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index 1b9e8c32..fe17b7dc 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -81,13 +81,13 @@ private define init_parameter_walker_gauss_sphere (init, n) { variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) - throw UsageError, "Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p; + throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); _for i (0, n-1, 1) { if (relative) s_par = _min(p-pmin, pmax-p)/sigma; else s_par = sigma; - walkers[i] = rand_gauss(1, num_par)*s_par+p; + walkers[i] = rand_gauss(1, num_p)*s_par+p; } return walkers; @@ -98,7 +98,7 @@ private define emcee_init_gauss_sphere () { name="gauss sphere init", init=&init_parameter_walker_gauss_sphere ); - return struct { @defaults, @__qualifier(), @settings }; + return struct { @defaults, @__qualifiers(), @settings }; } %}}}% diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 885fdc92..957f3b69 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -265,7 +265,7 @@ private 
define emcee_mpi (walker_per_par, number_par, steps) { io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[skip:load:(cycle_step+write_initial)*node_walker_array_len-1]]); + prev_stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]]); io.close(steps, node_walker_array_len, walker_per_par, number_par); } -- GitLab From 3a4f161060e814660c7e8419a00f4bdd164cc9f3 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 13 Apr 2020 16:46:45 +0200 Subject: [PATCH 55/89] Fix wrong use of array copy Slang has a natural copy mechanism of the form array_a[[0:n]] = array_b[[m:m+n]] and copies the entries for natural types accordingly. However if array_a is of type Array_Type it sets *all* indices to array_b[[m:m+n]]. We have to copy each element individually to do it correctly. Also removed a print statement used for debugging. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 1 - src/fitting/ensemble-samplers/emcee.sl | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 433206ea..e8fd8671 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -122,7 +122,6 @@ private define write_chain_fits_init (io, filename, total_walkers, create, slopp tab = fits_read_table(io.handle); variable j; variable names = get_struct_field_names(tab); - print(names); variable l = length(names)-2; _for j (0, total_walkers-1, 1) { init_values[j] = Double_Type[l]; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 957f3b69..e6b21a4f 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -215,9 +215,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { stat_cycle = Double_Type[collector_len]+_Inf; prev_stat_cycle = Double_Type[collector_len]+_Inf; - walker_cycle[0] = node_walkers; - _for tmp (1, collector_len-1, 1) - walker_cycle[tmp] = Double_Type[npar]; + _for tmp (0, length(node_walkers)-1, 1) + walker_cycle[tmp] = node_walkers[tmp]; } % the main loop where the magic happens -- GitLab From d5af386b4aeccaa52d47473555bafe7425859ca1 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 18 Apr 2020 10:41:52 +0200 Subject: [PATCH 56/89] Optimization of emcee_new Since for most models the function evaluation is the computationally expensive part it is not smart to evaluate it twice per step. Now each step should just evaluate the fit model once except for the very first for each walker chain. 
--- src/fitting/ensemble-samplers/emcee-moves.sl | 8 +++++--- src/fitting/ensemble-samplers/emcee.sl | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 81bbd91d..1345337a 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -43,15 +43,17 @@ private define inverse_cdf (u, a) { % stretch move as of Goodman & Weare 2010 % Move must evaluate the fit function -private define stretch_move (move, fit_object, x, x_j, u) { +private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); variable xstat = 1e32, ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator - % evaluate fit function for 'x' - xstat = fit_object.eval_statistic(x;nocopy); + if (isinf(prev_stat)) + xstat = fit_object.eval_statistic(x;nocopy); + else + xstat = prev_stat; % calculate the new position (utilize array operations) y = x_j + z*(x-x_j); diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e6b21a4f..00cddefc 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -156,7 +156,9 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable walkers_per_node; variable handled_walkers; variable sort; - variable new_pos, update_pos, prev_stat, new_stat; + variable new_pos, update_pos, prev_stat, new_stat; % only first step runs eval_statistic twice (if prev_stat[i] == Inf) + % if the fit statistic evaluates to Inf this also happens, but this should not be the case + % for any good statistical function variable tmp; variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; @@ -229,7 +231,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { move.move(fit_handle, node_walkers[tmp], node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp]); + node_randoms[[0:move.nrands-1]+move.nrands*tmp], + node_prev_stat[tmp]); node_walkers[tmp] = new_pos; node_update[tmp] = update_pos; node_prev_stat[tmp] = prev_stat; -- GitLab From 54e4d24c9f1ed614938c84395ea6e06dc0a3769a Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sat, 18 Apr 2020 12:14:18 +0200 Subject: [PATCH 57/89] Fix emcee when called with unconstrained parameter ranges Throw a warning now instead of calculating with infinities --- src/fitting/ensemble-samplers/emcee-init.sl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index fe17b7dc..9a5b7ae1 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -49,7 +49,11 @@ private define init_parameter_walker_uniform (init, n) { variable ind, p, pmin, pmax; (ind, p, pmin, pmax) = free_par_sets(); variable num_p = length(ind); - + + % throw an error on unspecified bounds + if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges"; + _for i (0, n-1, 1) walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; @@ -69,7 +73,7 @@ private define emcee_init_uniform () { % pick random parameters from gauss((x-x0 private define init_parameter_walker_gauss_sphere (init, n) { variable walkers = Array_Type[n]; - variable i,j; + variable i,j,w; variable ind, p, pmin, pmax; (ind, p, pmin, pmax) = free_par_sets(); variable num_p = length(ind); @@ -78,6 +82,10 @@ private define init_parameter_walker_gauss_sphere (init, n) { not qualifier_exists("sigma") || not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays + % 
throw an error on unspecified bounds + if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges"; + variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) @@ -88,6 +96,10 @@ private define init_parameter_walker_gauss_sphere (init, n) { else s_par = sigma; walkers[i] = rand_gauss(1, num_p)*s_par+p; + w = where(walkers[i]pmax); + walkers[i][w] = pmax[w]; } return walkers; -- GitLab From 635b45b5d44b706665c9763ea2719657a9199f6f Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 27 Apr 2020 15:53:55 +0200 Subject: [PATCH 58/89] Fix processor macro When the rcl_mpi routines are not available a brace was missing. --- src/fitting/ensemble-samplers/emcee.sl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 00cddefc..37a98a19 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -74,7 +74,9 @@ private define release_walkers_mpi (node, nodes, total_walkers, } () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 } +#endif } +#ifexists rcl_mpi_init } else { _for j (0, length(node_walkers)-1, 1) { () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); -- GitLab From 817a6b116c0c2404c2e716ce46812c54ecd5b312 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 30 Apr 2020 01:51:25 +0200 Subject: [PATCH 59/89] Fix bug when continue chain When continue a chain it was intended that the loaded walkers are not written again to the chain, but only the first walker was skipped. The change in steps was not passed to the closing function causing index error. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 6 ++++-- src/fitting/ensemble-samplers/emcee.sl | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index e8fd8671..bea154b4 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -188,10 +188,12 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete variable min_stat = Double_Type[steps]; variable med_stat = Double_Type[steps]; variable max_stat = Double_Type[steps]; + variable step_stat = reread[1]; + variable step_update = reread[0]; _for j (0, steps-1, 1) { - frac_update[j] = sum(reread[0][[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = reread[1][[0:this_walkers-1]+j*this_walkers]; + frac_update[j] = sum(step_update[[0:this_walkers-1]+j*this_walkers])/this_walkers; + tmp2 = step_stat[[0:this_walkers-1]+j*this_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 37a98a19..73a48397 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -134,8 +134,11 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); variable init_walkers; - if (cont != NULL) + variable is_cont = 0; + if (cont != NULL) { + is_cont = 1; output = cont; + } #ifexists rcl_mpi_init variable node = rcl_mpi_init(); @@ -246,7 +249,10 @@ private define emcee_mpi (walker_per_par, number_par, steps) { if (io.all || node == 0) { ifnot (cycle_step) { if (cont != NULL) { % if we continue do not write the initial step as they will be doubled - io.write(walker_cycle[[1:]], update_cycle[[1:]], stat_cycle[[1:]], prev_stat_cycle[[1:]]); + 
io.write(walker_cycle[[node_walker_array_len:]], + update_cycle[[node_walker_array_len:]], + stat_cycle[[node_walker_array_len:]], + prev_stat_cycle[[node_walker_array_len:]]); cont = NULL; } else io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); @@ -266,12 +272,12 @@ private define emcee_mpi (walker_per_par, number_par, steps) { variable skip_load = (cont != NULL); if (io.all || node == 0) { if (cycle_step) - io.write(walker_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]], - prev_stat_cycle[[skip_load:(cycle_step+write_initial)*node_walker_array_len-1]]); + io.write(walker_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + update_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], + prev_stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]]); - io.close(steps, node_walker_array_len, walker_per_par, number_par); + io.close(steps-is_cont, node_walker_array_len, walker_per_par, number_par); } } -- GitLab From 29ae5975e91dde252df7af2d5dcf674a0cc22834 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 4 May 2020 19:13:40 +0200 Subject: [PATCH 60/89] Emcee overhaul Changed sub function calls, comparable to set_fit_method. Fixed ordering bug of parameters thanks to new isis version. Improvements in function handling. Removed unnecessary calculations in loop.
--- src/fitting/ensemble-samplers/emcee-init.sl | 93 ++-- src/fitting/ensemble-samplers/emcee-io.sl | 307 ++++++------ src/fitting/ensemble-samplers/emcee-moves.sl | 42 +- src/fitting/ensemble-samplers/emcee.sl | 469 ++++++++++--------- 4 files changed, 455 insertions(+), 456 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl index 9a5b7ae1..0895ceab 100644 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ b/src/fitting/ensemble-samplers/emcee-init.sl @@ -9,110 +9,87 @@ require("rand"); % Distribution is only done by the master process, so we don't have to care % about the random numbers. % -% Init function takes one argument directly which is the number of walkers +% Init function takes an initialized walker array, and the fit handle +% setup function must be called emcee_init_ %{{{% helpers -private define emcee_init_uniform(); -private variable EMCEE_INIT = Assoc_Type[Ref_Type, &emcee_init_uniform]; private define __init_globals () { return struct_combine( struct { name = "unspecified init", - init = NULL, + __f = NULL, }, __qualifiers()); } -define emcee_get_init(key) { return (@EMCEE_INIT[key])(;; __qualifiers); } -define emcee_get_inits () { return assoc_get_keys(EMCEE_INIT); } -% get all free param values, index and min max -private define free_par_sets () { - variable all = get_params(); - variable i, ind = {}, v = {}, mi = {}, ma = {}; - _for i (0, length(all)-1, 1) { - ifnot (all[i].freeze==0 && all[i].tie==NULL && all[i].fun==NULL) - continue; - list_append(ind, all[i].index); - list_append(v, all[i].value); - list_append(mi, all[i].min); - list_append(ma, all[i].max); - } - - return list_to_array(ind), list_to_array(v), list_to_array(mi), list_to_array(ma); -} %}}}% %{{{% Uniform initialization function % pick random parameter values within the boundaries -private define init_parameter_walker_uniform (init, n) { - variable walkers = Array_Type[n]; +private define 
init_parameter_walker_uniform (init, walkers, fit_handle) { variable i; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); + variable par = __parameters(fit_handle.object); + variable num_p = length(par.value); % throw an error on unspecified bounds - if (any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) throw UsageError, "Some parameters have unspecified parameter ranges"; - _for i (0, n-1, 1) - walkers[i] = rand_uniform(num_p)*(pmax-pmin)+pmin; - - return walkers; + _for i (0, length(walkers)-1, 1) + walkers[i] = rand_uniform(num_p)*(par.max-par.min)+par.min; } -private define emcee_init_uniform () { - variable settings = (_NARGS==1) ? () : NULL; +public define emcee_init_uniform () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } + variable defaults = __init_globals(; name="uniform init", - init=&init_parameter_walker_uniform ); - return struct { @defaults, @__qualifiers(), @settings }; + return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_uniform }; } %}}}% %{{{% Sphere initialization function with exponential decresing probability % pick random parameters from gauss((x-x0 -private define init_parameter_walker_gauss_sphere (init, n) { - variable walkers = Array_Type[n]; +private define init_parameter_walker_gauss_sphere (init, walkers, fit_handle) { variable i,j,w; - variable ind, p, pmin, pmax; - (ind, p, pmin, pmax) = free_par_sets(); - variable num_p = length(ind); + variable par = __parameters(fit_handle.object); + variable num_p = length(par.value); variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p variable relative = qualifier_exists("relative") || not qualifier_exists("sigma") || not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays % throw an error on unspecified bounds - if 
(any(pmin == -DOUBLE_MAX) || any(pmax == DOUBLE_MAX)) + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) throw UsageError, "Some parameters have unspecified parameter ranges"; variable s_par; variable s_len = length(sigma); if (Array_Type == typeof(sigma) && s_len != num_p) throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); - _for i (0, n-1, 1) { + _for i (0, length(walkers)-1, 1) { if (relative) - s_par = _min(p-pmin, pmax-p)/sigma; + s_par = _min(par.value-par.min, par.max-par.value)/sigma; else s_par = sigma; - walkers[i] = rand_gauss(1, num_p)*s_par+p; - w = where(walkers[i]<pmin); - walkers[i][w] = pmin[w]; - w = where(walkers[i]>pmax); - walkers[i][w] = pmax[w]; + walkers[i] = rand_gauss(1, num_p)*s_par+par.value; + w = where(walkers[i]<par.min); + walkers[i][w] = par.min[w]; + w = where(walkers[i]>par.max); + walkers[i][w] = par.max[w]; } - - return walkers; } -private define emcee_init_gauss_sphere () { - variable settings = (_NARGS==1) ? () : NULL; +public define emcee_init_gauss () { + if (qualifier_exists("help")) { + help("emcee_init_gauss"); + return NULL; + } + variable defaults = __init_globals(; - name="gauss sphere init", - init=&init_parameter_walker_gauss_sphere + name="gauss init", ); - return struct { @defaults, @__qualifiers(), @settings }; + return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_gauss_sphere }; } %}}}% - -EMCEE_INIT["uniform"] = &emcee_init_uniform; -EMCEE_INIT["gauss-sphere"] = &emcee_init_gauss_sphere; diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index bea154b4..684241bf 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -5,154 +5,185 @@ % here.
%{{{% helpers -private define emcee_io_fits(); -private variable EMCEE_IO = Assoc_Type[Ref_Type, &emcee_io_fits]; -define emcee_get_io (key) { return (@EMCEE_IO[key])(;; __qualifiers); } -define emcee_get_ios () { return assoc_get_keys(EMCEE_IO); } private define __io_globals () { - return struct_combine(struct { + return struct { name = "unspecified io", - open = NULL, % open function, takes 'io-object', 'filename', 'number walkers', 'create flag', 'sloppy flag' - write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array', 'pre statistisc array' - close = NULL, % finalizes output, takes 'io-object', 'current number steps', 'number walkers', 'walker per parameter', 'number parameter' + __f_create = NULL, % create function, takes 'io-object', 'filename', 'fit_handle', 'total walkers' + __f_open = NULL, % open function, takes 'io-object', 'filename', 'initialized walkers'. + __f_read = NULL, % same as open but is used for seting the walkers, requires less consitency + __f_write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array' + __f_finalize = NULL, % finalizes output, takes 'io-object', 'current number steps', 'walker per parameter', 'number parameter' + __f_close = NULL, % close any handles if necessary, takes 'io-object' handle = NULL, % io access (usually file pointer) - all = 0, % flag indicating if all nodes execute io or just master - msg = NULL, % current io message (used only for feedback) cycle = 1, % number of steps to perform before write - }, __qualifiers()); + @__qualifiers() }; } %}}}% + %{{{% FITS input output routines -private define write_chain_fits_init (io, filename, total_walkers, create, sloppy) { +private define emcee_init_chain_fits (io, filename, fit_handle, total_walkers) { % write ensemble evolution to fits file -#ifexists rcl_mpi_init - variable modified_name = (io.all) ? 
sprintf("%s_%d%s", path_sans_extname(filename), rcl_mpi_rank(), path_extname(filename)) : filename; -#else - variable modified_name = filename; -#endif variable init_values; variable data_info; variable par_names; variable i; list_data(&data_info); - variable freep = freeParameters(); - if (create) { % create the file(s) initially - io.handle = fits_open_file(modified_name, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[freep-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), ""); - fits_update_key(io.handle, "SLOPPY", sloppy, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, freep)) - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", freep)], - ["D", "J", ["D"][freep*0]], - [" fit statistics", " update indicator", [" parameter values"][freep*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - init_values = NULL; - } else { % if loading, do 
some sanity checks - io.handle = fits_open_file(modified_name+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and loaded chain model differ, unable to continue chain"; - return NULL; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != freeParameters())) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current model and chain model have different free parameters"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - sloppy = (fsloppy > sloppy) ? fsloppy : sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - return NULL; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - io.msg = "Not a mcmc chain file"; - return NULL; - } - - init_values = Array_Type[total_walkers]; - tab = fits_read_table(io.handle); - variable j; - variable names = get_struct_field_names(tab); - variable l = length(names)-2; - _for j (0, total_walkers-1, 1) { - init_values[j] = Double_Type[l]; - _for i (0, l-1, 1) - init_values[j][i] = get_struct_field(tab, names[i+2])[-total_walkers+j]; - } + variable par = __parameters(fit_handle.object); + io.handle = 
fits_open_file(filename, "c"); + + % write first table + par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; + fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), + ["FREE_PAR", "FREE_PAR_NAME"], + ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], + [" parameter indices", " parameter names"]); + fits_update_key(io.handle, "MODEL", get_fit_fun(), "model function"); + fits_update_key(io.handle, "SLOPPY", 0, " sloppy level"); + array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)])) % sort here, so at least they are in index order + throw IOError; + if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) + throw IOError; + + % write second table + fits_create_binary_table(io.handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); + fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); + fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); + + % write third table + fits_create_binary_table(io.handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + + % move back to chain table + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); + + io.num_steps = 0; + io.sloppy = 0; + + () = _fits_get_rowsize(io.handle, &(io.cycle)); + io.cycle = io.cycle/total_walkers; + if (io.cycle < 1) + io.cycle = 1; +} + +private define 
emcee_open_chain_fits (io, filename, fit_handle, walkers) { + io.handle = fits_open_file(filename+"[PARAMETERS]", "w"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable par = __parameters(fit_handle.object); + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } + + variable fsloppy = fits_read_key(io.handle, "SLOPPY"); + io.sloppy = (fsloppy > io.sloppy) ? fsloppy : io.sloppy; % largest sloppyness + fits_update_key(io.handle, "SLOPPY", io.sloppy); + + if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (io.sloppy<1)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; } + fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - % IMPORTANT: stay on mcmcchain table if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { fits_close_file(io.handle); io.handle = NULL; - io.msg = "No a mcmc chain file"; - return NULL; + throw IOError, "Not a emcee chain file"; + } + + variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); + if (file_nw != length(walkers)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, sprintf("Unable to continue chain with %d walkers with chain with %d walkers", 
file_nw, length(walkers)); } - io.storage = fits_get_num_rows(io.handle); + tab = fits_read_table(io.handle); + variable i,j; + variable names = get_struct_field_names(tab); + variable l = length(names)-2; + _for j (0, length(walkers)-1, 1) + _for i (0, l-1, 1) + walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; + io.cycle = io.cycle/length(walkers); if (io.cycle < 1) io.cycle = 1; - io.msg = NULL; - return init_values; + io.num_steps = fits_get_num_rows(io.handle); } -private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, prev_stat_cycle) { +private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { + io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); + + if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { + fits_close_file(io.handle); + io.handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } + + variable tab = fits_read_table(io.handle); + ifnot (struct_field_exists(tab, "free_par")) { + fits_close_file(io.handle); + io.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable par = __parameters(fit_handle.object); + if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } + + variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); + if (file_nw != length(walkers)) { + fits_close_file(io.handle); + io.handle = NULL; + throw UsageError, sprintf("Unable to set chain with %d walkers from file with %d walkers", file_nw, length(walkers)); + } + + tab = fits_read_table(io.handle); + variable i,j; + variable names = get_struct_field_names(tab); + variable l = length(names)-2; + _for j (0, length(walkers)-1, 1) + _for i (0, l-1, 1) + walkers[j][i] = 
get_struct_field(tab, names[i+2])[-length(walkers)+j]; +} + +private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cycle, stat_cycle) { variable tmp, update; - variable freep = freeParameters(); - variable npar = length(freep); + variable par = __parameters(fit_handle.object); + variable npar = length(par.index); variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle variable i,j; variable collen = fits_get_num_rows(io.handle); @@ -160,15 +191,13 @@ private define write_chain_fits (io, walkers_cycle, update_cycle, stat_cycle, pr tmp = Double_Type[steps_walkers]; _for i (0, steps_walkers-1, 1) tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", freep[j])), collen+1, 1, tmp); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), collen+1, 1, tmp); } () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - tmp = Int_Type[steps_walkers]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); } -private define finalize_chain_fits (io, steps, this_walkers, walker_per_parameter, number_parameter) { +private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; variable total_walkers = walker_per_parameter*number_parameter; @@ -180,7 +209,7 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.storage+1, collen-io.storage, &reread); + io.num_steps+1, collen-io.num_steps, &reread); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; @@ -192,8 +221,8 @@ private 
define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete variable step_update = reread[0]; _for j (0, steps-1, 1) { - frac_update[j] = sum(step_update[[0:this_walkers-1]+j*this_walkers])/this_walkers; - tmp2 = step_stat[[0:this_walkers-1]+j*this_walkers]; + frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; + tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); max_stat[j] = max(tmp2); med_stat[j] = median(tmp2); @@ -202,26 +231,28 @@ private define finalize_chain_fits (io, steps, this_walkers, walker_per_paramete () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); +} +private define emcee_close_chain_fits (io) { fits_close_file(io.handle); io.handle = NULL; - io.storage = NULL; - io.msg = NULL; } -private define emcee_io_fits () { +public define emcee_io_fits () { variable settings = (_NARGS==1) ? 
() : NULL; variable defaults = __io_globals(; name = "io fits", - open = &write_chain_fits_init, - write = &write_chain_fits, - close = &finalize_chain_fits, - storage = NULL, + num_steps = NULL, + sloppy = 0, ); - return struct_combine(struct_combine(defaults, __qualifiers()), settings); + return struct { @defaults, @__qualifiers(), + __f_create=&emcee_init_chain_fits, + __f_open=&emcee_open_chain_fits, + __f_write=&emcee_write_chain_fits, + __f_read=&emcee_read_chain_fits, + __f_finalize=&emcee_finalize_chain_fits, + __f_close=&emcee_close_chain_fits, + }; } %}}}% - -% Provide io functions -EMCEE_IO["fits"] = &emcee_io_fits; diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 1345337a..c1a933f6 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -3,20 +3,16 @@ % DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER % Walker moves should be defined as functions where the function returns a % structure suitable to describe the step algorithm. For adjustment the -% function should combine the qualifiers with the default structure and -% should handle an argument that may be NULL or a structure with the same -% properties, where the argument should overwrite any qualifiers given to -% the function. The move function MUST evaluate the fit model (in the -% simplest case by using the fit object) and return new position and -% statistics. +% function should combine the qualifiers with the default structure. The +% move function MUST evaluate the fit model by using the fit handle and +% return new position and statistics. % % Besides additional arguments given in the structure the algorithm must % process the fit object, current position, pivot position and an array of % random numbers. % -% Functions should be private and only accessible through the access function -% 'get_emcee' or 'get_emcee_move'. 
-% Required default parameters can be retrieved with '__move_globals'. +% For constructing the structure the __move_globals function is convenient. +% The constructur function must be names emcee_move_. % % For an example see the STRETCH_MOVE %{{{% Helpers for move steps @@ -27,10 +23,6 @@ private define __move_globals () { nrands = 0, % number of required random numbers for each step }, __qualifiers); } -private define emcee_move_stretch(); % for default value -private variable EMCEE_MOVES = Assoc_Type[Ref_Type, &emcee_move_stretch]; -define emcee_get_move (key) { return (@EMCEE_MOVES[key])(;;__qualifiers); } -define emcee_get_moves () { return assoc_get_keys(EMCEE_MOVES); } %}}}% %{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY @@ -45,47 +37,37 @@ private define inverse_cdf (u, a) { % Move must evaluate the fit function private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); - variable xstat = 1e32, ystat = -1e32; % from mikes code + variable ystat = -1e32; % from mikes code variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator - if (isinf(prev_stat)) - xstat = fit_object.eval_statistic(x;nocopy); - else - xstat = prev_stat; - % calculate the new position (utilize array operations) y = x_j + z*(x-x_j); - % try evaluating, if out of bounds, does nothing + % try evaluating, if out of bounds, does nothing TODO: This is biasing the result, check how to do this correct try { % evaluate fit function for 'y' ystat = fit_object.eval_statistic(y;nocopy); % caluculate if we accept the step based on the statistics of the % model. 
We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(xstat-ystat)/2.)) { + if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { x_t1 = y; update = 1; } } catch IsisError; - % return new walker position, proposed position, xstat, ystat - return (x_t1, update, xstat, ystat); + % return new walker position, update, new statistic + return (x_t1, update, ystat); } -private define emcee_move_stretch () { - variable settings = (_NARGS==1)? () : NULL; +public define emcee_move_stretch () { variable defaults = __move_globals(; name="stretch move", - move=&stretch_move, nrands=2, a=2. % move scaling ); - return struct_combine(struct_combine(defaults, __qualifiers), settings); % combine settings + return struct { @defaults, @__qualifiers(), __f=&stretch_move }; % combine settings } %}}}% - -% Provide moves: -EMCEE_MOVES["stretch"] = &emcee_move_stretch; diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 73a48397..b6c6bcf1 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -14,7 +14,7 @@ private define distribute_walkers (nodes, number_walkers) { return walkers_per_node; } -% get walkers per handled up to this node +% get walkers handled up to this node private define previous_number_walkers (walkers_per_node) { variable l = length(walkers_per_node); variable handled_walkers = Int_Type[l]; @@ -22,295 +22,304 @@ private define previous_number_walkers (walkers_per_node) { _for i (0, l-1, 1) { handled_walkers[i] = c; - if (i 2) + throw UsageError, sprintf("Failed parsing option '%s'", exec_string); + + variable fname = strtrim(s[0]); + variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + if (NULL == f) + throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); + + variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; + variable call = (@f)(;; opt); + + if (typeof(call) != Struct_Type) + throw UsageError, sprintf("Function '%s' not returning valid type"); + + return call; +} + +private define setup_node (node, num_nodes, total_walkers, nrands) { + variable walkers_per_node = distribute_walkers(num_nodes, total_walkers); + variable handled_walkers = previous_number_walkers(walkers_per_node); + variable set1 = total_walkers >> 1; + variable j; + variable N = struct { + walkers, % current walker position + pivots, % current pivot positions + randoms, % required randoms + update, % updater track + stat, % step statistic + total_walkers, % number of totals walkers + walkers_per_node = walkers_per_node, % number of walkers handled by each node + handled_walkers = handled_walkers, % handled_walkers[i] = sum(walkers_per_node[[0:i-1]]) + set1 = set1, % size of set 1 + set2 = total_walkers - set1, % size of set 2 + fit = open_fit(), % the fit object, will fail if no model is loaded + num_pars = num_free_params(), % number of fit parameters + num_rands = nrands, % number of random numbers per walker + }; + if (node) { % setup for slaves + N.walkers = Array_Type[walkers_per_node[node]]; + N.pivots = Array_Type[walkers_per_node[node]]; + N.update = Int_Type[walkers_per_node[node]]; + N.stat = Double_Type[walkers_per_node[node]]; + N.randoms = Double_Type[walkers_per_node[node]*nrands]; + } else { % setup for master + N.walkers = Array_Type[total_walkers]; + N.pivots = Array_Type[total_walkers]; + N.update = Int_Type[total_walkers]; + N.stat = Double_Type[total_walkers]; + N.randoms = Double_Type[total_walkers*nrands]; + } + + _for j (0, length(N.walkers)-1, 1) { + N.walkers[j] = Double_Type[N.num_pars]; + N.pivots[j] = Double_Type[N.num_pars]; + } + + return N; +} + %}}}% %{{{% mpi functions -private define release_walkers_mpi (node, nodes, total_walkers, - nrand, walkers_per_node, handled_walkers, % <--- those are only relevant for 
master process - node_walkers_ref, node_pivots_ref, node_randoms_ref) { - variable node_walkers = @(node_walkers_ref); - variable node_pivots = @(node_pivots_ref); - variable node_randoms = @(node_randoms_ref); +private define release_walkers_mpi_master (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable urand = qualifier("urand", NULL); + if (NULL == urand) + throw InternalError, "No random generator given"; + variable upick = qualifier("upick", NULL); + if (NULL == upick) + throw InternalError, "No random generator given"; + +% variable set2_len = total_walkers>>1; % number of walkers in set 2 + % variable set1_len = total_walkers-set2_len; % number of walkers in set 1 + + variable all_pick1 = (@upick)(0, N.set2-1, N.set1)+N.set1; % pick pivot *for* set 1 + variable all_pick2 = (@upick)(0, N.set1-1, N.set2); % pick pivot *for* set 2 + variable pick = [all_pick1, all_pick2]; + variable this_walkers, this_pivots, this_randoms; variable i,j; - if (node == 0) { % master - variable urand = qualifier("urand", NULL); - if (NULL == urand) - throw InternalError, "No random generator given"; - variable upick = qualifier("upick", NULL); - if (NULL == upick) - throw InternalError, "No random generator given"; - - variable set2_len = total_walkers>>1; % number of walkers in set 2 - variable set1_len = total_walkers-set2_len; % number of walkers in set 1 - variable all_u = (@urand)(total_walkers*nrand); % all random numbers for the next step - variable all_pick1 = (@upick)(0, set2_len-1, set1_len); % pick for set 1 - variable all_pick2 = (@upick)(0, set1_len-1, set2_len); % pick for set 2 - variable this_walkers, this_pivots, this_randoms; - - _for i (0, nodes-1, 1) { - this_walkers = node_walkers[[0:walkers_per_node[i]-1]+handled_walkers[i]]; - this_pivots = node_walkers[[all_pick1+set1_len, all_pick2]][[0:walkers_per_node[i]-1]+handled_walkers[i]]; - this_randoms = all_u[[0:walkers_per_node[i]*nrand-1]+handled_walkers[i]*nrand]; - if (i == 0) { % set master locals, we 
utilize that the walkers the master handles are the first in the array - _for j (0, walkers_per_node[i]-1, 1) { - node_randoms[j] = this_randoms[j]; - node_pivots[j] = this_pivots[j]; - } - } -#ifexists rcl_mpi_init - else { - _for j (0, length(this_walkers)-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 - } - () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 3 - } -#endif + _for i (0, length(N.walkers)-1, 1) + N.pivots[i] = N.walkers[pick[i]]; % get the pivot points + N.randoms[*] = (@urand)(length(N.walkers)*N.num_rands); % get new random numbers + + _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data + % set the walkers for node i + this_walkers = N.walkers[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + % pick the pivots for node i + this_pivots = N.pivots[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + % set the randoms for node i + this_randoms = N.randoms[[0:N.walkers_per_node[i]*N.num_rands-1]+N.handled_walkers[i]*N.num_rands]; + _for j (0, length(this_walkers)-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j], length(this_walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 } -#ifexists rcl_mpi_init - } else { - _for j (0, length(node_walkers)-1, 1) { - () = rcl_mpi_org_recv_double(node_walkers[j], length(node_walkers[j]), 0, 0); - () = rcl_mpi_org_recv_double(node_pivots[j], length(node_pivots[j]), 0, 1); - } - () = rcl_mpi_org_recv_double(node_randoms, length(node_randoms), 0, 2); + () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 2 + } #endif +} + +private define release_walkers_mpi_slave 
(node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable j; + _for j (0, length(N.walkers)-1, 1) { + () = rcl_mpi_org_recv_double(N.walkers[j], length(N.walkers[j]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j], length(N.pivots[j]), 0, 1); % receive pivot points (tag 1) } + () = rcl_mpi_org_recv_double(N.randoms, length(N.randoms), 0, 2); % receive random numbers (tag 2) +#endif } -private define catch_walkers_mpi (node, nodes, walkers_ref, update_ref, stat_ref, prev_stat_ref, - walkers_per_node, handled_walkers) { % <--- only relevant for master - variable walkers = @walkers_ref; - variable update = @update_ref; - variable stat = @stat_ref; - variable prev_stat = @prev_stat_ref; - variable i,j; - variable npar = length(walkers[0]); % walkers are all equal +private define release_walkers_mpi (node, num_nodes, N) { + % distribute walkers, pivots and random numbers - variable stat_part; - variable update_part; + if (node) + release_walkers_mpi_slave(node, num_nodes, N); + else + release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); +} +private define catch_walkers_mpi_master (node, num_nodes, N) +{ #ifexists rcl_mpi_init - if (node == 0) { % master, collect all walkers - _for i (1, nodes-1, 1) { - stat_part = Double_Type[walkers_per_node[i]]; - update_part = Int_Type[walkers_per_node[i]]; - _for j (0, walkers_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(walkers[handled_walkers[i]+j], npar, i, i); - () = rcl_mpi_org_recv_int(update_part, walkers_per_node[i], i, i); - update[[0:walkers_per_node[i]-1]+handled_walkers[i]] = update_part; - () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); - stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; - () = rcl_mpi_org_recv_double(stat_part, walkers_per_node[i], i, i); - prev_stat[[0:walkers_per_node[i]-1]+handled_walkers[i]] = stat_part; - } - } else { - _for j (0, length(walkers)-1, 1) - () = rcl_mpi_org_isend_double(walkers[j], npar, 0, node); - () = 
rcl_mpi_org_isend_int(update, length(update), 0, node); - () = rcl_mpi_org_isend_double(stat, length(stat), 0, node); - () = rcl_mpi_org_isend_double(prev_stat, length(prev_stat), 0, node); + variable i,j; + variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index + _for i (1, num_nodes-1, 1) { + this_stat = Double_Type[N.walkers_per_node[i]]; + this_update = Int_Type[N.walkers_per_node[i]]; + _for j (0, N.walkers_per_node[i]-1, 1) + () = rcl_mpi_org_recv_double(N.walkers[N.handled_walkers[i]+j], length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); + () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); + N.update[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_update; + N.stat[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_stat; } #endif } +private define catch_walkers_mpi_slave (node, num_nodes, N) +{ +#ifexists rcl_mpi_init + variable i; + _for i (0, length(N.walkers)-1, 1) + () = rcl_mpi_org_isend_double(N.walkers[i], length(N.walkers[0]), 0, node); + () = rcl_mpi_org_isend_int(N.update, length(N.update), 0, node); + () = rcl_mpi_org_isend_double(N.stat, length(N.stat), 0, node); +#endif +} + +private define catch_walkers_mpi (node, num_nodes, N) +{ + if (node) + catch_walkers_mpi_slave(node, num_nodes, N); + else + catch_walkers_mpi_master(node, num_nodes, N); +} + private define emcee_mpi (walker_per_par, number_par, steps) { variable total_walkers = walker_per_par*number_par; variable init = qualifier("init", NULL); variable move = qualifier("move", NULL); variable urand = qualifier("urand", NULL); variable upick = qualifier("upick", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write_hook", NULL); - variable sloppy = qualifier("sloppy", NULL); variable cont = qualifier("continue", NULL); - variable init_walkers; - variable is_cont = 0; - if (cont != NULL) { - is_cont = 1; - 
output = cont; - } + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); + variable load_hook = qualifier("read", NULL); + variable load = qualifier("load", NULL); + + variable node, num_nodes; #ifexists rcl_mpi_init - variable node = rcl_mpi_init(); - variable nodes = rcl_mpi_numtasks(); - rcl_init_mpi_request(nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... + node = rcl_mpi_init(); + num_nodes = rcl_mpi_numtasks(); + num_nodes = (num_nodes<1) ? 1 : num_nodes; + rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... #else - variable node = 0; - variable nodes = 1; + node = 0; + num_nodes = 1; #endif - if (NULL == init || NULL == move || NULL == urand || NULL == upick) - throw InternalError, "Initialization failed"; - - if (io.all || node == 0) { - init_walkers = io.open(output, total_walkers, cont == NULL, sloppy); - if (NULL == io.handle) - throw IOError, (NULL == io.msg) ? "" : io.msg; - } - % master only variables - variable walkers_per_node; - variable handled_walkers; - variable sort; - variable new_pos, update_pos, prev_stat, new_stat; % only first step runs eval_statistic twice (if prev_stat[i] == Inf) - % if the fit statistic evaluates to Inf this also happens, but this should not be the case - % for any good statistical function - variable tmp; - variable node_walkers_len = total_walkers/nodes + ((total_walkers mod nodes) > node ? 1 : 0); - variable node_walker_array_len = (node == 0) ? 
total_walkers : node_walkers_len; - variable npar = num_free_params(); - - variable node_walkers; % send & recv (differ between master and slaves) - variable node_update; % recv (differ between master and slaves) - variable node_pivots = Array_Type[node_walkers_len]; % send (all the same) - variable node_randoms = Double_Type[node_walkers_len*move.nrands]; % send (all the same) - variable node_stat; % recv (differ between master and slaves) - variable node_prev_stat; % recv (differ between master and slaves) - - variable s; - variable fit_handle = open_fit(); % if no model is loaded this will crash - variable collector_len = 0; - if (io.all || node == 0) - collector_len = node_walker_array_len*io.cycle; - variable walker_cycle; % enough to collect the walkers for one cycle - variable update_cycle; - variable stat_cycle; - variable prev_stat_cycle; - variable cycle_step = 0; + move = emcee_call_setup_fun(move, "move"); + variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - _for tmp (0, node_walkers_len-1, 1) - node_pivots[tmp] = Double_Type[npar]; + % read the settings, if one of them is NULL help was called + io = emcee_call_setup_fun(io, "io"); + load_hook = emcee_call_setup_fun(load_hook, "io"); + init = emcee_call_setup_fun(init, "init"); + if (NULL == io || NULL == load_hook || NULL == init) return; - if (node == 0) { % setup master things + ifnot (node) { % master only if (cont != NULL) - node_walkers = init_walkers; - else - node_walkers = init.init(total_walkers); % initialize the walkers & other collectors - node_update = Int_Type[total_walkers]; - node_stat = Double_Type[total_walkers]+_Inf; - node_prev_stat = Double_Type[total_walkers]+_Inf; - - sort = array_sort(rand_uniform(total_walkers)); - node_walkers = node_walkers[sort]; % randomize them to be on the safe side, init may introduce bias - walkers_per_node = distribute_walkers(nodes, total_walkers); - handled_walkers = previous_number_walkers(walkers_per_node); - } else { - 
walkers_per_node = NULL; - handled_walkers = NULL; - - node_walkers = Array_Type[node_walkers_len]; - node_update = Int_Type[node_walkers_len]; - node_stat = Double_Type[node_walkers_len]+_Inf; - node_prev_stat = Double_Type[node_walkers_len]+_Inf; - - _for tmp (0, node_walkers_len-1, 1) - node_walkers[tmp] = Double_Type[npar]; + io.__f_open(cont, this.walkers); + else if (load != NULL) { + load_hook.__f_read(load, this.walkers); + load_hook.__f_close(); + io.__f_create(output, this.fit, total_walkers); + } else { + io.__f_create(output, this.fit, total_walkers); + init.__f(this.walkers, this.fit); + } } - if (io.all || node==0) { % setup collector and write initial - walker_cycle = Array_Type[collector_len]; % enough to collect the walkers for one cycle - update_cycle = Int_Type[collector_len]; - stat_cycle = Double_Type[collector_len]+_Inf; - prev_stat_cycle = Double_Type[collector_len]+_Inf; + variable collector_length; % the collector so we can skip turns before writing to disk + variable walker_cycle; + variable update_cycle; + variable stat_cycle; + variable cycle_step = 0; - _for tmp (0, length(node_walkers)-1, 1) - walker_cycle[tmp] = node_walkers[tmp]; + % setup space + variable j; + if (node) % slave + collector_length = 0; + else % master + collector_length = length(this.walkers)*io.cycle; + + walker_cycle = Array_Type[collector_length]; + update_cycle = Int_Type[collector_length]; + stat_cycle = Double_Type[collector_length]; + + if (NULL == urand || NULL == upick) + throw InternalError, "Missing random number generator"; + + % evaluate the model at the walker positions to get the statistics + % and if not continuing a chain, write them out + _for j (0, length(this.walkers)-1, 1) { + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.update[j] = 1; } + if ((cont == NULL) && (node == 0)) % only master is writing the initials + io.__f_write(this.fit, this.walkers, this.update, this.stat); % the main loop where the magic happens + variable s, 
walker, update, stat; _for s (1, steps, 1) { cycle_step = s mod io.cycle; - release_walkers_mpi(node, nodes, total_walkers, move.nrands, walkers_per_node, handled_walkers, % release walkers to freedom ... - &(node_walkers), &(node_pivots), &(node_randoms); upick=upick, urand=urand); - _for tmp (0, node_walkers_len-1, 1) { % ... let them move ... - (new_pos, update_pos, prev_stat, new_stat) = - move.move(fit_handle, - node_walkers[tmp], - node_pivots[tmp], - node_randoms[[0:move.nrands-1]+move.nrands*tmp], - node_prev_stat[tmp]); - node_walkers[tmp] = new_pos; - node_update[tmp] = update_pos; - node_prev_stat[tmp] = prev_stat; - node_stat[tmp] = new_stat; + release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ... + (walker, update, stat) = move.__f(this.fit, this.walkers[j], this.pivots[j], + this.randoms[[0:this.num_rands-1]+j*this.num_rands], this.stat[j]); + this.walkers[j] = walker; + this.update[j] = update; + this.stat[j] = stat; } - catch_walkers_mpi(node, nodes, &node_walkers, &node_update, &node_stat, &node_prev_stat, - walkers_per_node, handled_walkers); % ... and catch 'em! 
- - if (io.all || node == 0) { - ifnot (cycle_step) { - if (cont != NULL) { % if we continue do not write the initial step as they will be doubled - io.write(walker_cycle[[node_walker_array_len:]], - update_cycle[[node_walker_array_len:]], - stat_cycle[[node_walker_array_len:]], - prev_stat_cycle[[node_walker_array_len:]]); - cont = NULL; - } else - io.write(walker_cycle, update_cycle, stat_cycle, prev_stat_cycle); - } - - _for tmp (0, node_walker_array_len-1, 1) { - walker_cycle[tmp+cycle_step*node_walker_array_len] = node_walkers[tmp]; - update_cycle[tmp+cycle_step*node_walker_array_len] = node_update[tmp]; - stat_cycle[tmp+cycle_step*node_walker_array_len] = node_stat[tmp]; - prev_stat_cycle[tmp+cycle_step*node_walker_array_len] = node_prev_stat[tmp]; + catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! + + % if cycle end is reached write the chain + ifnot (node) { % master only + ifnot (cycle_step) + io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); + _for j (0, length(this.walkers)-1, 1) { + walker_cycle[j+(cycle_step-1)*length(this.walkers)] = @(this.walkers[j]); + update_cycle[j+(cycle_step-1)*length(this.walkers)] = this.update[j]; + stat_cycle[j+(cycle_step-1)*length(this.walkers)] = this.stat[j]; } } } - % write missing pieces - variable write_initial = (steps < io.cycle) && (cont == NULL); % if we have not written out any cycle, we have to treat the init walkers special - variable skip_load = (cont != NULL); - if (io.all || node == 0) { + % we might have unwritten steps left, so better write them here + ifnot (node) { if (cycle_step) - io.write(walker_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - update_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]], - 
prev_stat_cycle[[skip_load*node_walker_array_len:(cycle_step+write_initial)*node_walker_array_len-1]]); - - io.close(steps-is_cont, node_walker_array_len, walker_per_par, number_par); + io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], + update_cycle[[:cycle_step*length(this.walkers)-1]], + stat_cycle[[:cycle_step*length(this.walkers)-1]]); } -} -%}}}% - -define emcee_get (s) { - variable split = strchop(s, '/', 0); - if (split[0] == "move" || split[0] == "moves") { - if (length(split)==1) return emcee_get_moves(); - else if (any(emcee_get_moves() == split[1])) return emcee_get_move(split[1];; __qualifiers()); - else vmessage("*** unknown move: '%s'", split[1]); - } else if(split[0] == "init" || split[0] == "inits") { - if (length(split)==1) return emcee_get_inits(); - else if (any(emcee_get_inits() == split[1])) return emcee_get_init(split[1];; __qualifiers()); - else vmessage("*** unknown init: '%s'", split[1]); - } else if (split[0] == "io" || split[0] == "ios") { - if (length(split)==1) return emcee_get_ios(); - else if (any(emcee_get_moves() == split[1])) return emcee_get_io(split[1];; __qualifiers()); - else vmessage("*** unknown io: '%s'", split[1]); - } else { - vmessage("*** unknown target: '%s'", s); + % and finally call the finalizing function + ifnot (node) { % master only + io.__f_finalize(steps, walker_per_par, number_par, this.fit); + io.__f_close(); } - return NULL; } +%}}}% define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", emcee_get_move("stretch")); % defined move + variable move = qualifier("move", "stretch"); % defined move variable urand = qualifier("urand", &rand_uniform); % double random generator variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", emcee_get_init("uniform")); % initialization function - variable write_hook = qualifier("io", emcee_get_io("fits")); % output routine + variable init = qualifier("init", "uniform"); % 
initialization function + variable load = qualifier("load", NULL); % initialize from file + variable read_hook = qualifier("read", "fits"); + variable io = qualifier("write", "fits"); % output routine variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); - variable sloppy = qualifier("sloppy", 0); variable cont = qualifier("continue", NULL); if (NULL == get_fit_fun()) @@ -325,7 +334,7 @@ define emcee_new (walkers_per_par, steps) { throw UsageError, "Unable to create ensemble for this large number of walkers"; emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, write_hook=write_hook, output=output, sloppy=sloppy, continue=cont); + init=init, output=output, continue=cont, write=io, load=load, read=read_hook); #ifexists rcl_mpi_init rcl_mpi_finalize(); #endif -- GitLab From 9d77658ed686e1d8346e923e8072c65cff7c3d30 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 4 May 2020 23:30:25 +0200 Subject: [PATCH 61/89] Some minor code restructure & help for sub-functions --- src/fitting/ensemble-samplers/emcee-io.sl | 4 +++ src/fitting/ensemble-samplers/emcee-moves.sl | 4 +++ src/fitting/ensemble-samplers/emcee.sl | 30 +++++++++----------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 684241bf..8e433788 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -239,6 +239,10 @@ private define emcee_close_chain_fits (io) { } public define emcee_io_fits () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } variable settings = (_NARGS==1) ? 
() : NULL; variable defaults = __io_globals(; name = "io fits", diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index c1a933f6..5dc52fa4 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -63,6 +63,10 @@ private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { } public define emcee_move_stretch () { + if (qualifier_exists("help")) { + help("emcee_init_uniform"); + return NULL; + } variable defaults = __move_globals(; name="stretch move", nrands=2, diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index b6c6bcf1..820dcbb9 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -43,12 +43,7 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; - variable call = (@f)(;; opt); - - if (typeof(call) != Struct_Type) - throw UsageError, sprintf("Function '%s' not returning valid type"); - - return call; + return (@f)(;; opt); } private define setup_node (node, num_nodes, total_walkers, nrands) { @@ -312,15 +307,17 @@ private define emcee_mpi (walker_per_par, number_par, steps) { %}}}% define emcee_new (walkers_per_par, steps) { - variable move = qualifier("move", "stretch"); % defined move - variable urand = qualifier("urand", &rand_uniform); % double random generator - variable upick = qualifier("upick", &rand_int); % int random generator - variable init = qualifier("init", "uniform"); % initialization function - variable load = qualifier("load", NULL); % initialize from file - variable read_hook = qualifier("read", "fits"); - variable io = qualifier("write", "fits"); % output routine - variable output = qualifier("output", strftime("%Y%m%d-%H%M%S_mcmc_chain.fits")); - variable cont = qualifier("continue", NULL); + variable qs = struct { + move = "stretch", % defined move + urand = &rand_uniform, % double random generator + upick = &rand_int, % int random generator + init = "uniform", % initialization function + load = NULL, % initialize from file + read = "fits", % specifier for read + write = "fits", % specifier for write + output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file + continue = NULL, % continue file + }; if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; @@ -333,8 +330,7 @@ define emcee_new (walkers_per_par, steps) { if (total_walkers > ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; - emcee_mpi(walkers_per_par, num_free_params(), steps; move=move, urand=urand, upick=upick, - init=init, output=output, continue=cont, write=io, load=load, read=read_hook); + emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); #ifexists rcl_mpi_init 
rcl_mpi_finalize(); #endif -- GitLab From 65f4fa71b05090f531dff0c34619bd9f0183b4d5 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 5 May 2020 17:28:17 +0200 Subject: [PATCH 62/89] Fix write call Indexing was wrong, write function was trying to write uninitialized array entries. --- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 820dcbb9..b47cb27a 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -266,7 +266,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % the main loop where the magic happens variable s, walker, update, stat; - _for s (1, steps, 1) { + _for s (0, steps-1, 1) { cycle_step = s mod io.cycle; release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ... 
@@ -280,12 +280,12 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % if cycle end is reached write the chain ifnot (node) { % master only - ifnot (cycle_step) + if (not cycle_step && s > 0) io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+(cycle_step-1)*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+(cycle_step-1)*length(this.walkers)] = this.update[j]; - stat_cycle[j+(cycle_step-1)*length(this.walkers)] = this.stat[j]; + walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); + update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; + stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; } } } -- GitLab From 71704d64c100c402ce450ffa76cea67c825f69ae Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 12 May 2020 09:50:48 +0200 Subject: [PATCH 63/89] Fix eval_statistic call from slaves Wrong if clause caused slaves to try to evaluate fit statistic on uninitialized values. 
--- src/fitting/ensemble-samplers/emcee.sl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index b47cb27a..324779b6 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -257,12 +257,14 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % evaluate the model at the walker positions to get the statistics % and if not continuing a chain, write them out - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; + ifnot (node) { + _for j (0, length(this.walkers)-1, 1) { + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.update[j] = 1; + } + if (cont == NULL) + io.__f_write(this.fit, this.walkers, this.update, this.stat); } - if ((cont == NULL) && (node == 0)) % only master is writing the initials - io.__f_write(this.fit, this.walkers, this.update, this.stat); % the main loop where the magic happens variable s, walker, update, stat; -- GitLab From 02ab3624b8410e3d3f633bc81ac11962f6d2014a Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 24 Jun 2020 11:00:45 +0200 Subject: [PATCH 64/89] True separate sets --- src/fitting/ensemble-samplers/emcee.sl | 192 +++++++++++++++++-------- 1 file changed, 133 insertions(+), 59 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 324779b6..5e41b6f2 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -8,24 +8,30 @@ private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; variable n = number_walkers/nodes + 1; variable missing = nodes - (number_walkers mod nodes); + variable set1_per_node, set2_per_node; walkers_per_node[[0:nodes-missing-1]] = n; walkers_per_node[[nodes-missing:nodes-1]] = n-1; - return walkers_per_node; + 
set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set + set1_per_node = walkers_per_node - set2_per_node; + return set1_per_node, set2_per_node; } % get walkers handled up to this node -private define previous_number_walkers (walkers_per_node) { - variable l = length(walkers_per_node); - variable handled_walkers = Int_Type[l]; - variable i, c = 0; +private define previous_number_walkers (set1_per_node, set2_per_node) { + variable l = length(set1_per_node); + variable set1_handled = Int_Type[l]; + variable set2_handled = Int_Type[l]; + variable i, c1 = 0, c2 = 0; _for i (0, l-1, 1) { - handled_walkers[i] = c; - c += walkers_per_node[i]; + set1_handled = c1; + set2_handled = c2; + c1 += set1_handled[i]; + c2 += set2_handled[i]; } - return handled_walkers; + return set1_handled, set2_handled; } private define emcee_call_setup_fun (exec_string, type) % string similar to statistic or fit method definitions "name; option1=foo, option2=bar" @@ -47,31 +53,39 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat } private define setup_node (node, num_nodes, total_walkers, nrands) { - variable walkers_per_node = distribute_walkers(num_nodes, total_walkers); - variable handled_walkers = previous_number_walkers(walkers_per_node); - variable set1 = total_walkers >> 1; + variable set1_per_node, set2_per_node; + variable set1_handled, set2_handled; + variable set2_len = total_walkers >> 1; % split walker in two sets + variable set1_len = total_walkers - set2_len; variable j; + (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); + (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); + variable N = struct { walkers, % current walker position - pivots, % current pivot positions + pivots, % current pivot positions, should not change for one loop! 
randoms, % required randoms update, % updater track stat, % step statistic total_walkers, % number of totals walkers - walkers_per_node = walkers_per_node, % number of walkers handled by each node - handled_walkers = handled_walkers, % handled_walkers[i] = sum(walkers_per_node[[0:i-1]]) - set1 = set1, % size of set 1 - set2 = total_walkers - set1, % size of set 2 + set1 = set1_len, % walkers for 1st update + set2 = set2_len, % walkers set for 2nd update + set1_per_node = set1_per_node, % walkers of set1 processed per node + set2_per_node = set2_per_node, % walkers of set2 processed per node + set1_handled = set1_handled, % walkers of set1 handled by all previous nodes + set2_handled = set2_handled, % walkers of set2 handled by all previous nodes + set = 0, % indicates which set to update (1: first set, 2: second set) fit = open_fit(), % the fit object, will fail if no model is loaded num_pars = num_free_params(), % number of fit parameters num_rands = nrands, % number of random numbers per walker }; + if (node) { % setup for slaves - N.walkers = Array_Type[walkers_per_node[node]]; - N.pivots = Array_Type[walkers_per_node[node]]; - N.update = Int_Type[walkers_per_node[node]]; - N.stat = Double_Type[walkers_per_node[node]]; - N.randoms = Double_Type[walkers_per_node[node]*nrands]; + N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; + N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; + N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; + N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; } else { % setup for master N.walkers = Array_Type[total_walkers]; N.pivots = Array_Type[total_walkers]; @@ -102,31 +116,45 @@ private define release_walkers_mpi_master (node, num_nodes, N) if (NULL == upick) throw InternalError, "No random generator given"; -% variable set2_len = total_walkers>>1; % number of walkers in set 2 - % variable set1_len = 
total_walkers-set2_len; % number of walkers in set 1 + % select pivot walkers for current set + variable set_len, complement_len, len_offset; + variable set_per_node, set_handled; + variable pick; + if (1 == N.set) { + set_len = N.set1; + complement_len = N.set2; + len_offset = 0; + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + } else if (2 == N.set) { + set_len = N.set2; + complement_len = N.set1; + len_offset = N.set1; + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + } + pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - variable all_pick1 = (@upick)(0, N.set2-1, N.set1)+N.set1; % pick pivot *for* set 1 - variable all_pick2 = (@upick)(0, N.set1-1, N.set2); % pick pivot *for* set 2 - variable pick = [all_pick1, all_pick2]; variable this_walkers, this_pivots, this_randoms; variable i,j; - _for i (0, length(N.walkers)-1, 1) - N.pivots[i] = N.walkers[pick[i]]; % get the pivot points - N.randoms[*] = (@urand)(length(N.walkers)*N.num_rands); % get new random numbers + % set current walkers and complement pivots + _for i (0, set_len-1, 1) + N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = (@urand)(set_len*N.num_rands); % get new random numbers for current set _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data % set the walkers for node i - this_walkers = N.walkers[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % pick the pivots for node i - this_pivots = N.pivots[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]]; + this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % set the randoms for node i - this_randoms = N.randoms[[0:N.walkers_per_node[i]*N.num_rands-1]+N.handled_walkers[i]*N.num_rands]; - _for j (0, length(this_walkers)-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j], 
length(this_walkers[j]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j], length(this_walkers[j]), i, 1); % send pivots from other set with tag 1 + this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1]+(set_handled[i]+len_offset)*N.num_rands]; + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms, length(this_randoms), i, 2); % send random numbers with tag 2 + () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands], set_len*N.num_rands, i, 2); % send random numbers with tag 2 } #endif } @@ -134,12 +162,24 @@ private define release_walkers_mpi_master (node, num_nodes, N) private define release_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset; + variable this_randoms; + if (1 == N.set) { + set_len = N.set1_per_node; + len_offset = 0; + } else if (2 == N.set) { + set_len = N.set2_per_node; + len_offset = N.set1_per_node[node]; + } + variable j; - _for j (0, length(N.walkers)-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j], length(N.walkers[j]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j], length(N.pivots[j]), 0, 1); % receive pivot points (tag 1) + this_randoms = Double_Type[set_len*N.num_rands]; + _for j (0, set_len-1, 1) { + () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(N.randoms, length(N.randoms), 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(this_randoms, 
set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; #endif } @@ -155,17 +195,28 @@ private define release_walkers_mpi (node, num_nodes, N) { private define catch_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset, set_per_node, set_handled; + if (1 == N.set) { + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; + len_offset = 0; + } else if (2 == N.set) { + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; + len_offset = N.set1; + } + variable i,j; variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[N.walkers_per_node[i]]; - this_update = Int_Type[N.walkers_per_node[i]]; - _for j (0, N.walkers_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[N.handled_walkers[i]+j], length(N.walkers[0]), i, i); + this_stat = Double_Type[set_per_node[i]]; + this_update = Int_Type[set_per_node[i]]; + _for j (0, set_per_node[i]-1, 1) + () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], length(N.walkers[0]), i, i); () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); - N.update[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_update; - N.stat[[0:N.walkers_per_node[i]-1]+N.handled_walkers[i]] = this_stat; + N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; + N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; } #endif } @@ -173,11 +224,20 @@ private define catch_walkers_mpi_master (node, num_nodes, N) private define catch_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init + variable set_len, len_offset; + if (1 == N.set) { + set_len = N.set1_per_node[node]; + len_offset = 0; + } else if (2 == N.set) { + set_len = 
N.set2_per_node[node]; + len_offset = N.set1_per_node[node]; + } + variable i; - _for i (0, length(N.walkers)-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i], length(N.walkers[0]), 0, node); - () = rcl_mpi_org_isend_int(N.update, length(N.update), 0, node); - () = rcl_mpi_org_isend_double(N.stat, length(N.stat), 0, node); + _for i (0, set_len-1, 1) + () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); + () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); #endif } @@ -267,18 +327,32 @@ private define emcee_mpi (walker_per_par, number_par, steps) { } % the main loop where the magic happens - variable s, walker, update, stat; + variable s, walker, update, stat, set, set_len, len_offset; _for s (0, steps-1, 1) { cycle_step = s mod io.cycle; - release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... - _for j (0, this.walkers_per_node[node]-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j], this.pivots[j], - this.randoms[[0:this.num_rands-1]+j*this.num_rands], this.stat[j]); - this.walkers[j] = walker; - this.update[j] = update; - this.stat[j] = stat; + _for set (1, 2, 1) { + this.set = set; + + release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + + if (1 == set) { + set_len = this.set1_per_node[node]; + len_offset = 0; + } else if (2 == set) { + set_len = this.set2_per_node[node]; + len_offset = this.set1_per_node[node]; + } + + _for j (0, set_len-1, 1) { % ... let them move ... 
+ (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], this.pivots[j+len_offset], + this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], this.stat[j+len_offset]); + this.walkers[j+len_offset] = walker; + this.update[j+len_offset] = update; + this.stat[j+len_offset] = stat; + } + + catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! } - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! % if cycle end is reached write the chain ifnot (node) { % master only -- GitLab From 9dfe506ea43a381b46b60e95f30b8c5bc273b081 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 29 Jun 2020 13:03:28 +0200 Subject: [PATCH 65/89] Fix statistic bug in stretch_move Stretch move returned wrong statistic when rejecting the step. This caused a weird bias in the resulting distribution. Kudos to Philipp T. for finding this! --- src/fitting/ensemble-samplers/emcee-moves.sl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl index 5dc52fa4..145c5027 100644 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ b/src/fitting/ensemble-samplers/emcee-moves.sl @@ -37,7 +37,7 @@ private define inverse_cdf (u, a) { % Move must evaluate the fit function private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { variable z = inverse_cdf(u[0], move.a); - variable ystat = -1e32; % from mikes code + variable ystat, ret_stat = prev_stat; variable y; % step proposition variable x_t1 = x; % resulting step variable update = 0; % update indicator @@ -53,13 +53,14 @@ private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { % caluculate if we accept the step based on the statistics of the % model. 
We assume that the statistic is given as -2 log likelihood if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { + ret_stat = ystat; x_t1 = y; update = 1; } } catch IsisError; % return new walker position, update, new statistic - return (x_t1, update, ystat); + return (x_t1, update, ret_stat); } public define emcee_move_stretch () { -- GitLab From d0306ffdf4f002bdb42ee7b645c7e93a6cacb884 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 29 Jun 2020 19:30:07 +0200 Subject: [PATCH 66/89] Fix missing argument when continuing chain Call to io.__f_open was missing the fit handle argument --- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 5e41b6f2..ee3c194e 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -284,7 +284,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { ifnot (node) { % master only if (cont != NULL) - io.__f_open(cont, this.walkers); + io.__f_open(cont, this.fit, this.walkers); else if (load != NULL) { load_hook.__f_read(load, this.walkers); load_hook.__f_close(); -- GitLab From 11f662898806de2c9fe50934bb8e3d7d89c1591f Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Sun, 5 Jul 2020 12:32:12 +0200 Subject: [PATCH 67/89] Fix missing step write at the end of chain Index confusion led to a missing step write, causing an index error when trying to continue a chain. 
--- src/fitting/ensemble-samplers/emcee.sl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index ee3c194e..9de50f2a 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -367,11 +367,11 @@ private define emcee_mpi (walker_per_par, number_par, steps) { } % we might have unwritten steps left, so better write them here + cycle_step++; % the last step is never written in the loop, we have to deal with it here ifnot (node) { - if (cycle_step) - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); + io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], + update_cycle[[:cycle_step*length(this.walkers)-1]], + stat_cycle[[:cycle_step*length(this.walkers)-1]]); } % and finally call the finalizing function @@ -408,6 +408,6 @@ define emcee_new (walkers_per_par, steps) { emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); #ifexists rcl_mpi_init - rcl_mpi_finalize(); +% rcl_mpi_finalize(); #endif } -- GitLab From 69ec3c84f54db5eae7e1ad3326105ea9b2d5d40c Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Wed, 8 Jul 2020 23:35:05 +0200 Subject: [PATCH 68/89] Fix missing statistic of last step Changed emcee_finalize_chain_fits to be able to deal with the varying number of written steps for new and continued chains. Also adjusted the value of NSTEPS that is written to the header accordingly. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 8e433788..b1357255 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -200,27 +200,29 @@ private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cyc private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; + variable written_steps; variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); fits_update_key(io.handle, "NWALKERS", walker_per_parameter); fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], io.num_steps+1, collen-io.num_steps, &reread); + written_steps = length(reread[0])/total_walkers; % need not be equal to steps + all_steps = (all_steps<0) ? 
written_steps : all_steps + written_steps; + fits_update_key(io.handle, "NSTEPS", all_steps); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; + variable frac_update = Double_Type[written_steps]; + variable min_stat = Double_Type[written_steps]; + variable med_stat = Double_Type[written_steps]; + variable max_stat = Double_Type[written_steps]; variable step_stat = reread[1]; variable step_update = reread[0]; - _for j (0, steps-1, 1) { + _for j (0, written_steps-1, 1) { frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); -- GitLab From beac71ba2ce48da93e0d237f744a947a9f140028 Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Mon, 13 Jul 2020 13:29:37 +0200 Subject: [PATCH 69/89] Revert "Fix missing statistic of last step" This reverts commit c691bdbdff21c63064a5a52ef8a8bf11120a9122. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index b1357255..8e433788 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -200,29 +200,27 @@ private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cyc private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { variable tmp, tmp2; variable reread; - variable written_steps; variable total_walkers = walker_per_parameter*number_parameter; variable collen = fits_get_num_rows(io.handle); % length of chain variable all_steps = fits_read_key(io.handle, "NSTEPS"); + all_steps = (all_steps<0) ? steps : all_steps + steps; + fits_update_key(io.handle, "NSTEPS", all_steps); fits_update_key(io.handle, "NWALKERS", walker_per_parameter); fits_update_key(io.handle, "NFREEPAR", number_parameter); () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], io.num_steps+1, collen-io.num_steps, &reread); - written_steps = length(reread[0])/total_walkers; % need not be equal to steps - all_steps = (all_steps<0) ? 
written_steps : all_steps + written_steps; - fits_update_key(io.handle, "NSTEPS", all_steps); () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); collen = fits_get_num_rows(io.handle); % read length of chain summary variable j; - variable frac_update = Double_Type[written_steps]; - variable min_stat = Double_Type[written_steps]; - variable med_stat = Double_Type[written_steps]; - variable max_stat = Double_Type[written_steps]; + variable frac_update = Double_Type[steps]; + variable min_stat = Double_Type[steps]; + variable med_stat = Double_Type[steps]; + variable max_stat = Double_Type[steps]; variable step_stat = reread[1]; variable step_update = reread[0]; - _for j (0, written_steps-1, 1) { + _for j (0, steps-1, 1) { frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; min_stat[j] = min(tmp2); -- GitLab From 991568dd791818efb2a5e2754704cf4da320bad0 Mon Sep 17 00:00:00 2001 From: Philipp Thalhammer Date: Mon, 13 Jul 2020 14:48:18 +0200 Subject: [PATCH 70/89] Add function to write random initial walkers to separate extension Added function emcee_write_init_step to write the first randomly chosen set of walkers to the separate extension INITWALKER. This should ensure that the actual chain in the extension MCMCCHAIN is always steps*num_walkers long. 
--- src/fitting/ensemble-samplers/emcee-io.sl | 28 +++++++++++++++++++++++ src/fitting/ensemble-samplers/emcee.sl | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl index 8e433788..0bd8a35b 100644 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ b/src/fitting/ensemble-samplers/emcee-io.sl @@ -141,6 +141,33 @@ private define emcee_open_chain_fits (io, filename, fit_handle, walkers) { io.num_steps = fits_get_num_rows(io.handle); } + +private define emcee_write_init_step (io, fit_handle, init_walkers, init_update, init_stat){ + variable par = __parameters(fit_handle.object); + fits_create_binary_table(io.handle, "INITWALKER", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "INITWALKER", 0); + variable tmp, update; + variable npar = length(par.index); + variable nwalkers = length(init_walkers); + fits_update_key(io.handle, "NFREEPAR", npar, " Number of free parameters"); + fits_update_key(io.handle, "NWALKERS", nwalkers/npar, " Number of walkers per free parameter"); + + variable i,j; + _for j (0, npar-1, 1) { + tmp = Double_Type[nwalkers]; + _for i (0, nwalkers-1, 1) + tmp[i] = init_walkers[i][j]; + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), 1, 1, tmp); + } + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), 1, 1, init_stat); + () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), 1, 1, init_update); + () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); +} + + private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); @@ -256,6 +283,7 @@ public define emcee_io_fits () { 
__f_read=&emcee_read_chain_fits, __f_finalize=&emcee_finalize_chain_fits, __f_close=&emcee_close_chain_fits, + __f_initwrite=&emcee_write_init_step, }; } diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 9de50f2a..0f40f092 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -323,7 +323,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { this.update[j] = 1; } if (cont == NULL) - io.__f_write(this.fit, this.walkers, this.update, this.stat); + io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); } % the main loop where the magic happens -- GitLab From 0aad865daff6825238b20df6116ef807eecb3c17 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 27 Jan 2021 04:24:14 +0100 Subject: [PATCH 71/89] Fix bug in previous_number_walkers Variables got overwritten instead of written into the array --- src/fitting/ensemble-samplers/emcee.sl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 0f40f092..e5c95624 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -25,8 +25,8 @@ private define previous_number_walkers (set1_per_node, set2_per_node) { variable i, c1 = 0, c2 = 0; _for i (0, l-1, 1) { - set1_handled = c1; - set2_handled = c2; + set1_handled[i] = c1; + set2_handled[i] = c2; c1 += set1_handled[i]; c2 += set2_handled[i]; } -- GitLab From e2bb186ab886a8d61822506b506627c7ef280f9a Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 27 Jan 2021 14:20:43 +0100 Subject: [PATCH 72/89] A bit of code cleaning --- src/fitting/ensemble-samplers/emcee.sl | 172 ++++++++++++++++--------- 1 file changed, 108 insertions(+), 64 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index e5c95624..17bd8e27 100644 --- a/src/fitting/ensemble-samplers/emcee.sl 
+++ b/src/fitting/ensemble-samplers/emcee.sl @@ -6,14 +6,16 @@ require("rand"); % get walkers per node private define distribute_walkers (nodes, number_walkers) { variable walkers_per_node = Int_Type[nodes]; - variable n = number_walkers/nodes + 1; - variable missing = nodes - (number_walkers mod nodes); + variable n = number_walkers/nodes + 1; + variable missing = nodes - (number_walkers mod nodes); variable set1_per_node, set2_per_node; walkers_per_node[[0:nodes-missing-1]] = n; walkers_per_node[[nodes-missing:nodes-1]] = n-1; + set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set set1_per_node = walkers_per_node - set2_per_node; + return set1_per_node, set2_per_node; } @@ -40,15 +42,18 @@ private define emcee_call_setup_fun (exec_string, type) % string similar to stat throw UsageError, sprintf("Unable to parse qualifier for %s", type); variable s = strchop(exec_string, ';', 0); + if (length(s) > 2) throw UsageError, sprintf("Failed parsing option '%s'", exec_string); variable fname = strtrim(s[0]); variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + if (NULL == f) throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); variable opt = length(s) == 2 ? 
eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; + return (@f)(;; opt); } @@ -58,6 +63,7 @@ private define setup_node (node, num_nodes, total_walkers, nrands) { variable set2_len = total_walkers >> 1; % split walker in two sets variable set1_len = total_walkers - set2_len; variable j; + (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); @@ -110,28 +116,31 @@ private define release_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init variable urand = qualifier("urand", NULL); - if (NULL == urand) - throw InternalError, "No random generator given"; variable upick = qualifier("upick", NULL); + + if (NULL == urand) + throw InternalError, "Missing random generator"; + if (NULL == upick) - throw InternalError, "No random generator given"; + throw InternalError, "Missing random generator"; % select pivot walkers for current set variable set_len, complement_len, len_offset; variable set_per_node, set_handled; variable pick; - if (1 == N.set) { - set_len = N.set1; + + if (1 == N.set) { % + set_len = N.set1; complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; + len_offset = 0; + set_per_node = N.set1_per_node; + set_handled = N.set1_handled; } else if (2 == N.set) { - set_len = N.set2; + set_len = N.set2; complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; + len_offset = N.set1; + set_per_node = N.set2_per_node; + set_handled = N.set2_handled; } pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set @@ -141,20 +150,29 @@ private define release_walkers_mpi_master (node, num_nodes, N) % set current walkers and complement pivots _for i (0, set_len-1, 1) N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = 
(@urand)(set_len*N.num_rands); % get new random numbers for current set + + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] + = (@urand)(set_len*N.num_rands); % get new random numbers for current set _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data % set the walkers for node i this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1]+(set_handled[i]+len_offset)*N.num_rands]; + this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] + +(set_handled[i]+len_offset)*N.num_rands]; + _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 + () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], + length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], + length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands], set_len*N.num_rands, i, 2); % send random numbers with tag 2 + + () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] + +len_offset*N.num_rands], + set_len*N.num_rands, i, 2); % send random numbers with tag 2 } #endif } @@ -164,21 +182,26 @@ private define release_walkers_mpi_slave (node, num_nodes, N) #ifexists rcl_mpi_init variable set_len, len_offset; variable this_randoms; + if (1 == N.set) { - set_len = N.set1_per_node; + set_len = N.set1_per_node; len_offset = 0; } else 
if (2 == N.set) { - set_len = N.set2_per_node; + set_len = N.set2_per_node; len_offset = N.set1_per_node[node]; } variable j; this_randoms = Double_Type[set_len*N.num_rands]; _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) + () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], + length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], + length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(this_randoms, set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(this_randoms, + set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; #endif } @@ -196,27 +219,34 @@ private define catch_walkers_mpi_master (node, num_nodes, N) { #ifexists rcl_mpi_init variable set_len, len_offset, set_per_node, set_handled; + if (1 == N.set) { set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; + set_handled = N.set1_handled; + len_offset = 0; } else if (2 == N.set) { set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; + set_handled = N.set2_handled; + len_offset = N.set1; } variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, slang creates a copy of an array when addressed by index + variable this_stat, this_update; % we have to use intermediate storage, + % slang creates a copy of an array when addressed by index + _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; + this_stat = Double_Type[set_per_node[i]]; this_update = Int_Type[set_per_node[i]]; + _for j (0, set_per_node[i]-1, 1) - () = 
rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], + length(N.walkers[0]), i, i); + () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); + N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; + N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; } #endif } @@ -225,17 +255,20 @@ private define catch_walkers_mpi_slave (node, num_nodes, N) { #ifexists rcl_mpi_init variable set_len, len_offset; + if (1 == N.set) { - set_len = N.set1_per_node[node]; + set_len = N.set1_per_node[node]; len_offset = 0; } else if (2 == N.set) { - set_len = N.set2_per_node[node]; + set_len = N.set2_per_node[node]; len_offset = N.set1_per_node[node]; } variable i; _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], + length(N.walkers[len_offset]), 0, node); + () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); #endif @@ -251,15 +284,16 @@ private define catch_walkers_mpi (node, num_nodes, N) private define emcee_mpi (walker_per_par, number_par, steps) { variable total_walkers = walker_per_par*number_par; - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); + + variable init = qualifier("init", NULL); + variable move = qualifier("move", NULL); + variable urand = qualifier("urand", NULL); + variable upick = 
qualifier("upick", NULL); + variable cont = qualifier("continue", NULL); + variable output = qualifier("output", NULL); + variable io = qualifier("write", NULL); variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); + variable load = qualifier("load", NULL); variable node, num_nodes; @@ -267,7 +301,8 @@ private define emcee_mpi (walker_per_par, number_par, steps) { node = rcl_mpi_init(); num_nodes = rcl_mpi_numtasks(); num_nodes = (num_nodes<1) ? 1 : num_nodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! BUT: since we have a memory leak from the mpi module anyway we accept it currently ... + rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! + % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... #else node = 0; num_nodes = 1; @@ -275,11 +310,13 @@ private define emcee_mpi (walker_per_par, number_par, steps) { move = emcee_call_setup_fun(move, "move"); variable this = setup_node(node, num_nodes, total_walkers, move.nrands); + % 'this' is the mpi_emcee handle for this node! 
+ % It contains all relevant data to do the calculation % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); + io = emcee_call_setup_fun(io, "io"); load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); + init = emcee_call_setup_fun(init, "init"); if (NULL == io || NULL == load_hook || NULL == init) return; ifnot (node) { % master only @@ -310,7 +347,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { walker_cycle = Array_Type[collector_length]; update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; + stat_cycle = Double_Type[collector_length]; if (NULL == urand || NULL == upick) throw InternalError, "Missing random number generator"; @@ -319,7 +356,7 @@ private define emcee_mpi (walker_per_par, number_par, steps) { % and if not continuing a chain, write them out ifnot (node) { _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); + this.stat[j] = this.fit.eval_statistic(this.walkers[j]); this.update[j] = 1; } if (cont == NULL) @@ -333,22 +370,26 @@ private define emcee_mpi (walker_per_par, number_par, steps) { _for set (1, 2, 1) { this.set = set; - release_walkers_mpi(node, num_nodes, this; upick=upick, urand=urand); % release walkers to freedom ... + release_walkers_mpi(node, + num_nodes, + this; upick=upick, urand=urand); % release walkers to freedom ... if (1 == set) { - set_len = this.set1_per_node[node]; + set_len = this.set1_per_node[node]; len_offset = 0; } else if (2 == set) { - set_len = this.set2_per_node[node]; + set_len = this.set2_per_node[node]; len_offset = this.set1_per_node[node]; } _for j (0, set_len-1, 1) { % ... let them move ... 
- (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], this.stat[j+len_offset]); + (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], + this.pivots[j+len_offset], + this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], + this.stat[j+len_offset]); this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; + this.update[j+len_offset] = update; + this.stat[j+len_offset] = stat; } catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! @@ -384,25 +425,28 @@ private define emcee_mpi (walker_per_par, number_par, steps) { define emcee_new (walkers_per_par, steps) { variable qs = struct { - move = "stretch", % defined move - urand = &rand_uniform, % double random generator - upick = &rand_int, % int random generator - init = "uniform", % initialization function - load = NULL, % initialize from file - read = "fits", % specifier for read - write = "fits", % specifier for write - output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file + move = "stretch", % defined move + urand = &rand_uniform, % double random generator + upick = &rand_int, % int random generator + init = "uniform", % initialization function + load = NULL, % initialize from file + read = "fits", % specifier for read + write = "fits", % specifier for write + output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file continue = NULL, % continue file }; if (NULL == get_fit_fun()) throw UsageError, "No fit function loaded"; + if (NULL == all_data()) throw UsageError, "No data set loaded"; + ifnot (0 ((1<<29)-1)) throw UsageError, "Unable to create ensemble for this large number of walkers"; -- GitLab From faa8ed7c5044a36e018f65006b355820dcba6b7a Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 2 Feb 2021 11:13:25 +0100 Subject: [PATCH 73/89] Start emcee resturcture --- 
src/fitting/ensemble-samplers/emcee-mpi.sl | 683 +++++++++++++++++++++ 1 file changed, 683 insertions(+) create mode 100644 src/fitting/ensemble-samplers/emcee-mpi.sl diff --git a/src/fitting/ensemble-samplers/emcee-mpi.sl b/src/fitting/ensemble-samplers/emcee-mpi.sl new file mode 100644 index 00000000..a0381dac --- /dev/null +++ b/src/fitting/ensemble-samplers/emcee-mpi.sl @@ -0,0 +1,683 @@ +% -*- mode: slang; mode: fold; -*- % + +require("rand"); + +%%% START FROM SCRATCH... well sort of + +% Implementation of the emcee hammer () with the principle idea +% that multiple nodes (engines) are responsible for a part of the +% walkers. For efficiency the walkers are distributed equally to +% each engine. To keep the statistical properties the walkers are +% seperated in to two groups (see ref) where the next step of +% group one depends on the current position of group two and the +% next step of group two depends on the new position of group one. +% For most efficiency we try to reduce the required computations +% to the minimum possible such that the model evaluation plus +% the necessary communication is everything that happens in the +% main loop. +% +% To prevent any side effects from the PRNG we let the master +% calculate enough for each step and distribute them to the +% slaves. 
+ +private define emceeEvalFunction () %{{{ +{ + variable args = __pop_list(_NARGS); + variable handle = args[0]; + variable fun = handle.function; + + return @fun(__push_list(args[[1:]]), handle.info;; handle.options); +} +%}}} + +private define emceeAddFunction (name, function, list) %{{{ +{ + list[strtrim(name)] = struct { + function = function, + userdata = qualifier("userdata"), + data = qualifier("data"); + help = qualifier("help", name), + }; +} +%}}} + +private define emceeSetFunction (functionString, list, pointer) %{{{ +{ + variable s = strchop(functionString, ';', 0); + variable name = strtrim(s[0]); + variable options = NULL; + + if (length(s)>1) + options = eval(sprintf("struct { %s }", s[1])); + + ifnot (assoc_key_exists(list, name)) + throw UndefinedNameError, sprintf("'%s' is not registere"); + + variable function = struct { + eval = &emceeEvalFunction, + @(list[name]), + options = options, + functionString = functionString, + }; + + @pointer = function; +} +%}}} + +private define emceeGetFunction (pointer) %{{{ +{ + return pointer.functionString; +} +%}}} + +%{{{ move functions + +% access to the engine. Can set how many random numbers are required + +private variable Emcee_Move_List = Assoc_Type[Struct_Type]; +define add_emcee_move (name, function, nrand, list) %{{{ +{ + variable data = struct { nrand = nrand }; + variable userdata = qualifier("userdata"); + + emceeAddFunction(__push_list(args), Emcee_Move_List; + data=data, userdata=userdata); +} +%}}} + +private variable Emcee_Move; +define set_emcee_move (moveString) %{{{ +{ + emceeSetFunction(moveString, Emcee_Move_List, &Emcee_Move); +} +%}}} + +define get_emcee_move () %{{{ +{ + return emceeGetFunction(&emcee_Move); +} +%}}} + +%}}} + +%{{{ file functions + +% have acces to the engine. 
% Gets the cycle number

private variable Emcee_Write_List = Assoc_Type[Struct_Type];
private variable Emcee_Read_List = Assoc_Type[Struct_Type];

% TODO: 'define add_emcee_write(' was left unfinished at this point. The
% dangling definition header is a syntax error, so it is kept as a comment
% only until the write/read registration functions are implemented.

%}}}

% Create the engine structure for the engine 'id' out of 'numberEngines'
% engines handling 'totalNumberWalkers' walkers in total.
% The master (id 0) allocates space for all walkers, every other engine
% only for its own share. Requires a loaded model (open_fit).
private define emceeSetupEngine (id, numberEngines, totalNumberWalkers) %{{{
{
  variable engine = struct {
    id = id, % the engines id (0 is the master)
    numberEngines = numberEngines, % total number of engines

    walkers, % array of current walker position for this engine
    pivots, % array of current pivot positions, should not change for one loop!
    rolls, % array of required random numbers
    update, % array updater track
    stat, % array step statistic
    totalNumberWalkers = totalNumberWalkers, % number of all walkers
    totalNumberSet1, % number walkers in set 1 numWalkers/2
    totalNumberSet2, % number walkers in set 2 numWalkers-numWalkers/2

    numberWalkers, % number walkers handled by this engine
    numberWalkersSet1, % number walkers in set 1 belonging to this engine
    numberWalkersSet2, % number walkers in set 2 belonging to this engine

    firstWalkerSet1, % index of first walker in set 1 handle by this engine
    firstWalkerSet2, % index of first walker in set 2 handle by this engine

    % set = 0, % indicates which set to update (1: first set, 2: second set)
    fit = open_fit(), % the fit object, will fail if no model is loaded
    numberParameters = num_free_params(), % number of fit parameters
    numberWalkerRolls % number of random numbers each walker uses
  };

  engine.totalNumberSet1 = totalNumberWalkers/2;
  engine.totalNumberSet2 = totalNumberWalkers - engine.totalNumberSet1;

  % divide walkers evenly: every engine gets total/engines walkers and the
  % engines with id < (total mod engines) get one more, i.e., the remainder
  % goes to the lowest ids
  engine.numberWalkers = engine.totalNumberWalkers/engine.numberEngines
    + 1 - ((engine.totalNumberWalkers mod engine.numberEngines) <= engine.id);
  engine.numberWalkersSet1 = engine.numberWalkers/2;
  engine.numberWalkersSet2 = engine.numberWalkers-engine.numberWalkersSet1;

  % master stores all values
  variable size = id ? engine.numberWalkers : engine.totalNumberWalkers;
  variable j;
  engine.walkers = Array_Type[size];
  engine.pivots = Array_Type[size];
  % NOTE(review): one slot per walker; numberWalkerRolls is not known yet
  engine.rolls = Double_Type[size];
  engine.update = Int_Type[size];
  engine.stat = Double_Type[size];

  % the original loop lacked the step argument of _for
  _for j (0, size-1, 1) {
    engine.walkers[j] = Double_Type[engine.numberParameters];
    engine.pivots[j] = Double_Type[engine.numberParameters];
  }

  return engine;
}
%}}}

% Create the structure of function handles ("gears") used by an engine.
% All handles are NULL until they are set up.
private define emceeSetupGears (engine) %{{{
{
  variable gears = struct {
    initialize, % initialize function
    step, % step function
    move, % move handle (the original assigned this field without declaring it)
    random_uniform, % uniform random number generator (float)
    random_pick, % uniform random number generator (int)
    read, % read handle
    write % write handle (may be null, which means that read handle is used)
  };

  gears.move = NULL;

  if (0 == engine.id) {
    % load or init, open file/use open file
  }

  return gears;
}
%}}}

% TODO: not implemented yet
private define emceeReleaseWalkers (engine) %{{{
{
}
%}}}

% TODO: not implemented yet
private define emceeMoveWalkers (engine) %{{{
{
}
%}}}

% TODO: not implemented yet
private define emceeCatchWalkers (engine) %{{{
{
}
%}}}

%{{{ some helpers

% get walkers per node
% Returns two integer arrays (one entry per node): the number of walkers of
% set 1 and of set 2 handled by each node. The first
% (numWalkers mod numNodes) nodes handle one walker more than the others.
private define distributeWalkers (numNodes, numWalkers) %{{{
{
  variable walkersPerNode = Int_Type[numNodes];
  variable n = numWalkers/numNodes + 1;
  variable missing = numNodes - (numWalkers mod numNodes);
  variable set1PerNode, set2PerNode; % TODO: does not have to be array

  walkersPerNode[[0:numNodes-missing-1]] = n;
  walkersPerNode[[numNodes-missing:numNodes-1]] = n-1;

  set2PerNode = walkersPerNode >> 1; % half walkers handled by each node per set
  set1PerNode = walkersPerNode - set2PerNode;

  return set1PerNode, set2PerNode;
}
%}}}

% get walkers handled up to this node
% Returns for each node the number of walkers of set 1 and of set 2 that
% are handled by all nodes with a lower number (exclusive running sum).
private define getHandledNumberWalkers (set1PerNode, set2PerNode) %{{{
{
  variable l = length(set1PerNode);
  variable set1Handled = Int_Type[l];
  variable set2Handled = Int_Type[l];
  variable i, c1 = 0, c2 = 0;

  _for i (0, l-1, 1) {
    set1Handled[i] = c1;
    set2Handled[i] = c2;
    % the original added set?Handled[i] here, so all offsets stayed zero
    c1 += set1PerNode[i];
    c2 += set2PerNode[i];
  }

  return set1Handled, set2Handled;
}
%}}}

% string similar to statistic or fit method definitions "name; option1=foo, option2=bar"
% Looks up the function emcee_<type>_<name> and returns what it returns when
% called with the given options as qualifiers.
% Throws UsageError if the string can not be parsed or the function is unknown.
private define emceeInterfaceSetup (execString, type) %{{{
{
  if (NULL == execString || typeof(execString) != String_Type)
    throw UsageError, sprintf("Unable to parse qualifier for %s", type);

  variable s = strchop(execString, ';', 0);

  if (length(s) > 2)
    throw UsageError, sprintf("Failed parsing option '%s'", execString);

  variable fname = strtrim(s[0]);
  variable f = __get_reference(sprintf("emcee_%s_%s", type, fname));

  if (NULL == f)
    throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname);

  % NULL stands for "no qualifiers".
  % NOTE(review): the option part is passed to eval() verbatim, so it must
  % never come from an untrusted source.
  variable opt = NULL;
  if (length(s) == 2)
    opt = eval(sprintf("struct { %s }", s[1]));

  return (@f)(;; opt);
}
%}}}

% Create the handle for node 'node' out of 'numNodes' nodes, sampling with
% 'numTotalWalkers' walkers where each walker needs 'numRands' random
% numbers per step. The master (node 0) holds all walkers (first all of
% set 1, then all of set 2), the slaves only their own ones (first their
% set 1, then their set 2).
private define emceeMPISetupNode (node, numNodes, numTotalWalkers, numRands) %{{{
{
  variable set1PerNode, set2PerNode;
  variable set1Handled, set2Handled;
  variable j;

  (set1PerNode, set2PerNode) = distributeWalkers(numNodes, numTotalWalkers);
  (set1Handled, set2Handled) = getHandledNumberWalkers(set1PerNode, set2PerNode);

  % The set sizes have to be the sums of the per node numbers, otherwise the
  % ranges the master addresses for set 1 and set 2 overlap as soon as more
  % than one node handles an odd number of walkers.
  variable set1Len = int(sum(set1PerNode));
  variable set2Len = int(sum(set2PerNode));

  variable N = struct {
    node = node, % this nodes number (0 is master)
    num_nodes = numNodes, % total number of nodes
    walkers, % current walker position
    pivots, % current pivot positions, should not change for one loop!
    randoms, % required randoms
    update, % updater track
    stat, % step statistic
    total_walkers = numTotalWalkers, % number of totals walkers
    set1 = set1Len, % walkers for 1st update
    set2 = set2Len, % walkers set for 2nd update
    set1_per_node = set1PerNode, % walkers of set1 processed per node
    set2_per_node = set2PerNode, % walkers of set2 processed per node
    set1_handled = set1Handled, % walkers of set1 handled by all previous nodes
    set2_handled = set2Handled, % walkers of set2 handled by all previous nodes
    set = 0, % indicates which set to update (1: first set, 2: second set)
    fit = open_fit(), % the fit object, will fail if no model is loaded
    num_pars = num_free_params(), % number of fit parameters
    num_rands = numRands % number of random numbers per walker
  };

  % slaves store their own walkers only, the master stores all of them
  variable size = node ? set1PerNode[node]+set2PerNode[node] : numTotalWalkers;

  N.walkers = Array_Type[size];
  N.pivots = Array_Type[size];
  N.update = Int_Type[size];
  N.stat = Double_Type[size];
  N.randoms = Double_Type[size*numRands];

  _for j (0, size-1, 1) {
    N.walkers[j] = Double_Type[N.num_pars];
    N.pivots[j] = Double_Type[N.num_pars];
  }

  return N;
}
%}}}

%}}}

%{{{ mpi functions

% Master part of the release: pick the pivots for the set N.set from the
% complementary set, draw the random numbers and send walkers, pivots and
% random numbers to the slaves.
% Qualifiers:
%   urand - reference to function returning n uniform random numbers
%   upick - reference to function (min, max, n) returning n random integers
private define release_walkers_mpi_master (node, num_nodes, N)
{
  variable urand = qualifier("urand", NULL);
  variable upick = qualifier("upick", NULL);

  if (NULL == urand)
    throw InternalError, "Missing random generator";

  if (NULL == upick)
    throw InternalError, "Missing random generator";

  % select pivot walkers for current set
  variable set_len, complement_len, len_offset, complement_offset;
  variable set_per_node, set_handled;
  variable pick;

  if (1 == N.set) {
    set_len = N.set1;
    complement_len = N.set2;
    len_offset = 0;
    complement_offset = N.set1;
    set_per_node = N.set1_per_node;
    set_handled = N.set1_handled;
  } else if (2 == N.set) {
    set_len = N.set2;
    complement_len = N.set1;
    len_offset = N.set1;
    complement_offset = 0;
    set_per_node = N.set2_per_node;
    set_handled = N.set2_handled;
  } else
    throw InternalError, sprintf("Invalid walker set '%S'", N.set);

  % pick from complement set (the original added the offset of the current
  % set and therefore picked the pivots from the walkers own set)
  pick = (@upick)(0, complement_len-1, set_len)+complement_offset;

  variable i,j;

  % set current walkers and complement pivots
  _for i (0, set_len-1, 1)
    N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points

  N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands]
    = (@urand)(set_len*N.num_rands); % get new random numbers for current set

  % Only the communication depends on MPI. Pivots and random numbers are
  % needed by the master in any case, so they are set up above even if the
  % MPI module is not available.
#ifexists rcl_mpi_init
  variable this_walkers, this_pivots, this_randoms;
  variable count, first;

  _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data
    count = set_per_node[i];
    first = set_handled[i]+len_offset;

    % set the walkers for node i
    this_walkers = N.walkers[[0:count-1]+first];
    % pick the pivots for node i
    this_pivots = N.pivots[[0:count-1]+first];
    % set the randoms for node i
    this_randoms = N.randoms[[0:count*N.num_rands-1]+first*N.num_rands];

    % The slices start at zero and contain the 'count' walkers of node i.
    % The original looped over the whole set and added the offset again.
    _for j (0, count-1, 1) {
      () = rcl_mpi_org_isend_double(this_walkers[j],
				    length(this_walkers[j]), i, 0); % send current walkers with tag 0
      () = rcl_mpi_org_isend_double(this_pivots[j],
				    length(this_pivots[j]), i, 1); % send pivots from other set with tag 1
    }

    () = rcl_mpi_org_isend_double(this_randoms,
				  length(this_randoms), i, 2); % send random numbers with tag 2
  }
#endif
}

% Slave part of the release: receive walkers, pivots and random numbers of
% the set N.set from the master (node 0).
private define release_walkers_mpi_slave (node, num_nodes, N)
{
#ifexists rcl_mpi_init
  variable set_len, len_offset;
  variable this_randoms;

  % the original used the whole per node arrays as length
  if (1 == N.set) {
    set_len = N.set1_per_node[node];
    len_offset = 0;
  } else if (2 == N.set) {
    set_len = N.set2_per_node[node];
    len_offset = N.set1_per_node[node];
  } else
    throw InternalError, sprintf("Invalid walker set '%S'", N.set);

  variable j;
  this_randoms = Double_Type[set_len*N.num_rands];
  _for j (0, set_len-1, 1) {
    () = rcl_mpi_org_recv_double(N.walkers[j+len_offset],
				 length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0)
    () = rcl_mpi_org_recv_double(N.pivots[j+len_offset],
				 length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1)
  }
  () = rcl_mpi_org_recv_double(this_randoms,
			       set_len*N.num_rands, 0, 2); % receive random numbers (tag 2)

  N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms;
#endif
}

private define release_walkers_mpi (node, num_nodes, N) {
  % distribute walkers, pivots and random numbers

  if (node)
    release_walkers_mpi_slave(node, num_nodes, N);
  else
    release_walkers_mpi_master(node, num_nodes, N;; __qualifiers());
}

% Master part of the catch: receive the moved walkers, the update flags and
% the statistics of the set N.set from all slaves.
private define catch_walkers_mpi_master (node, num_nodes, N)
{
#ifexists rcl_mpi_init
  variable len_offset, set_per_node, set_handled;

  if (1 == N.set) {
    set_per_node = N.set1_per_node;
    set_handled = N.set1_handled;
    len_offset = 0;
  } else if (2 == N.set) {
    set_per_node = N.set2_per_node;
    set_handled = N.set2_handled;
    len_offset = N.set1;
  } else
    throw InternalError, sprintf("Invalid walker set '%S'", N.set);

  variable i,j;
  variable this_stat, this_update; % we have to use intermediate storage,
				   % slang creates a copy of an array when addressed by index

  _for i (1, num_nodes-1, 1) {
    this_stat = Double_Type[set_per_node[i]];
    this_update = Int_Type[set_per_node[i]];

    _for j (0, set_per_node[i]-1, 1)
      () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset],
				   length(N.walkers[0]), i, i);

    () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i);
    () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i);

    N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update;
    N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat;
  }
#endif
}

% Slave part of the catch: send the moved walkers, the update flags and the
% statistics of the set N.set to the master (node 0), tagged with the node
% number.
private define catch_walkers_mpi_slave (node, num_nodes, N)
{
#ifexists rcl_mpi_init
  variable set_len, len_offset;

  if (1 == N.set) {
    set_len = N.set1_per_node[node];
    len_offset = 0;
  } else if (2 == N.set) {
    set_len = N.set2_per_node[node];
    len_offset = N.set1_per_node[node];
  } else
    throw InternalError, sprintf("Invalid walker set '%S'", N.set);

  variable i;
  _for i (0, set_len-1, 1)
    () = rcl_mpi_org_isend_double(N.walkers[i+len_offset],
				  length(N.walkers[len_offset]), 0, node);

  % NOTE(review): the index expressions create temporary copies which are
  % handed to a non-blocking send -- confirm that the module keeps the
  % buffers alive until the send has completed.
  () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node);
  () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node);
#endif
}

private define catch_walkers_mpi (node, num_nodes, N)
{
  if (node)
    catch_walkers_mpi_slave(node, num_nodes, N);
  else
    catch_walkers_mpi_master(node, num_nodes, N);
}

% Set up input/output for the node handle 'emceeT' (as created by
% emceeMPISetupNode). On the master the chain file is opened or created and
% the walkers are loaded or initialized.
% Qualifiers: init, continue, output, write, read, load
private define emceeMPIInitFile (emceeT) %{{{
{
  variable init = qualifier("init", NULL);
  variable cont = qualifier("continue", NULL);
  variable output = qualifier("output", NULL);
  variable io = qualifier("write", NULL);
  variable load_hook = qualifier("read", NULL);
  variable load = qualifier("load", NULL);

  % read the settings, if one of them is NULL help was called
  io = emceeInterfaceSetup(io, "io");
  load_hook = emceeInterfaceSetup(load_hook, "io");
  init = emceeInterfaceSetup(init, "init");
  if (NULL == io || NULL == load_hook || NULL == init) return;

  % the original used the undefined names 'node', 'emcee' and 'total_walkers'
  ifnot (emceeT.node) { % master only
    if (cont != NULL)
      io.__f_open(cont, emceeT.fit, emceeT.walkers);
    else if (load != NULL) {
      load_hook.__f_read(load, emceeT.walkers);
      load_hook.__f_close();
      io.__f_create(output, emceeT.fit, emceeT.total_walkers);
    } else {
      io.__f_create(output, emceeT.fit, emceeT.total_walkers);
      init.__f(emceeT.walkers, emceeT.fit);
    }
  }
}
%}}}

% Initialize MPI (if the module is available) and return the number of this
% node and the total number of nodes. Without MPI this is (0, 1).
% The node handle is created by the caller, as the number of walkers is not
% known here.
private define emceeMPIInit () %{{{
{
  variable node, numNodes;
#ifexists rcl_mpi_init
  node = rcl_mpi_init();
  numNodes = rcl_mpi_numtasks();
  numNodes = (numNodes<1) ? 1 : numNodes;
  rcl_init_mpi_request(numNodes); % this is a stupid memory leak!!!!!!!!!
  % BUT: since we have a memory leak from the mpi module anyway we accept it currently ...
#else
  node = 0;
  numNodes = 1;
#endif

  % the caller unpacks two values, the original returned nothing
  return node, numNodes;
}
%}}}

%}}}%

% Run the ensemble sampler with walker_per_par*number_par walkers for
% 'steps' steps. Each step updates first walker set 1 and then walker set 2.
% Qualifiers:
%   move     - move definition string ("name; option=value, ...")
%   init     - walker initialization definition string
%   write    - output definition string
%   read     - input definition string
%   urand    - reference to uniform random number generator
%   upick    - reference to random integer generator
%   continue - name of the chain file to continue
%   load     - name of the chain file to load the walkers from
%   output   - name of the chain file to create
private define emcee_mpi (walker_per_par, number_par, steps) { %{{{
  variable total_walkers = walker_per_par*number_par;

  variable init = qualifier("init", NULL);
  variable move = qualifier("move", NULL);
  variable urand = qualifier("urand", NULL);
  variable upick = qualifier("upick", NULL);
  variable cont = qualifier("continue", NULL);
  variable output = qualifier("output", NULL);
  variable io = qualifier("write", NULL);
  variable load_hook = qualifier("read", NULL);
  variable load = qualifier("load", NULL);

  variable node, num_nodes;
  (node, num_nodes) = emceeMPIInit();

  move = emceeInterfaceSetup(move, "move");
  variable this = emceeMPISetupNode(node, num_nodes, total_walkers, move.nrands);
  % 'this' is the mpi_emcee handle for this node!
  % It contains all relevant data to do the calculation

  % read the settings, if one of them is NULL help was called
  io = emceeInterfaceSetup(io, "io");
  load_hook = emceeInterfaceSetup(load_hook, "io");
  init = emceeInterfaceSetup(init, "init");
  if (NULL == io || NULL == load_hook || NULL == init) return;

  ifnot (node) { % master only
    if (cont != NULL)
      io.__f_open(cont, this.fit, this.walkers);
    else if (load != NULL) {
      load_hook.__f_read(load, this.walkers);
      load_hook.__f_close();
      io.__f_create(output, this.fit, total_walkers);
    } else {
      io.__f_create(output, this.fit, total_walkers);
      init.__f(this.walkers, this.fit);
    }
  }

  variable collector_length; % the collector so we can skip turns before writing to disk
  variable walker_cycle;
  variable update_cycle;
  variable stat_cycle;
  variable cycle_step = 0;

  % setup space
  variable j;
  if (node) % slave
    collector_length = 0;
  else % master
    collector_length = length(this.walkers)*io.cycle;

  walker_cycle = Array_Type[collector_length];
  update_cycle = Int_Type[collector_length];
  stat_cycle = Double_Type[collector_length];

  if (NULL == urand || NULL == upick)
    throw InternalError, "Missing random number generator";

  % evaluate the model at the walker positions to get the statistics
  % and if not continuing a chain, write them out
  ifnot (node) {
    _for j (0, length(this.walkers)-1, 1) {
      this.stat[j] = this.fit.eval_statistic(this.walkers[j]);
      this.update[j] = 1;
    }
    if (cont == NULL)
      io.__f_initwrite(this.fit, this.walkers, this.update, this.stat);
  }

  % the main loop where the magic happens
  variable s, walker, update, stat, set, set_len, len_offset;
  _for s (0, steps-1, 1) {
    cycle_step = s mod io.cycle;
    _for set (1, 2, 1) {
      this.set = set;

      release_walkers_mpi(node,
			  num_nodes,
			  this; upick=upick, urand=urand); % release walkers to freedom ...

      if (1 == set) {
	set_len = this.set1_per_node[node];
	len_offset = 0;
      } else if (2 == set) {
	set_len = this.set2_per_node[node];
	% The master stores set 2 behind the complete set 1 of all nodes,
	% the slaves behind their own set 1 walkers (the original used the
	% slave layout for the master as well).
	len_offset = node ? this.set1_per_node[node] : this.set1;
      }

      _for j (0, set_len-1, 1) { % ... let them move ...
	(walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset],
					  this.pivots[j+len_offset],
					  this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands],
					  this.stat[j+len_offset]);
	this.walkers[j+len_offset] = walker;
	this.update[j+len_offset] = update;
	this.stat[j+len_offset] = stat;
      }

      catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em!
    }

    % if cycle end is reached write the chain
    ifnot (node) { % master only
      if (not cycle_step && s > 0)
	io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle);
      _for j (0, length(this.walkers)-1, 1) {
	walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]);
	update_cycle[j+cycle_step*length(this.walkers)] = this.update[j];
	stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j];
      }
    }
  }

  % we might have unwritten steps left, so better write them here
  cycle_step++; % the last step is never written in the loop, we have to deal with it here
  ifnot (node) {
    % without any step the collector has never been filled
    if (steps > 0)
      io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]],
		   update_cycle[[:cycle_step*length(this.walkers)-1]],
		   stat_cycle[[:cycle_step*length(this.walkers)-1]]);
  }

  % and finally call the finalizing function
  ifnot (node) { % master only
    io.__f_finalize(steps, walker_per_par, number_par, this.fit);
    io.__f_close();
  }
}
%}}}%
-- GitLab From 390f9ff5ab16b93f4c024b5061bdaf47fd504516 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 02:11:56 +0100 Subject: [PATCH 74/89] Communication not working ... 
--- src/fitting/ensemble-samplers/emcee-init.sl | 95 - src/fitting/ensemble-samplers/emcee-io.sl | 290 --- src/fitting/ensemble-samplers/emcee-moves.sl | 78 - src/fitting/ensemble-samplers/emcee-mpi.sl | 683 -------- src/fitting/ensemble-samplers/emcee.sl | 1653 ++++++++++++++---- 5 files changed, 1305 insertions(+), 1494 deletions(-) delete mode 100644 src/fitting/ensemble-samplers/emcee-init.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-io.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-moves.sl delete mode 100644 src/fitting/ensemble-samplers/emcee-mpi.sl diff --git a/src/fitting/ensemble-samplers/emcee-init.sl b/src/fitting/ensemble-samplers/emcee-init.sl deleted file mode 100644 index 0895ceab..00000000 --- a/src/fitting/ensemble-samplers/emcee-init.sl +++ /dev/null @@ -1,95 +0,0 @@ -% -*- mode: slang; mode: fold; -*- - -require("rand"); - -% WALKER INIT FUNCTIONS -% Allow different functions for initializing walkers. Follows the same idea -% as for the move steps. -% -% Distribution is only done by the master process, so we don't have to care -% about the random numbers. 
-% -% Init function takes an initialized walker array, and the fit handle -% setup function must be called emcee_init_ - -%{{{% helpers -private define __init_globals () { return struct_combine( - struct { - name = "unspecified init", - __f = NULL, - }, - __qualifiers()); -} - -%}}}% - -%{{{% Uniform initialization function -% pick random parameter values within the boundaries -private define init_parameter_walker_uniform (init, walkers, fit_handle) { - variable i; - variable par = __parameters(fit_handle.object); - variable num_p = length(par.value); - - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges"; - - _for i (0, length(walkers)-1, 1) - walkers[i] = rand_uniform(num_p)*(par.max-par.min)+par.min; -} -public define emcee_init_uniform () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - - variable defaults = __init_globals(; - name="uniform init", - ); - return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_uniform }; -} -%}}}% - -%{{{% Sphere initialization function with exponential decresing probability -% pick random parameters from gauss((x-x0 -private define init_parameter_walker_gauss_sphere (init, walkers, fit_handle) { - variable i,j,w; - variable par = __parameters(fit_handle.object); - variable num_p = length(par.value); - variable sigma = qualifier("sigma", 10); % default to p-pmin = 10 sigma (pmax-p = 10 sigma) if p-pmin>(<)pmax-p - variable relative = qualifier_exists("relative") || - not qualifier_exists("sigma") || - not (typeof(sigma) == Array_Type); % imply relative if sigma is not given or not given as arrays - - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges"; - - variable s_par; - variable s_len = length(sigma); - if (Array_Type == 
typeof(sigma) && s_len != num_p) - throw UsageError, sprintf("Sigma array length (%d) does not match free parameters (%d)", length(sigma), num_p); - _for i (0, length(walkers)-1, 1) { - if (relative) - s_par = _min(par.value-par.min, par.max-par.value)/sigma; - else - s_par = sigma; - walkers[i] = rand_gauss(1, num_p)*s_par+par.value; - w = where(walkers[i]par.max); - walkers[i][w] = par.max[w]; - } -} -public define emcee_init_gauss () { - if (qualifier_exists("help")) { - help("emcee_init_gauss"); - return NULL; - } - - variable defaults = __init_globals(; - name="gauss init", - ); - return struct { @defaults, @__qualifiers(), __f=&init_parameter_walker_gauss_sphere }; -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-io.sl b/src/fitting/ensemble-samplers/emcee-io.sl deleted file mode 100644 index 0bd8a35b..00000000 --- a/src/fitting/ensemble-samplers/emcee-io.sl +++ /dev/null @@ -1,290 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -% EMCEE INPUT OUTPUT ROUTINES -% Same structure as moves. Routines for output and input are describeed -% here. - -%{{{% helpers -private define __io_globals () { - return struct { - name = "unspecified io", - __f_create = NULL, % create function, takes 'io-object', 'filename', 'fit_handle', 'total walkers' - __f_open = NULL, % open function, takes 'io-object', 'filename', 'initialized walkers'. 
- __f_read = NULL, % same as open but is used for seting the walkers, requires less consitency - __f_write = NULL, % write to file, takes 'io-object', 'walkers array', 'update array', 'statistic array' - __f_finalize = NULL, % finalizes output, takes 'io-object', 'current number steps', 'walker per parameter', 'number parameter' - __f_close = NULL, % close any handles if necessary, takes 'io-object' - handle = NULL, % io access (usually file pointer) - cycle = 1, % number of steps to perform before write - @__qualifiers() }; -} -%}}}% - -%{{{% FITS input output routines -private define emcee_init_chain_fits (io, filename, fit_handle, total_walkers) { - % write ensemble evolution to fits file - variable init_values; - variable data_info; - variable par_names; - variable i; - list_data(&data_info); - variable par = __parameters(fit_handle.object); - io.handle = fits_open_file(filename, "c"); - - % write first table - par_names = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; - fits_create_binary_table(io.handle, "PARAMETERS", num_free_params(), - ["FREE_PAR", "FREE_PAR_NAME"], - ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, par_names)))], - [" parameter indices", " parameter names"]); - fits_update_key(io.handle, "MODEL", get_fit_fun(), "model function"); - fits_update_key(io.handle, "SLOPPY", 0, " sloppy level"); - array_map(&fits_write_comment, io.handle, strchop(data_info, '\n', 0)); - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)])) % sort here, so at least they are in index order - throw IOError; - if (_fits_write_col(io.handle, fits_get_colnum(io.handle, "FREE_PAR_NAME"), 1, 1, par_names)) - throw IOError; - - % write second table - fits_create_binary_table(io.handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], - ["D", "J", ["D"][par.index*0]], - [" fit statistics", " update indicator", [" parameter 
values"][par.index*0]]); - fits_update_key(io.handle, "NWALKERS", -1, " Number of walkers per free parameter"); - fits_update_key(io.handle, "NFREEPAR", -1, " Number of free parameters"); - fits_update_key(io.handle, "NSTEPS", -1, " Numer of iteration steps done"); - - % write third table - fits_create_binary_table(io.handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(io.handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - - % move back to chain table - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); - - io.num_steps = 0; - io.sloppy = 0; - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/total_walkers; - if (io.cycle < 1) - io.cycle = 1; -} - -private define emcee_open_chain_fits (io, filename, fit_handle, walkers) { - io.handle = fits_open_file(filename+"[PARAMETERS]", "w"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - throw IsisError, "Current model and chain model do not match"; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable par = __parameters(fit_handle.object); - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Free parameters and chain parameters differ"; - } - - variable fsloppy = fits_read_key(io.handle, "SLOPPY"); - io.sloppy = (fsloppy > io.sloppy) ? 
fsloppy : io.sloppy; % largest sloppyness - fits_update_key(io.handle, "SLOPPY", io.sloppy); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - if ((fits_read_key(io.handle, "STATISTIC") != get_fit_statistic()) && (io.sloppy<1)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level to continue anyway"; - } - fits_update_key(io.handle, "STATISTIC", get_fit_statistic()); - - if (_fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); - if (file_nw != length(walkers)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, sprintf("Unable to continue chain with %d walkers with chain with %d walkers", file_nw, length(walkers)); - } - - tab = fits_read_table(io.handle); - variable i,j; - variable names = get_struct_field_names(tab); - variable l = length(names)-2; - _for j (0, length(walkers)-1, 1) - _for i (0, l-1, 1) - walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; - - () = _fits_get_rowsize(io.handle, &(io.cycle)); - io.cycle = io.cycle/length(walkers); - if (io.cycle < 1) - io.cycle = 1; - - io.num_steps = fits_get_num_rows(io.handle); -} - - -private define emcee_write_init_step (io, fit_handle, init_walkers, init_update, init_stat){ - variable par = __parameters(fit_handle.object); - fits_create_binary_table(io.handle, "INITWALKER", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], - ["D", "J", ["D"][par.index*0]], - [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "INITWALKER", 0); - variable 
tmp, update; - variable npar = length(par.index); - variable nwalkers = length(init_walkers); - fits_update_key(io.handle, "NFREEPAR", npar, " Number of free parameters"); - fits_update_key(io.handle, "NWALKERS", nwalkers/npar, " Number of walkers per free parameter"); - - variable i,j; - _for j (0, npar-1, 1) { - tmp = Double_Type[nwalkers]; - _for i (0, nwalkers-1, 1) - tmp[i] = init_walkers[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), 1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), 1, 1, init_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), 1, 1, init_update); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); -} - - -private define emcee_read_chain_fits (io, filename, fit_handle, walkers) { - io.handle = fits_open_file(filename+"[PARAMETERS]", "r"); - - if (fits_read_key(io.handle, "MODEL") != get_fit_fun()) { - fits_close_file(io.handle); - io.handle = NULL; - throw IsisError, "Current model and chain model do not match"; - } - - variable tab = fits_read_table(io.handle); - ifnot (struct_field_exists(tab, "free_par")) { - fits_close_file(io.handle); - io.handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - variable par = __parameters(fit_handle.object); - if ((length(tab.free_par) != num_free_params()) || any(tab.free_par != par.index[array_sort(par.index)])) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, "Free parameters and chain parameters differ"; - } - - variable file_nw = fits_read_key(io.handle, "NWALKERS")*fits_read_key(io.handle, "NFREEPAR"); - if (file_nw != length(walkers)) { - fits_close_file(io.handle); - io.handle = NULL; - throw UsageError, sprintf("Unable to set chain with %d walkers from file with %d walkers", file_nw, length(walkers)); - } - - tab = fits_read_table(io.handle); - variable i,j; - variable names = get_struct_field_names(tab); - variable l = 
length(names)-2; - _for j (0, length(walkers)-1, 1) - _for i (0, l-1, 1) - walkers[j][i] = get_struct_field(tab, names[i+2])[-length(walkers)+j]; -} - -private define emcee_write_chain_fits (io, fit_handle, walkers_cycle, update_cycle, stat_cycle) { - variable tmp, update; - variable par = __parameters(fit_handle.object); - variable npar = length(par.index); - variable steps_walkers = length(walkers_cycle); % total_walkers*steps_per_cycle - variable i,j; - variable collen = fits_get_num_rows(io.handle); - _for j (0, npar-1, 1) { - tmp = Double_Type[steps_walkers]; - _for i (0, steps_walkers-1, 1) - tmp[i] = walkers_cycle[i][j]; - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, sprintf("CHAINS%d", par.index[j])), collen+1, 1, tmp); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FITSTAT"), collen+1, 1, stat_cycle); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "UPDATE"), collen+1, 1, update_cycle); -} - -private define emcee_finalize_chain_fits (io, steps, walker_per_parameter, number_parameter, fit_handle) { - variable tmp, tmp2; - variable reread; - variable total_walkers = walker_per_parameter*number_parameter; - variable collen = fits_get_num_rows(io.handle); % length of chain - variable all_steps = fits_read_key(io.handle, "NSTEPS"); - all_steps = (all_steps<0) ? 
steps : all_steps + steps; - fits_update_key(io.handle, "NSTEPS", all_steps); - fits_update_key(io.handle, "NWALKERS", walker_per_parameter); - fits_update_key(io.handle, "NFREEPAR", number_parameter); - - () = _fits_read_cols(io.handle, [fits_get_colnum(io.handle, "UPDATE"), fits_get_colnum(io.handle, "FITSTAT")], - io.num_steps+1, collen-io.num_steps, &reread); - () = _fits_movnam_hdu(io.handle, _FITS_BINARY_TBL, "CHAINSTATS", 0); - collen = fits_get_num_rows(io.handle); % read length of chain summary - variable j; - variable frac_update = Double_Type[steps]; - variable min_stat = Double_Type[steps]; - variable med_stat = Double_Type[steps]; - variable max_stat = Double_Type[steps]; - variable step_stat = reread[1]; - variable step_update = reread[0]; - - _for j (0, steps-1, 1) { - frac_update[j] = sum(step_update[[0:total_walkers-1]+j*total_walkers])/total_walkers; - tmp2 = step_stat[[0:total_walkers-1]+j*total_walkers]; - min_stat[j] = min(tmp2); - max_stat[j] = max(tmp2); - med_stat[j] = median(tmp2); - } - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "FRAC_UPDATE"), collen+1, 1, frac_update); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MIN_STAT"), collen+1, 1, min_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MED_STAT"), collen+1, 1, med_stat); - () = _fits_write_col(io.handle, fits_get_colnum(io.handle, "MAX_STAT"), collen+1, 1, max_stat); -} - -private define emcee_close_chain_fits (io) { - fits_close_file(io.handle); - io.handle = NULL; -} - -public define emcee_io_fits () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - variable settings = (_NARGS==1) ? 
() : NULL; - variable defaults = __io_globals(; - name = "io fits", - num_steps = NULL, - sloppy = 0, - ); - return struct { @defaults, @__qualifiers(), - __f_create=&emcee_init_chain_fits, - __f_open=&emcee_open_chain_fits, - __f_write=&emcee_write_chain_fits, - __f_read=&emcee_read_chain_fits, - __f_finalize=&emcee_finalize_chain_fits, - __f_close=&emcee_close_chain_fits, - __f_initwrite=&emcee_write_init_step, - }; -} - -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-moves.sl b/src/fitting/ensemble-samplers/emcee-moves.sl deleted file mode 100644 index 145c5027..00000000 --- a/src/fitting/ensemble-samplers/emcee-moves.sl +++ /dev/null @@ -1,78 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -% DEFINED MOVES FOR THE EMCEE ENSEMBLE WALKER -% Walker moves should be defined as functions where the function returns a -% structure suitable to describe the step algorithm. For adjustment the -% function should combine the qualifiers with the default structure. The -% move function MUST evaluate the fit model by using the fit handle and -% return new position and statistics. -% -% Besides additional arguments given in the structure the algorithm must -% process the fit object, current position, pivot position and an array of -% random numbers. -% -% For constructing the structure the __move_globals function is convenient. -% The constructur function must be names emcee_move_. 
-% -% For an example see the STRETCH_MOVE -%{{{% Helpers for move steps -private define __move_globals () { - return struct_combine ( struct { - name = "unspecified move", % name of the step function to be used in output files - move = NULL, % the move function itself, NULL == Error - nrands = 0, % number of required random numbers for each step - }, __qualifiers); -} -%}}}% - -%{{{% THE STRETCH MOVE AS DEFINED IN FOREMAN & MACKEY -% define inverse cumulative distribution function for generating -% random numbers following 1/z^2 when z in [1/a, a] -% TODO: should make this an adjustable thing -private define inverse_cdf (u, a) { - return (u*(a-1.)+1.)^2./a; -} - -% stretch move as of Goodman & Weare 2010 -% Move must evaluate the fit function -private define stretch_move (move, fit_object, x, x_j, u, prev_stat) { - variable z = inverse_cdf(u[0], move.a); - variable ystat, ret_stat = prev_stat; - variable y; % step proposition - variable x_t1 = x; % resulting step - variable update = 0; % update indicator - - % calculate the new position (utilize array operations) - y = x_j + z*(x-x_j); - - % try evaluating, if out of bounds, does nothing TODO: This is biasing the result, check how to do this correct - try { - % evaluate fit function for 'y' - ystat = fit_object.eval_statistic(y;nocopy); - - % caluculate if we accept the step based on the statistics of the - % model. We assume that the statistic is given as -2 log likelihood - if(log(u[1]) <= (log(z)*(fit_object.num_vary-1)+(prev_stat-ystat)/2.)) { - ret_stat = ystat; - x_t1 = y; - update = 1; - } - } catch IsisError; - - % return new walker position, update, new statistic - return (x_t1, update, ret_stat); -} - -public define emcee_move_stretch () { - if (qualifier_exists("help")) { - help("emcee_init_uniform"); - return NULL; - } - variable defaults = __move_globals(; - name="stretch move", - nrands=2, - a=2. 
% move scaling - ); - return struct { @defaults, @__qualifiers(), __f=&stretch_move }; % combine settings -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee-mpi.sl b/src/fitting/ensemble-samplers/emcee-mpi.sl deleted file mode 100644 index a0381dac..00000000 --- a/src/fitting/ensemble-samplers/emcee-mpi.sl +++ /dev/null @@ -1,683 +0,0 @@ -% -*- mode: slang; mode: fold; -*- % - -require("rand"); - -%%% START FROM SCRATCH... well sort of - -% Implementation of the emcee hammer () with the principle idea -% that multiple nodes (engines) are responsible for a part of the -% walkers. For efficiency the walkers are distributed equally to -% each engine. To keep the statistical properties the walkers are -% seperated in to two groups (see ref) where the next step of -% group one depends on the current position of group two and the -% next step of group two depends on the new position of group one. -% For most efficiency we try to reduce the required computations -% to the minimum possible such that the model evaluation plus -% the necessary communication is everything that happens in the -% main loop. -% -% To prevent any side effects from the PRNG we let the master -% calculate enough for each step and distribute them to the -% slaves. 
- -private define emceeEvalFunction () %{{{ -{ - variable args = __pop_list(_NARGS); - variable handle = args[0]; - variable fun = handle.function; - - return @fun(__push_list(args[[1:]]), handle.info;; handle.options); -} -%}}} - -private define emceeAddFunction (name, function, list) %{{{ -{ - list[strtrim(name)] = struct { - function = function, - userdata = qualifier("userdata"), - data = qualifier("data"); - help = qualifier("help", name), - }; -} -%}}} - -private define emceeSetFunction (functionString, list, pointer) %{{{ -{ - variable s = strchop(functionString, ';', 0); - variable name = strtrim(s[0]); - variable options = NULL; - - if (length(s)>1) - options = eval(sprintf("struct { %s }", s[1])); - - ifnot (assoc_key_exists(list, name)) - throw UndefinedNameError, sprintf("'%s' is not registere"); - - variable function = struct { - eval = &emceeEvalFunction, - @(list[name]), - options = options, - functionString = functionString, - }; - - @pointer = function; -} -%}}} - -private define emceeGetFunction (pointer) %{{{ -{ - return pointer.functionString; -} -%}}} - -%{{{ move functions - -% access to the engine. Can set how many random numbers are required - -private variable Emcee_Move_List = Assoc_Type[Struct_Type]; -define add_emcee_move (name, function, nrand, list) %{{{ -{ - variable data = struct { nrand = nrand }; - variable userdata = qualifier("userdata"); - - emceeAddFunction(__push_list(args), Emcee_Move_List; - data=data, userdata=userdata); -} -%}}} - -private variable Emcee_Move; -define set_emcee_move (moveString) %{{{ -{ - emceeSetFunction(moveString, Emcee_Move_List, &Emcee_Move); -} -%}}} - -define get_emcee_move () %{{{ -{ - return emceeGetFunction(&emcee_Move); -} -%}}} - -%}}} - -%{{{ file functions - -% have acces to the engine. 
Gets the cycle number - -private variable Emcee_Write_List = Assoc_Type[Struct_Type]; -private variable Emcee_Read_List = Assoc_Type[Struct_Type]; -define add_emcee_write( - -%}}} - -private define emceeSetupEngine (id, numberEngines, totalNumberWalkers) %{{{ -{ - variable engine = struct { - id = id, % the engines id (0 is the master) - numberEngines = numberEngines, % total number of engines - - walkers, % array of current walker position for this engine - pivots, % array of current pivot positions, should not change for one loop! - rolls, % array of required random numbers - update, % array updater track - stat, % array step statistic - totalNumberWalkers = totalNumberWalkers, % number of all walkers - totalNumberSet1, % number walkers in set 1 numWalkers/2 - totalNumberSet2, % number walkers in set 2 numWalkers-numWalkers/2 - - numberWalkers, % number walkers handled by this engine - numberWalkersSet1, % number walkers in set 1 belonging to this engine - numberWalkersSet2, % number walkers in set 2 belonging to this engine - - firstWalkerSet1, % index of first walker in set 1 handle by this engine - firstWalkerSet2, % index of first walker in set 2 handle by this engine - - % set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - numberParameters = num_free_params(), % number of fit parameters - numberWalkerRolls, % number of random numbers each walker uses - }; - - engine.totalNumberSet1 = totalNumberWalkers/2; - engine.totalNumberSet2 = totalNumberWalkers - engine.totalNumberSet1; - - % divide walkers evenly (remainders are given to highest ids) - engine.numberWalkers = engine.totalNumberWalkers/engine.numberEngines - + 1 - ((engine.totalNumberWalkers mod engine.numberEngines) <= engine.id); - engine.numberWalkersSet1 = engine.numberWalkers/2; - engine.numberWalkersSet2 = engine.numberWalkers-engine.numberWalkersSet1; - - % master stores all values - variable size = id ? 
engine.numberWalkers : engine.totalNumberWalkers; - variable j; - engine.walkers = Array_Type[size]; - engine.pivots = Array_Type[size]; - engine.rolls = Double_Type[size]; - engine.update = Int_Type[size]; - engine.stat = Double_Type[size]; - - _for j (0, size-1) { - engine.walkers[j] = Double_Type[engine.numberParameters]; - engine.pivots[j] = Double_Type[engine.numberParameters]; - } - - return engine; -} -%}}} - -private define emceeSetupGears (engine) %{{{ -{ - variable gears = struct { - initialize, % initialize function - step, % step function - random_uniform, % uniform random number generator (float) - random_pick, % uniform random number generator (int) - read, % read handle - write, % write handle (may be null, which means that read handle is used) - }; - - gears.move = NULL; - - if (0 == engine.id) { - % load or init, open file/use open file - } - - return gears; -} -%}}} - -private define emceeReleaseWalkers (engine) %{{{ -{ -} -%}}} - -private define emceeMoveWalkers (engine) %{{{ -{ -} -%}}} - -private define emceeCatchWalkers (engine) %{{{ -{ -} -%}}} - -%{{{ some helpers - -% get walkers per node -private define distributeWalkers (numNodes, numWalkers) %{{{ -{ - variable walkersPerNode = Int_Type[numNodes]; - variable n = numWalkers/numNodes + 1; - variable missing = numNodes - (numWalkers mod numNodes); - variable set1PerNode, set2PerNode; % TODO: does not have to be array - - walkersPerNode[[0:numNodes-missing-1]] = n; - walkersPerNode[[numNodes-missing:numNodes-1]] = n-1; - - set2PerNode = walkersPerNode >> 1; % half walkers handled by each node per set - set1PerNode = walkersPerNode - set2PerNode; - - return set1PerNode, set2PerNode; -} -%}}} - -% get walkers handled up to this node -private define getHandledNumberWalkers (set1PerNode, set2PerNode) %{{{ -{ - variable l = length(set1PerNode); - variable set1Handled = Int_Type[l]; - variable set2Handled = Int_Type[l]; - variable i, c1 = 0, c2 = 0; - - _for i (0, l-1, 1) { - set1Handled[i] = c1; - 
set2Handled[i] = c2; - c1 += set1Handled[i]; - c2 += set2Handled[i]; - } - - return set1Handled, set2Handled; -} -%}}} - -% string similar to statistic or fit method definitions "name; option1=foo, option2=bar" -private define emceeInterfaceSetup (execString, type) %{{{ -{ - if (NULL == execString || typeof(execString) != String_Type) - throw UsageError, sprintf("Unable to parse qualifier for %s", type); - - variable s = strchop(execString, ';', 0); - - if (length(s) > 2) - throw UsageError, sprintf("Failed parsing option '%s'", execString); - - variable fname = strtrim(s[0]); - variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); - - if (NULL == f) - throw UsageError, sprintf("Unknown function 'emcee_%s_%s'", type, fname); - - variable opt = length(s) == 2 ? eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; - - return (@f)(;; opt); -} -%}}} - -private define emceeMPISetupNode (node, numNodes, numTotalWalkers, numRands) %{{{ -{ - variable set1PerNode, set2PerNode; - variable set1Handled, set2Handled; - variable set2Len = numTotalWwalkers >> 1; % split walker in two sets - variable set1Len = numTotalWalkers - set2Len; - variable j; - - (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); - (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); - - variable N = struct { - node = node, % this nodes number (0 is master) - num_nodes = num_nodes, % total number of nodes - walkers, % current walker position - pivots, % current pivot positions, should not change for one loop! 
- randoms, % required randoms - update, % updater track - stat, % step statistic - total_walkers, % number of totals walkers - set1 = set1_len, % walkers for 1st update - set2 = set2_len, % walkers set for 2nd update - set1_per_node = set1_per_node, % walkers of set1 processed per node - set2_per_node = set2_per_node, % walkers of set2 processed per node - set1_handled = set1_handled, % walkers of set1 handled by all previous nodes - set2_handled = set2_handled, % walkers of set2 handled by all previous nodes - set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - num_pars = num_free_params(), % number of fit parameters - num_rands = nrands, % number of random numbers per walker - }; - - if (node) { % setup for slaves - N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; - N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; - N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; - } else { % setup for master - N.walkers = Array_Type[total_walkers]; - N.pivots = Array_Type[total_walkers]; - N.update = Int_Type[total_walkers]; - N.stat = Double_Type[total_walkers]; - N.randoms = Double_Type[total_walkers*nrands]; - } - - _for j (0, length(N.walkers)-1, 1) { - N.walkers[j] = Double_Type[N.num_pars]; - N.pivots[j] = Double_Type[N.num_pars]; - } - - return N; -} -%}}} - -%}}} - -%{{{ mpi functions - -private define release_walkers_mpi_master (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - if (NULL == urand) - throw InternalError, "Missing random generator"; - - if (NULL == upick) - throw InternalError, "Missing random generator"; - - % select pivot walkers for current set - variable set_len, complement_len, len_offset; 
- variable set_per_node, set_handled; - variable pick; - - if (1 == N.set) { % - set_len = N.set1; - complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - } else if (2 == N.set) { - set_len = N.set2; - complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - } - pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - - variable this_walkers, this_pivots, this_randoms; - variable i,j; - - % set current walkers and complement pivots - _for i (0, set_len-1, 1) - N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points - - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] - = (@urand)(set_len*N.num_rands); % get new random numbers for current set - - _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data - % set the walkers for node i - this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; - % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; - % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] - +(set_handled[i]+len_offset)*N.num_rands]; - - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], - length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], - length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 - } - - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] - +len_offset*N.num_rands], - set_len*N.num_rands, i, 2); % send random numbers with tag 2 - } -#endif -} - -private define release_walkers_mpi_slave (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset; - variable this_randoms; - - if (1 == N.set) { - set_len = N.set1_per_node; - len_offset = 0; - } else if (2 == N.set) { 
- set_len = N.set2_per_node; - len_offset = N.set1_per_node[node]; - } - - variable j; - this_randoms = Double_Type[set_len*N.num_rands]; - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], - length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], - length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) - } - () = rcl_mpi_org_recv_double(this_randoms, - set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) - - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; -#endif -} - -private define release_walkers_mpi (node, num_nodes, N) { - % distribute walkers, pivots and random numbers - - if (node) - release_walkers_mpi_slave(node, num_nodes, N); - else - release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); -} - -private define catch_walkers_mpi_master (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset, set_per_node, set_handled; - - if (1 == N.set) { - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; - } else if (2 == N.set) { - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; - } - - variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, - % slang creates a copy of an array when addressed by index - - _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; - this_update = Int_Type[set_per_node[i]]; - - _for j (0, set_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], - length(N.walkers[0]), i, i); - - () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); - () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); - - N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; - } -#endif -} - -private define 
catch_walkers_mpi_slave (node, num_nodes, N) -{ -#ifexists rcl_mpi_init - variable set_len, len_offset; - - if (1 == N.set) { - set_len = N.set1_per_node[node]; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node[node]; - len_offset = N.set1_per_node[node]; - } - - variable i; - _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], - length(N.walkers[len_offset]), 0, node); - - () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); - () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); -#endif -} - -private define catch_walkers_mpi (node, num_nodes, N) -{ - if (node) - catch_walkers_mpi_slave(node, num_nodes, N); - else - catch_walkers_mpi_master(node, num_nodes, N); -} - -private define emceeMPIInitFile (emceeT) %{{{ -{ - variable init = qualifier("init", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, emceeT.fit, emcee.walkers); - else if (load != NULL) { - load_hook.__f_read(load, emceeT.walkers); - load_hook.__f_close(); - io.__f_create(output, emceeT.fit, emceeT.numTotalWalkers); - } else { - io.__f_create(output, emceeT.fit, total_walkers); - init.__f(emceeT.walkers, emceeT.fit); - } - } -} -%}}} - -private define emceeMPIInit () %{{{ -{ - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - variable node, numNodes; -#ifexists rcl_mpi_init - node = 
rcl_mpi_init(); - numNodes = rcl_mpi_numtasks(); - numNodes = (numNodes<1) ? 1 : numNodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! - % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... -#else - node = 0; - num_nodes = 1; -#endif - - move = emceeInterfaceSetup(move, "move"); - - % This is the mpi_emcee handle for this node! - % It contains all relevant data to do the calculation - variable emceeHandle = setup_node(node, num_nodes, total_walkers, move.nrands); - -} -%}}} - -%}}}% - -private define emcee_mpi (walker_per_par, number_par, steps) { - variable total_walkers = walker_per_par*number_par; - - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); - - variable node, num_nodes; - (node, num_nodes) = emceeMPIInit(); - - move = emcee_call_setup_fun(move, "move"); - variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - % 'this' is the mpi_emcee handle for this node! 
- % It contains all relevant data to do the calculation - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, this.fit, this.walkers); - else if (load != NULL) { - load_hook.__f_read(load, this.walkers); - load_hook.__f_close(); - io.__f_create(output, this.fit, total_walkers); - } else { - io.__f_create(output, this.fit, total_walkers); - init.__f(this.walkers, this.fit); - } - } - - variable collector_length; % the collector so we can skip turns before writing to disk - variable walker_cycle; - variable update_cycle; - variable stat_cycle; - variable cycle_step = 0; - - % setup space - variable j; - if (node) % slave - collector_length = 0; - else % master - collector_length = length(this.walkers)*io.cycle; - - walker_cycle = Array_Type[collector_length]; - update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; - - if (NULL == urand || NULL == upick) - throw InternalError, "Missing random number generator"; - - % evaluate the model at the walker positions to get the statistics - % and if not continuing a chain, write them out - ifnot (node) { - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; - } - if (cont == NULL) - io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); - } - - % the main loop where the magic happens - variable s, walker, update, stat, set, set_len, len_offset; - _for s (0, steps-1, 1) { - cycle_step = s mod io.cycle; - _for set (1, 2, 1) { - this.set = set; - - release_walkers_mpi(node, - num_nodes, - this; upick=upick, urand=urand); % release walkers to freedom ... 
- - if (1 == set) { - set_len = this.set1_per_node[node]; - len_offset = 0; - } else if (2 == set) { - set_len = this.set2_per_node[node]; - len_offset = this.set1_per_node[node]; - } - - _for j (0, set_len-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], - this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], - this.stat[j+len_offset]); - this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; - } - - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! - } - - % if cycle end is reached write the chain - ifnot (node) { % master only - if (not cycle_step && s > 0) - io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); - _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; - stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; - } - } - } - - % we might have unwritten steps left, so better write them here - cycle_step++; % the last step is never written in the loop, we have to deal with it here - ifnot (node) { - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); - } - - % and finally call the finalizing function - ifnot (node) { % master only - io.__f_finalize(steps, walker_per_par, number_par, this.fit); - io.__f_close(); - } -} -%}}}% diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 17bd8e27..42acc4a4 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1,457 +1,1414 @@ % -*- mode: slang; mode: fold; -*- % require("rand"); - -%{{{% some helpers ... 
-% get walkers per node -private define distribute_walkers (nodes, number_walkers) { - variable walkers_per_node = Int_Type[nodes]; - variable n = number_walkers/nodes + 1; - variable missing = nodes - (number_walkers mod nodes); - variable set1_per_node, set2_per_node; +require("fork"); +require("socket"); + +% Implementation of the emcee hammer () with the principle idea +% that multiple nodes (engines) are responsible for a part of the +% walkers. For efficiency the walkers are distributed equally to +% each engine. To keep the statistical properties the walkers are +% seperated in to two groups (see ref) where the next step of +% group one depends on the current position of group two and the +% next step of group two depends on the new position of group one. +% For most efficiency we try to reduce the required computations +% to the minimum possible such that the model evaluation plus +% the necessary communication is everything that happens in the +% main loop. +% +% To prevent any side effects from the PRNG we let the master +% calculate enough for each step and distribute them to the +% slaves. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +private variable EmceeInitRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeStepRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeFileRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeShipRegister = Assoc_Type[Ref_Type, &NULL]; + +% Engine and Leader %{{{ + +private variable EmceeEngine = struct { + % engine numbers + id, % engine id (master is 0) + numberEngines, % total number of engines + numberSteps, % total number of steps + + % arrays and length + walkers, % walker array for this engine (master has all) + pivots, % pivot array for this engine (master has all) + rolls, % random numbers required (master has all) + update, % update indicator array (master has all) + stat, % statistic array (master has all) + totalNumberWalkers, % total number of walkers + totalNumberSet1, % total number of walkers in set 1 + totalNumberSet2, % total number of walkers in set 2 + + % per engine walkers + numberWalkers, % number of walkers this engine handles + numberWalkersSet1, % number of walkers this engine handles in set 1 + numberWalkersSet2, % number of walkers this engine handles in set 2 + + % selected set + setOffset, % current set offset (for access in walker array) + setLength, % current set length + + % fit + fit, % fit object + numberParameters, % number of (free) parameters + + gears = NULL, % step, random generators + leader = NULL, % write buffers etc. 
+}; + +private variable EmceeLeader = struct { + walkersPerEngine, % number of walkers for each engine id + walkersPerSet1, % number of walkers for each engine id in set 1 + walkersPerSet2, % number of walkers for each engine id in set 2 + walkersPerSet, % selected set walkers + totalOffset, % start of ALL walkers in set + writeBuffer, % total write buffer array + inFile, % input file handle + outFile, % output file handle +}; + +private variable EmceeGears = struct { + upick, % function + urand, % function + + step, % step interface +}; + +private define emceeDrawSet (engine, set) %{{{ +{ + variable urand = engine.gears.urand; + variable upick = engine.gears.upick; + + variable totalNumberSet, + totalNumberComplement, + totalOffset; + + if (1 == set) { + totalNumberSet = engine.totalNumberSet1; + totalNumberComplement = engine.totalNumberSet2; + totalOffset = 0; + engine.setOffset = 0; + engine.setLength = engine.numberWalkersSet1; + if (0 == engine.id) { + engine.leader.walkersPerSet = engine.leader.walkersPerSet1; + engine.leader.totalOffset = totalOffset; + } + } else if (2 == set) { + totalNumberSet = engine.totalNumberSet2; + totalNumberComplement = engine.totalNumberSet1; + totalOffset = engine.totalNumberSet1; + engine.setOffset = engine.numberWalkersSet1; + engine.setLength = engine.numberWalkersSet2; + if (0 == engine.id) { + engine.leader.walkersPerSet = engine.leader.walkersPerSet2; + engine.leader.totalOffset = totalOffset; + } + } + + % master picks new pivots & randoms + if (0 == engine.id) { + variable pick = @upick(0, totalNumberComplement-1, totalNumberSet)+totalOffset; + variable i; + _for i (0, totalNumberSet-1) + engine.pivots[i+totalOffset] = @(engine.walkers[pick[i]]); + + variable numberRandoms = engine.gears.step.numberRandoms; + engine.rolls[[0:totalNumberSet*numberRandoms-1]+totalOffset*numberRandoms] + = @urand(totalNumberSet*numberRandoms); + } +} +%}}} - walkers_per_node[[0:nodes-missing-1]] = n; - 
walkers_per_node[[nodes-missing:nodes-1]] = n-1; +private define emceeSetupGears (engine, urand, upick, step) %{{{ +{ + variable gears = struct { @EmceeGears }; + gears.urand = urand; + gears.upick = upick; + gears.step = step; + + % here we can set the rolls + engine.rolls = Double_Type[length(engine.walkers)*step.numberRandoms]; + + engine.gears = gears; +} +%}}} + +private define emceeSetupWriteBuffer (leader, numberWalkers, numberSteps) %{{{ +{ + % buffer size should ideally be as large as the write routine wants + % but is limited by the maximum array size and must be at least + % as large as one iteration requires + variable size = min([[leader.outFile.cycle, numberSteps]*numberWalkers, INT_MAX-(INT_MAX mod numberWalkers)]); + variable writeBuffer = struct { + size = size, + cycle = size/numberWalkers, + walkers = Array_Type[size], + update = Double_Type[size], + stat = Double_Type[size], + }; - set2_per_node = walkers_per_node >> 1; % half walkers handled by each node per set - set1_per_node = walkers_per_node - set2_per_node; + variable i; + _for i (0, size-1) + writeBuffer.walkers[i] = Double_Type[num_free_params()]; - return set1_per_node, set2_per_node; + leader.writeBuffer = writeBuffer; } +%}}} -% get walkers handled up to this node -private define previous_number_walkers (set1_per_node, set2_per_node) { - variable l = length(set1_per_node); - variable set1_handled = Int_Type[l]; - variable set2_handled = Int_Type[l]; - variable i, c1 = 0, c2 = 0; +private define emceeSetupLeader (engine, inFile, outFile) %{{{ +{ + if (0 == engine.id) { + variable nEngines = engine.numberEngines; + variable id; + variable leader = @EmceeLeader; + leader.walkersPerEngine = Int_Type[nEngines]; + leader.walkersPerSet1 = Int_Type[nEngines]; + leader.walkersPerSet2 = Int_Type[nEngines]; + leader.inFile = inFile; + leader.outFile = outFile; + + emceeSetupWriteBuffer(leader, engine.totalNumberWalkers, engine.numberSteps); + + variable set1 = engine.totalNumberSet1; + variable 
set2 = engine.totalNumberSet2; + + _for id (0, engine.numberEngines-1) { + leader.walkersPerSet1[id] = set1/nEngines + ((set1 mod nEngines) > (nEngines-id-1)); + leader.walkersPerSet2[id] = set2/nEngines + ((set2 mod nEngines) > (nEngines-id-1)); + } - _for i (0, l-1, 1) { - set1_handled[i] = c1; - set2_handled[i] = c2; - c1 += set1_handled[i]; - c2 += set2_handled[i]; + engine.leader = leader; } +} +%}}} - return set1_handled, set2_handled; +private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ +{ + variable engine = @EmceeEngine; + ship.engine = engine; + + variable set1 = totalNumberWalkers/2; + variable set2 = totalNumberWalkers - set1; + engine.totalNumberSet1 = set1; + engine.totalNumberSet2 = set2; + engine.totalNumberWalkers = totalNumberWalkers; + engine.numberSteps = totalSteps; + + % set sail (get number of engines and set id) + ship.setSail(); + + % divide walkers evenly (remainders are given to highest ids) + engine.numberWalkersSet1 = set1/engine.numberEngines + + ((set1 mod engine.numberEngines) > (engine.numberEngines-engine.id-1)); + engine.numberWalkersSet2 = set2/engine.numberEngines + + ((set2 mod engine.numberEngines) > (engine.numberEngines-engine.id-1)); + engine.numberWalkers = engine.numberWalkersSet1 + engine.numberWalkersSet2; + engine.numberParameters = num_free_params(); + engine.fit = open_fit(); + + % master stores all values + variable size = engine.id ? 
engine.numberWalkers : totalNumberWalkers; + + engine.walkers = Array_Type[size]; + engine.pivots = Array_Type[size]; + % rolls can only be set up after step is known + % engine.rolls = Double_Type[size*engine.gears.step.numberRandoms]; + engine.update = Int_Type[size]; + engine.stat = Double_Type[size] + DOUBLE_MAX; + + variable j; + _for j (0, size-1) { + engine.walkers[j] = Double_Type[engine.numberParameters]; + engine.pivots[j] = Double_Type[engine.numberParameters]; + } } +%}}} +%}}} + +%{{{ Init interface +%!%+ +%\function{emcee--init} +%\synopsis{Set emcee initialization function} +%\usage{init="method;parameters";} +%\description +% The initialization method can be set with the function string +% "method;parameter" +% +% Available methods: +% uniform : Draw initial walker positions from a uniform distribution +% within the parameter ranges. +% +% gauss : Draw initial walker positions from a gaussian distribution +% within parameter ranges. +% ; sigma : [=10.] Sigma of the gauss function in terms of the +% parameter range. 
+% +% file : Load initial walkers from a valid chain file created by the +% emcee method +% ; filename : The file to load +% +%!%- +% 1: pick - get walkers from parameters and distribution or file +private variable EmceeInit = struct { + pick, % function + + filename, % the filename (if any) + + % private data +}; + +%{{{ Uniform initialization function +% pick random parameter values within the boundaries +private define emceeInitUniformPick (init, engine) %{{{ +{ + variable i; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); -private define emcee_call_setup_fun (exec_string, type) % string similar to statistic or fit method definitions "name; option1=foo, option2=bar" + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set inital walkers"; + + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_uniform(numParameter)*(par.max-par.min)+par.min; +} +%}}} +%}}} +private define emceeInitUniform () %{{{ { - if (NULL == exec_string || typeof(exec_string) != String_Type) - throw UsageError, sprintf("Unable to parse qualifier for %s", type); + variable init = struct { @EmceeInit }; + init.pick = &emceeInitUniformPick; + init.filename = NULL; - variable s = strchop(exec_string, ';', 0); + return init; +} +%}}} +EmceeInitRegister["uniform"] = &emceeInitUniform; - if (length(s) > 2) - throw UsageError, sprintf("Failed parsing option '%s'", exec_string); +%{{{ Gauss initialization function +private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ +{ + variable upper = Real(cerf((bmax-v)/sqrt(2.)/sigma)); + variable lower = Real(cerf((bmin-v)/sqrt(2.)/sigma)); - variable fname = strtrim(s[0]); - variable f = __get_reference(sprintf("emcee_%s_%s", type, fname)); + return sqrt(2)*erfinv(rand_uniform(length(v))*(upper-lower)+lower)*sigma+v; +} +%}}} - if (NULL == f) - throw UsageError, 
sprintf("Unknown function 'emcee_%s_%s'", type, fname); - - variable opt = length(s) == 2 ? eval(sprintf("struct { %s }", s[1])) : struct { @NULL }; +private define emceeInitGaussPick (init, engine) %{{{ +{ + variable i,w; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); - return (@f)(;; opt); + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set initial walkers"; + + variable sigma = (par.max-par.min)/init.sigma; + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_gauss_cut(sigma, par.value, par.min, par.max); } +%}}} +%}}} +private define emceeInitGauss () %{{{ +{ + variable init = struct { @EmceeInit, sigma }; + init.pick = &emceeInitGaussPick; + init.filename = NULL; + init.sigma = qualifier("sigma", 10); -private define setup_node (node, num_nodes, total_walkers, nrands) { - variable set1_per_node, set2_per_node; - variable set1_handled, set2_handled; - variable set2_len = total_walkers >> 1; % split walker in two sets - variable set1_len = total_walkers - set2_len; - variable j; + return init; +} +%}}} +EmceeInitRegister["gauss"] = &emceeInitGauss; - (set1_per_node, set2_per_node) = distribute_walkers(num_nodes, total_walkers); - (set1_handled, set2_handled) = previous_number_walkers(set1_per_node, set2_per_node); - - variable N = struct { - walkers, % current walker position - pivots, % current pivot positions, should not change for one loop! 
- randoms, % required randoms - update, % updater track - stat, % step statistic - total_walkers, % number of totals walkers - set1 = set1_len, % walkers for 1st update - set2 = set2_len, % walkers set for 2nd update - set1_per_node = set1_per_node, % walkers of set1 processed per node - set2_per_node = set2_per_node, % walkers of set2 processed per node - set1_handled = set1_handled, % walkers of set1 handled by all previous nodes - set2_handled = set2_handled, % walkers of set2 handled by all previous nodes - set = 0, % indicates which set to update (1: first set, 2: second set) - fit = open_fit(), % the fit object, will fail if no model is loaded - num_pars = num_free_params(), % number of fit parameters - num_rands = nrands, % number of random numbers per walker - }; +%{{{ file initialization function +private define fisher_yates (a, n) %{{{ +{ + if (n>length(a)) + return rand_int(0, length(a)-1, n); - if (node) { % setup for slaves - N.walkers = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.pivots = Array_Type[set1_per_node[node]+set2_per_node[node]]; - N.update = Int_Type[set1_per_node[node]+set2_per_node[node]]; - N.stat = Double_Type[set1_per_node[node]+set2_per_node[node]]; - N.randoms = Double_Type[(set1_per_node[node]+set2_per_node[node])*nrands]; - } else { % setup for master - N.walkers = Array_Type[total_walkers]; - N.pivots = Array_Type[total_walkers]; - N.update = Int_Type[total_walkers]; - N.stat = Double_Type[total_walkers]; - N.randoms = Double_Type[total_walkers*nrands]; + variable p = [length(a)-n:length(a)-1]; + variable j; + _for j (0, n-2) + array_swap(p, j, j+rand_int(0, n-1)); + return p; +} +%}}} + +private define emceeInitFilePick (init, engine) %{{{ +{ + variable file = engine.leader.inFile; + variable walkerDistribution, readNumber; + (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); + variable i,j; + % randomize (and bootstrap if necessary) + variable randomize = 
fisher_yates(walkerDistribution, readNumber); + variable parV = Double_Type[engine.numberParameters]; + _for i (0, length(engine.walkers)-1) { + _for j (0, length(parV)-1) + parV[j] = walkerDistribution[randomize[i]][j]; + engine.walkers[i] = @parV; } +} +%}}} +%}}} +private define emceeInitFile () %{{{ +{ + variable init = struct { @EmceeInit }; + init.pick = &emceeInitFilePick; + init.filename = qualifier("filename"); - _for j (0, length(N.walkers)-1, 1) { - N.walkers[j] = Double_Type[N.num_pars]; - N.pivots[j] = Double_Type[N.num_pars]; + return init; +} +%}}} +EmceeInitRegister["file"] = &emceeInitFile; +%}}} + +%{{{ Ship interface: +%!%+ +%\function{emcee--driver} +%\synopsis{Set emcee parallel computation method} +%\usage{driver="method;options"} +%\description +% The driver method can be set with the function string +% "method;parameter" +% +% Available methods: +% mpi : The mpi parallel driver using as many nodes as registered +% in an mpi environment +%!%- +% 1: setSail - set id for engines and how many there are +% 2: leader_send - leader sends to members +% 3: member_send - members send to leader +% 4: leader_receive - leader receives members +% 5: member_receive - member receive leader +% 6: enterHarbor - cleanup if necessary +private variable EmceeShip = struct { + setSail, % function + leaderSend, % function + memberSend, % function + leaderReceive, % function + memberReceive, % function + enterHarbor, % function + + engine, % the working horse + + % private data +}; + +%{{{ fork ship functions +private define elementType (t) %{{{ +{ + switch (t) + { case Char_Type: "c"; } + { case UChar_Type: "C"; } + { case Short_Type: "h"; } + { case UShort_Type: "H"; } + { case Int_Type: "i"; } + { case UInt_Type: "I"; } + { case Long_Type: "l"; } + { case ULong_Type: "L"; } + { case LLong_Type: "m"; } + { case ULLong_Type: "M"; } + { case Int16_Type: "j"; } + { case UInt16_Type: "J"; } + { case Int32_Type: "k"; } + { case UInt32_Type: "K"; } + { case Int64_Type: 
"q"; } + { case UInt64_Type: "Q"; } + { case Float_Type: "f"; } + { case Double_Type: "d"; } + { case Float32_Type: "F"; } + { case Float64_Type: "D"; } + { case String_Type: "s"; } + { case Null_Type: "x"; } +} +%}}} + +private define writeArray (fp, array) %{{{ +{ + variable bytes, msg, fmt; + fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); + msg = pack(fmt, array); + bytes = write(fp, msg); + return bstrlen(msg)-bytes; +} +%}}} + +private define readArray (fp, array) %{{{ +{ + variable i, bytes, msg, fmt; + fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); + bytes = read(fp, &msg, sizeof_pack(fmt)); + array[*] = unpack(fmt, msg); + return bstrlen(msg)-bytes; +} +%}}} + +private define emceeForkSetSail (ship) %{{{ +{ + variable sockRead, sockWrite; + variable pid, cid=0, t; + variable flags; + + _for cid (1, ship.tasks-1) { + (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); + pid = fork(); + if (pid == -1) + throw InternalError, sprintf("Unable to fork process %d", cid); + else if (pid == 0) { + close(sockWrite); + ship.socket = sockRead; + break; + } else { % set master pipes + if (NULL == ship.socket) + ship.socket = FD_Type[ship.tasks]; + flags = fcntl_getfd(sockWrite); +% fcntl_setfd(sockWrite, flags | O_NONBLOCK); + ship.socket[cid] = sockWrite; + close(sockRead); + } } - return N; + ship.engine.id = (pid == 0) ? 
cid : 0; + ship.engine.numberEngines = ship.tasks; +} +%}}} + +private define emceeForkLeaderSend (ship) %{{{ +{ + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; + _for i (1, engine.numberEngines-1) { + % set the walkers for node i + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % pick the pivots for node i + pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % set the randoms for node i + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = writeArray(ship.socket[i], walkers[j]); + () = writeArray(ship.socket[i], pivots[j]); + } + () = writeArray(ship.socket[i], rolls); + firstIndex += walkersPerSet[i]; + } } +%}}} -%}}}% +private define emceeForkMemberSend (ship) %{{{ +{ + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; -%{{{% mpi functions + variable i; + _for i (0, setLength-1, 1) + () = writeArray(ship.socket, engine.walkers[i+setOffset]); -private define release_walkers_mpi_master (node, num_nodes, N) + () = writeArray(ship.socket, engine.update[[0:setLength-1]+setOffset]); + () = writeArray(ship.socket, engine.stat[[0:setLength-1]+setOffset]); +} +%}}} + +private define emceeForkLeaderReceive (ship) %{{{ { -#ifexists rcl_mpi_init - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - - if (NULL == urand) - throw InternalError, "Missing random generator"; - - if (NULL == upick) - throw InternalError, "Missing random generator"; - - % select pivot walkers for current set - variable set_len, complement_len, len_offset; - variable set_per_node, set_handled; - variable pick; - - if (1 == N.set) { 
% - set_len = N.set1; - complement_len = N.set2; - len_offset = 0; - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - } else if (2 == N.set) { - set_len = N.set2; - complement_len = N.set1; - len_offset = N.set1; - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - } - pick = (@upick)(0, complement_len-1, set_len)+len_offset; % pick from complement set - - variable this_walkers, this_pivots, this_randoms; + variable walker, + stat, + update; + + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = readArray(ship.socket[i], walker); + engine.walkers[j+firstIndex+totalOffset][*] = walker; + } - % set current walkers and complement pivots - _for i (0, set_len-1, 1) - N.pivots[i+len_offset] = @(N.walkers[pick[i]]); % get the pivot points + () = readArray(ship.socket[i], update); + () = readArray(ship.socket[i], stat); + + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[0]; + } +} +%}}} - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] - = (@urand)(set_len*N.num_rands); % get new random numbers for current set +private define emceeForkMemberReceive (ship) %{{{ +{ + variable rolls, + param; + + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; + + variable j; + rolls = Double_Type[setLength*nRolls]; + param = Double_Type[engine.numberParameters]; + _for j (0, setLength-1, 1) { + () = readArray(ship.socket, param); + 
engine.walkers[j+setOffset][*] = param; + () = readArray(ship.socket, param); + engine.pivots[j+setOffset][*] = param; + } + () = readArray(ship.socket, rolls); + + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; +} +%}}} - _for i (1, num_nodes-1, 1) { % loop over the slave nodes and send relevant data +private define emceeForkEnterHarbor (ship) %{{{ +{ + variable id; + if (ship.engine.id == 0) { + _for id (1, ship.engine.numberEngines-1) + close(ship.socket[id]); + } else { + close(ship.socket); + exit(); + } +} +%}}} +%}}} +private define emceeShipFork () %{{{ +{ + variable ship = struct { @EmceeShip, socket, tasks }; + ship.setSail = &emceeForkSetSail; + ship.leaderSend = &emceeForkLeaderSend; + ship.memberSend = &emceeForkMemberSend; + ship.leaderReceive = &emceeForkLeaderReceive; + ship.memberReceive = &emceeForkMemberReceive; + ship.enterHarbor = &emceeForkEnterHarbor; + ship.tasks = qualifier("tasks", _num_cpus()); + + return ship; +} +%}}} +EmceeShipRegister["fork"] = &emceeShipFork; + +%{{{ MPI Ship functions +private define emceeMPISetSail (ship) %{{{ +{ + variable engine = ship.engine; + engine.id = rcl_mpi_init(); + engine.numberEngines = rcl_mpi_numtasks(); + rcl_init_mpi_request(engine.numberEngines); +} +%}}} + +private define emceeMPILeaderSend (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + _for i (1, engine.numberEngines-1) { % loop over the slave nodes and send relevant data % set the walkers for node i - this_walkers = N.walkers[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; % pick the pivots for node i - this_pivots = N.pivots[[0:set_per_node[i]-1]+set_handled[i]+len_offset]; + pivots = 
engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; % set the randoms for node i - this_randoms = N.randoms[[0:set_per_node[i]*N.num_rands-1] - +(set_handled[i]+len_offset)*N.num_rands]; - - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_isend_double(this_walkers[j+len_offset], - length(this_walkers[j+len_offset]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(this_pivots[j+len_offset], - length(this_walkers[j+len_offset]), i, 1); % send pivots from other set with tag 1 + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_isend_double(walkers[j], length(walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(pivots[j], length(pivots[j]), i, 1); % send pivots from other set with tag 1 } - () = rcl_mpi_org_isend_double(this_randoms[[0:set_len*N.num_rands-1] - +len_offset*N.num_rands], - set_len*N.num_rands, i, 2); % send random numbers with tag 2 + () = rcl_mpi_org_isend_double(rolls, length(rolls), i, 2); % send random numbers with tag 2 + firstIndex += walkersPerSet[i]; } #endif } +%}}} -private define release_walkers_mpi_slave (node, num_nodes, N) +private define emceeMPILeaderReceive (ship) %{{{ { #ifexists rcl_mpi_init - variable set_len, len_offset; - variable this_randoms; + variable walker, + stat, + update; - if (1 == N.set) { - set_len = N.set1_per_node; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node; - len_offset = N.set1_per_node[node]; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; 
+ + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_recv_double(walker, length(walker), i, i); + engine.walkers[j+firstIndex+totalOffset][*] = walker; +% vmessage("recv %d: (%g, %g)", j+firstIndex+totalOffset, +% engine.walkers[j+firstIndex+totalOffset][0], engine.walkers[j+firstIndex+totalOffset][1]); + } + + () = rcl_mpi_org_recv_int(update, length(update), i, i); + () = rcl_mpi_org_recv_double(stat, length(stat), i, i); + + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[i]; + } +#endif +} +%}}} + +private define emceeMPIMemberSend (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + + variable i; + _for i (0, setLength-1, 1) { +% vmessage("send: (%g, %g)", engine.walkers[i+setOffset][0], engine.walkers[i+setOffset][1]); + () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], + length(engine.walkers[0]), 0, engine.id); } + () = rcl_mpi_org_isend_int(engine.update[[0:setLength-1]+setOffset], setLength, 0, engine.id); + () = rcl_mpi_org_isend_double(engine.stat[[0:setLength-1]+setOffset], setLength, 0, engine.id); +#endif +} +%}}} + +private define emceeMPIMemberReceive (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable rolls; + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; + variable j; - this_randoms = Double_Type[set_len*N.num_rands]; - _for j (0, set_len-1, 1) { - () = rcl_mpi_org_recv_double(N.walkers[j+len_offset], - length(N.walkers[j+len_offset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(N.pivots[j+len_offset], - length(N.pivots[j+len_offset]), 0, 1); % receive pivot points (tag 1) + rolls = Double_Type[setLength*nRolls]; + _for j (0, setLength-1, 1) { + () = 
rcl_mpi_org_recv_double(engine.walkers[j+setOffset], + length(engine.walkers[j+setOffset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(engine.pivots[j+setOffset], + length(engine.pivots[j+setOffset]), 0, 1); % receive pivot points (tag 1) } - () = rcl_mpi_org_recv_double(this_randoms, - set_len*N.num_rands, 0, 2); % receive random numbers (tag 2) + () = rcl_mpi_org_recv_double(rolls, + setLength*engine.gears.step.numberRandoms, 0, 2); % receive random numbers (tag 2) - N.randoms[[0:set_len*N.num_rands-1]+len_offset*N.num_rands] = this_randoms; + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; #endif } +%}}} + +private define emceeMPIEnterHarbor (ship) %{{{ +{ + +} +%}}} +%}}} +private define emceeShipMPI () %{{{ +{ + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeMPISetSail; + ship.leaderSend = &emceeMPILeaderSend; + ship.memberSend = &emceeMPIMemberSend; + ship.leaderReceive = &emceeMPILeaderReceive; + ship.memberReceive = &emceeMPIMemberReceive; + ship.enterHarbor = &emceeMPIEnterHarbor; + + return ship; +} +%}}} +EmceeShipRegister["mpi"] = &emceeShipMPI; +%}}} + +%{{{ File interface: +%!%+ +%\function{emcee--file} +%\synopsis{Set emcee file input and output methods} +%\usage{input="method;options" +% \altusage{output="method;options"}} +%\description +% The file inpu/output methods can be set with the function string +% "method;parameter" +% +% Available methods: +% fit : Fits file interface to write the chain as fits table extension +%!%- +% 1: create - open new file pointer and write necessary intial values +% 2: open - open existing file for read/write +% 3: read - open file and return n walkers and how many walkers were used +% 4: write - write cycle steps to the file (n) +% 5: close - close open file at end +private variable EmceeFile = struct { + create, % function + open, % function + read, % function + write, % function + close, % function + + mode, % 0 read, 1 write, 2 read | write + handle, % file 
handle + filename, % full file name + cycle, % number of steps before file gets written + + % additional private data +}; + +%{{{ Fits file functions + +% Create function %{{{ +private define __emceeFitsWriteT1(handle, engine) %{{{ +{ + variable dataInfo; + list_data(&dataInfo); + + variable par = __parameters(engine.fit.object); + variable params = get_params(); + variable numberTotalParams = length(params); + + %variable parNames = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; + +% fits_create_binary_table(handle, "PARAMETERS", num_free_params(), +% ["FREE_PAR", "FREE_PAR_NAME"], +% ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parNames)))], +% [" parameter indices", " parameter names"]); + variable paramsTable = struct { + name=String_Type[numberTotalParams], + index=Int_Type[numberTotalParams], + value=Double_Type[numberTotalParams], + min=Double_Type[numberTotalParams], + max=Double_Type[numberTotalParams], + hard_min=Double_Type[numberTotalParams], + hard_max=Double_Type[numberTotalParams], + freeze=Int_Type[numberTotalParams], + tie=String_Type[numberTotalParams], + units=String_Type[numberTotalParams], + fun=String_Type[numberTotalParams], + free=Int_Type[numberTotalParams], % combines freeze, fun and tie + }; + variable j; + _for j (0, numberTotalParams-1) { + paramsTable.name[j] = params[j].name; + paramsTable.index[j] = params[j].index; + paramsTable.value[j] = params[j].value; + paramsTable.min[j] = params[j].min; + paramsTable.max[j] = params[j].max; + paramsTable.hard_min[j] = params[j].hard_min; + paramsTable.hard_max[j] = params[j].hard_max; + paramsTable.freeze[j] = params[j].freeze; + paramsTable.tie[j] = (params[j].tie == NULL) ? "" : params[j].tie; + paramsTable.units[j] = params[j].units; + paramsTable.fun[j] = (params[j].fun == NULL) ? 
"" : params[j].fun; + paramsTable.free[j] = (not params[j].freeze) and (params[j].fun == NULL) and (params[j].tie == NULL); + } + fits_write_binary_table(handle, "PARAMETERS", paramsTable); + + fits_update_key(handle, "MODEL", get_fit_fun(), "model function"); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + fits_update_key(handle, "SLOPPY", 0, " sloppy level"); + + array_map(&fits_write_comment, handle, strchop(dataInfo, '\n', 0)); + + % sort to index order here +% if (_fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)]) +% && _fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR_NAME"), 1, 1, parNames)) +% throw IOError; +} +%}}} -private define release_walkers_mpi (node, num_nodes, N) { - % distribute walkers, pivots and random numbers +private define __emceeFitsWriteT2(handle, engine) %{{{ +{ + variable par = __parameters(engine.fit.object); + + fits_create_binary_table(handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + fits_update_key(handle, "NWALKERS", engine.totalNumberWalkers/engine.numberParameters, " Number of walkers per free parameter"); + fits_update_key(handle, "NFREEPAR", engine.numberParameters, " Number of free parameters"); + fits_update_key(handle, "NSTEPS", engine.numberSteps, " Numer of iteration steps done"); +} +%}}} - if (node) - release_walkers_mpi_slave(node, num_nodes, N); - else - release_walkers_mpi_master(node, num_nodes, N;; __qualifiers()); +private define __emceeFitsWriteT3(handle, engine) %{{{ +{ + fits_create_binary_table(handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); } 
+%}}} -private define catch_walkers_mpi_master (node, num_nodes, N) +private define emceeFileFitsCreate (file, engine) %{{{ { -#ifexists rcl_mpi_init - variable set_len, len_offset, set_per_node, set_handled; + file.mode = 1; + + % Create fits file and write headers + file.handle = fits_open_file(file.filename, "c"); + + % write first table + __emceeFitsWriteT1(file.handle, engine); + + % write second table + __emceeFitsWriteT2(file.handle, engine); + + % write third table + %__emceeFitsWriteT3(file.handle, engine); + + % move back to chain table + () = _fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); - if (1 == N.set) { - set_per_node = N.set1_per_node; - set_handled = N.set1_handled; - len_offset = 0; - } else if (2 == N.set) { - set_per_node = N.set2_per_node; - set_handled = N.set2_handled; - len_offset = N.set1; + % set write cycle + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/engine.totalNumberWalkers; + if (file.cycle < 1) + file.cycle = 1; + + % fits routine customs + file.numberSteps = 0; + file.sloppy = 0; +} +%}}} +%}}} + +% Open function %{{{ + +private define __emceeFitsReadChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; } - variable i,j; - variable this_stat, this_update; % we have to use intermediate storage, - % slang creates a copy of an array when addressed by index + if (fits_read_key(handle, "MODEL") != get_fit_fun()) { + fits_close_file(handle); + handle = NULL; + throw IsisError, "Current model and chain model do not match"; + } - _for i (1, num_nodes-1, 1) { - this_stat = Double_Type[set_per_node[i]]; - this_update = Int_Type[set_per_node[i]]; + variable tab = fits_read_table(handle); + ifnot (struct_field_exists(tab, "free") + || struct_field_exists(tab, "value")) { + fits_close_file(handle); + handle = NULL; + throw 
IOError, "Not a emcee chain file"; + } - _for j (0, set_per_node[i]-1, 1) - () = rcl_mpi_org_recv_double(N.walkers[j+set_handled[i]+len_offset], - length(N.walkers[0]), i, i); + variable par = __parameters(engine.fit.object); + if ((length(where(tab.free)) != num_free_params()) + || any(tab.index[where(tab.free)] != par.index[array_sort(par.index)])) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } +} +%}}} - () = rcl_mpi_org_recv_int(this_update, length(this_update), i, i); - () = rcl_mpi_org_recv_double(this_stat, length(this_stat), i, i); +private define __emceeFitsWriteChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } - N.update[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_update; - N.stat[[0:set_per_node[i]-1]+set_handled[i]+len_offset] = this_stat; + if ((fits_read_key(handle, "STATISTIC") != get_fit_statistic()) && (file.sloppy<2)) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level (at least 2) to continue anyway"; + } + fits_update_key(handle, "STATISTIC", get_fit_statistic()); + + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(handle, "NWALKERS")*fits_read_key(handle, "NFREEPAR")) != length(engine.walkers) + && (handle.sloppy<1)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Number of walkers differs from number used in chain file, increase sloppy level to continue"; } -#endif } +%}}} -private define catch_walkers_mpi_slave (node, num_nodes, N) +private define emceeFileFitsOpen (file, engine) %{{{ { -#ifexists rcl_mpi_init - variable set_len, len_offset; + file.mode = 2; 
+ + file.handle = fits_open_file(file.filename, "w"); - if (1 == N.set) { - set_len = N.set1_per_node[node]; - len_offset = 0; - } else if (2 == N.set) { - set_len = N.set2_per_node[node]; - len_offset = N.set1_per_node[node]; + __emceeFitsReadChecks(file, engine); + __emceeFitsWriteChecks(file, engine); + + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; } - variable i; - _for i (0, set_len-1, 1) - () = rcl_mpi_org_isend_double(N.walkers[i+len_offset], - length(N.walkers[len_offset]), 0, node); + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/length(engine.walkers); + if (file.cycle < 1) + file.cycle = 1; - () = rcl_mpi_org_isend_int(N.update[[0:set_len-1]+len_offset], set_len, 0, node); - () = rcl_mpi_org_isend_double(N.stat[[0:set_len-1]+len_offset], set_len, 0, node); -#endif + file.numberSteps = fits_get_num_rows(file.handle); } +%}}} -private define catch_walkers_mpi (node, num_nodes, N) +%}}} + +% Read function %{{{ +private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ { - if (node) - catch_walkers_mpi_slave(node, num_nodes, N); - else - catch_walkers_mpi_master(node, num_nodes, N); + file.mode = 0; + + file.handle = fits_open_file(file.filename, "r"); + + __emceeFitsReadChecks(file, engine); + + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable totalNumberWalkers = fits_read_key(file.handle, "NWALKERS") + *fits_read_key(file.handle, "NFREEPAR"); + variable numberParameters = fits_get_num_cols(file.handle); + variable totalNumberRecords = fits_get_num_rows(file.handle); + variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each + () = _fits_read_cols(file.handle, + [2:numberParameters], + max([0, 
totalNumberRecords-numberWalkers]), + numberWalkers, + &walkerDistribution); + + fits_close_file(file.handle); + + return walkerDistribution, totalNumberWalkers; } +%}}} +%}}} -private define emcee_mpi (walker_per_par, number_par, steps) { - variable total_walkers = walker_per_par*number_par; +% Write function %{{{ +private define emceeFileFitsWrite (file, engine, numberWalkersSteps) %{{{ +{ + if (numberWalkersSteps > engine.leader.writeBuffer.size) + throw InternalError, "Trying to write more than accessible"; - variable init = qualifier("init", NULL); - variable move = qualifier("move", NULL); - variable urand = qualifier("urand", NULL); - variable upick = qualifier("upick", NULL); - variable cont = qualifier("continue", NULL); - variable output = qualifier("output", NULL); - variable io = qualifier("write", NULL); - variable load_hook = qualifier("read", NULL); - variable load = qualifier("load", NULL); + variable par = __parameters(engine.fit.object); + variable npar = engine.numberParameters; - variable node, num_nodes; + %variable walkersPerCycle = engine.leader.writeBuffer.size; % total_walkers*steps_per_cycle + variable i,j; + variable firstIndex = fits_get_num_rows(file.handle)+1; % first index of this cycle + variable parCycle = Double_Type[numberWalkersSteps]; + + _for j (0, npar-1, 1) { + _for i (0, numberWalkersSteps-1, 1) + parCycle[i] = engine.leader.writeBuffer.walkers[i][j]; + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, sprintf("CHAINS%d", par.index[j])), + firstIndex, + 1, + parCycle); + } + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "FITSTAT"), + firstIndex, + 1, + engine.leader.writeBuffer.stat[[:numberWalkersSteps-1]]); + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "UPDATE"), + firstIndex, + 1, + engine.leader.writeBuffer.update[[:numberWalkersSteps-1]]); +} +%}}} +%}}} -#ifexists rcl_mpi_init - node = rcl_mpi_init(); - num_nodes = rcl_mpi_numtasks(); - num_nodes = (num_nodes<1) ? 
1 : num_nodes; - rcl_init_mpi_request(num_nodes); % this is a stupid memory leak!!!!!!!!! - % BUT: since we have a memory leak from the mpi module anyway we accept it currently ... -#else - node = 0; - num_nodes = 1; -#endif +% Close function %{{{ +private define emceeFileFitsClose (file, engine) %{{{ +{ + % todo: write fitstat table + variable nHDUs = fits_get_num_hdus(file.handle); + variable i; - move = emcee_call_setup_fun(move, "move"); - variable this = setup_node(node, num_nodes, total_walkers, move.nrands); - % 'this' is the mpi_emcee handle for this node! - % It contains all relevant data to do the calculation - - % read the settings, if one of them is NULL help was called - io = emcee_call_setup_fun(io, "io"); - load_hook = emcee_call_setup_fun(load_hook, "io"); - init = emcee_call_setup_fun(init, "init"); - if (NULL == io || NULL == load_hook || NULL == init) return; - - ifnot (node) { % master only - if (cont != NULL) - io.__f_open(cont, this.fit, this.walkers); - else if (load != NULL) { - load_hook.__f_read(load, this.walkers); - load_hook.__f_close(); - io.__f_create(output, this.fit, total_walkers); - } else { - io.__f_create(output, this.fit, total_walkers); - init.__f(this.walkers, this.fit); + if (file.mode) { + _for i (1, nHDUs) { + () = _fits_movabs_hdu(file.handle, i); + fits_write_chksum(file.handle); } } - variable collector_length; % the collector so we can skip turns before writing to disk - variable walker_cycle; - variable update_cycle; - variable stat_cycle; - variable cycle_step = 0; + fits_close_file(file.handle); +} +%}}} +%}}} +%}}} +private define emceeFileFits () %{{{ +{ + variable file = struct { @EmceeFile, numberSteps, sloppy }; + file.create = &emceeFileFitsCreate; + file.open = &emceeFileFitsOpen; + file.read = &emceeFileFitsRead; + file.write = &emceeFileFitsWrite; + file.close = &emceeFileFitsClose; - % setup space + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + file.cycle = 1; + + 
file.numberSteps = 0; + file.sloppy = 0; + + return file; +} +%}}} +EmceeFileRegister["fits"] = &emceeFileFits; +%}}} + +%{{{ Step interface: +%!%+ +%\function{emcee--step} +%\synopsis{Set emcee step algorithm} +%\usage{step="method;options"} +%\description +% The step algorithm can be set with the function string +% "method;parameter" +% +% Available algorithms: +% stretch : The stretch move as described in Goodman & Weare 2010 +% ; scale : [=2] Scale for the range of possible moves +%!%- +% 1: move - loop over walkers and update +private variable EmceeStep = struct { + move, % function + + numberRandoms, % random number required per step + + % private data +}; + +%{{{ Stretch move functions (Foreman & Mackey) +% define inverse cumulative distribution function for generating +% random numbers following 1/z^2 when z in [1/a, a] +% TODO: should make this an adjustable thing +private define stretchInverseCDF (u, a) %{{{ +{ + return (u*(a-1.)+1.)^2./a; +} +%}}} + +% stretch move as of Goodman & Weare 2010 +% Move must evaluate the fit function +private define emceeStepStretchMove (step, engine) %{{{ +{ variable j; - if (node) % slave - collector_length = 0; - else % master - collector_length = length(this.walkers)*io.cycle; - - walker_cycle = Array_Type[collector_length]; - update_cycle = Int_Type[collector_length]; - stat_cycle = Double_Type[collector_length]; - - if (NULL == urand || NULL == upick) - throw InternalError, "Missing random number generator"; - - % evaluate the model at the walker positions to get the statistics - % and if not continuing a chain, write them out - ifnot (node) { - _for j (0, length(this.walkers)-1, 1) { - this.stat[j] = this.fit.eval_statistic(this.walkers[j]); - this.update[j] = 1; - } - if (cont == NULL) - io.__f_initwrite(this.fit, this.walkers, this.update, this.stat); - } - - % the main loop where the magic happens - variable s, walker, update, stat, set, set_len, len_offset; - _for s (0, steps-1, 1) { - cycle_step = s mod io.cycle; 
- _for set (1, 2, 1) { - this.set = set; - - release_walkers_mpi(node, - num_nodes, - this; upick=upick, urand=urand); % release walkers to freedom ... - - if (1 == set) { - set_len = this.set1_per_node[node]; - len_offset = 0; - } else if (2 == set) { - set_len = this.set2_per_node[node]; - len_offset = this.set1_per_node[node]; + variable z; + variable proposed; + variable newStat; + variable startIndex = engine.setOffset; + variable setLength = engine.setLength; + + _for j (startIndex, startIndex+setLength-1) { + z = stretchInverseCDF(engine.rolls[j*step.numberRandoms], step.scale); + proposed = engine.pivots[j] + z*(engine.walkers[j]-engine.pivots[j]); + + engine.update[j] = 0; + + try { + newStat = engine.fit.eval_statistic(proposed; nocopy); + + % accept or reject dimansionally normalized. Assuming statistic is -2 log likelihood + if (log(engine.rolls[j*step.numberRandoms+1]) + <= (log(z)*(engine.fit.num_vary-1)+(engine.stat[j]-newStat)/2.)) { + engine.stat[j] = newStat; + engine.walkers[j][*] = proposed; + engine.update[j] = 1; } + } catch IsisError; + } +} +%}}} +%}}} +private define emceeStepStretch () %{{{ +{ + variable step = struct { @EmceeStep, scale }; + step.move = &emceeStepStretchMove; + step.numberRandoms = 2; + step.scale = qualifier("scale", 2); - _for j (0, set_len-1, 1) { % ... let them move ... - (walker, update, stat) = move.__f(this.fit, this.walkers[j+len_offset], - this.pivots[j+len_offset], - this.randoms[[0:this.num_rands-1]+(j+len_offset)*this.num_rands], - this.stat[j+len_offset]); - this.walkers[j+len_offset] = walker; - this.update[j+len_offset] = update; - this.stat[j+len_offset] = stat; - } + return step; +} +%}}} +EmceeStepRegister["stretch"] = &emceeStepStretch; +%}}} + +%%% emcee call +private define emceeOption (str) %{{{ +{ + variable s = strchop(str, ';', 0); + return strtrim(s[0]), length(s)>1 ? 
eval(sprintf("struct {%s}", s[1])) : NULL; +} +%}}} + +private define emceeLoop (ship, step, output) %{{{ +{ + variable engine = ship.engine; + variable s, j, set, cycle, leader, size; + + variable k; + if (engine.id==0) { + _for k (0, length(engine.walkers)-1) + writecol(stdout, Int_Type[engine.numberParameters]-1, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); + vmessage(""); + } + + _for s (0, engine.numberSteps-1) { + _for set (1, 2) { + emceeDrawSet(engine, set); + + if (0 == engine.id) + ship.leaderSend(); + else + ship.memberReceive(); - catch_walkers_mpi(node, num_nodes, this); % ... and catch 'em! + step.move(engine); + + if (0 == engine.id) + ship.leaderReceive(); + else + ship.memberSend(); + } + + _for k (0, engine.numberWalkers-1) { + writecol(stdout, Int_Type[engine.numberParameters]+engine.id, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); } - % if cycle end is reached write the chain - ifnot (node) { % master only - if (not cycle_step && s > 0) - io.__f_write(this.fit, walker_cycle, update_cycle, stat_cycle); - _for j (0, length(this.walkers)-1, 1) { - walker_cycle[j+cycle_step*length(this.walkers)] = @(this.walkers[j]); - update_cycle[j+cycle_step*length(this.walkers)] = this.update[j]; - stat_cycle[j+cycle_step*length(this.walkers)] = this.stat[j]; + if (engine.id==0) { + _for k (0, length(engine.walkers)-1) + writecol(stdout, Int_Type[engine.numberParameters]-1, + Int_Type[engine.numberParameters]+k, + engine.walkers[k]); + vmessage(""); + } + + if (0 == engine.id) { + leader = engine.leader; + size = leader.writeBuffer.size; + cycle = s mod leader.writeBuffer.cycle; + % write to buffer + _for j (0, engine.totalNumberWalkers-1) { + leader.writeBuffer.walkers[j+cycle*engine.totalNumberWalkers][*] = @(engine.walkers[j]); + leader.writeBuffer.stat[j+cycle*engine.totalNumberWalkers] = engine.stat[j]; + leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; } + + if (cycle == 
(leader.writeBuffer.cycle-1)) + output.write(engine, size); } } - % we might have unwritten steps left, so better write them here - cycle_step++; % the last step is never written in the loop, we have to deal with it here - ifnot (node) { - io.__f_write(this.fit, walker_cycle[[:cycle_step*length(this.walkers)-1]], - update_cycle[[:cycle_step*length(this.walkers)-1]], - stat_cycle[[:cycle_step*length(this.walkers)-1]]); + % write remaining steps + if (0 == engine.id) { + if (cycle < (leader.writeBuffer.cycle-1)) + output.write(engine, (cycle+1)*engine.totalNumberWalkers); + output.close(engine); } +} +%}}} - % and finally call the finalizing function - ifnot (node) { % master only - io.__f_finalize(steps, walker_per_par, number_par, this.fit); - io.__f_close(); +private define emceeSetup (ship, steps, options) %{{{ +{ + variable leader, size, engine; + variable j, set; + + engine = ship.engine; + if (0 == engine.id) { + emceeSetupLeader(engine, options.input, options.output); + options.init.pick(engine); } -} -%}}}% -define emcee_new (walkers_per_par, steps) { - variable qs = struct { - move = "stretch", % defined move - urand = &rand_uniform, % double random generator - upick = &rand_int, % int random generator - init = "uniform", % initialization function - load = NULL, % initialize from file - read = "fits", % specifier for read - write = "fits", % specifier for write - output = strftime("%Y%m%d-%H%M%S_mcmc_chain.fits"), % output file - continue = NULL, % continue file - }; + % set walkers and eval once + _for set (1, 2) { + emceeDrawSet(engine, set); - if (NULL == get_fit_fun()) - throw UsageError, "No fit function loaded"; + if (0 == engine.id) + ship.leaderSend(); + else + ship.memberReceive(); + } - if (NULL == all_data()) - throw UsageError, "No data set loaded"; + _for j (0, length(engine.walkers)-1) + engine.stat[j] = engine.fit.eval_statistic(engine.walkers[j]; nocopy); - ifnot (0 ((1<<29)-1)) - throw UsageError, "Unable to create ensemble for this large 
number of walkers"; + % write initial walkers to buffer + _for j (0, length(engine.walkers)-1) { + leader.writeBuffer.walkers[j] = @(engine.walkers[j]); + leader.writeBuffer.stat[j] = engine.stat[j]; + leader.writeBuffer.update[j] = 1; + } - emcee_mpi(walkers_per_par, num_free_params(), steps;; struct { @qs, @__qualifiers() }); -#ifexists rcl_mpi_init -% rcl_mpi_finalize(); -#endif + % if we create new file write initial walkers to it + ifnot (options.continue) + options.output.write(engine, engine.totalNumberWalkers); + } +} +%}}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%% +define emcee_hammer (steps) +%%%%%%%%%%%%%%%%%%%%%%%%%%% +%!%+ +%\function{emcee_hammer} +%\synopsis{Explore parameter space with MCMC method} +%\usage{emcee_hammer (Int_Type);} +%#c%{{{ +%\qualifiers{ +% Basic +% \qualifier{walkers}{[=10]: Number of walkers per parameter} +% \qualifier{continue}{: If given (and possible set to a file) continue chain from this file} +% \qualifier{infile}{: Set the input file name for reading and continuing} +% \qualifier{outfile}{: Set the output file name} +% Advanced +% \qualifier{init}{[="uniform" or "file"]: The walker initialization method} +% \qualifier{driver}{[="mpi"]: The parallelization method} +% \qualifier{step}{[="stretch"]: The walker step algorithm} +% \qualifier{input}{[="fits"]: The file reading method} +% \qualifier{output}{[="fits"]: The file writing method} +% \qualifier{urand}{[=&rand_uniform]: PRNG for uniform numbers (Double_Type[] = urand(Int_Type))} +% \qualifier{upick}{[=&rand_int]: PRNG to chose complement walker (Int_Type[] = upick(Int_Type, Int_Type, Int_Type))} +%} +% +%\description +% The MCMC parameter space exploration algorithm as described by +% Foreman-Mackey et al. The function expects that data and a model is loaded. +% The only input parameter gives the number of iterations the algorithm +% performs. The resulting walker positions are written to a file which can +% be set with the "outfile" qualifier. 
+% +% The function allows to choose other algorithms for the step proposition, +% the read and write routines and how the walker ensamble is initialized. +% To get more information about the methods read 'help emcee_'. +% +% Per default a new chain is started when the function is called. To continue +% a chain use the "continue" qualifier. +% +%\seealso{emcee--init, emcee--step, emcee--driver, emcee--input, emcee--output} +%!%- +{ + % options + variable oContinue = qualifier("continue"); + variable oInfile = qualifier("infile", oContinue); + variable oOutfile = qualifier("outfile", oContinue); + + % advanced options + variable initHandle, initOption; + if (NULL != oInfile) + (initHandle, initOption) = emceeOption(qualifier("init", "file")); + else + (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); + + variable shipHandle, shipOption; + (shipHandle, shipOption) = emceeOption(qualifier("driver", "mpi")); + + variable stepHandle, stepOption; + (stepHandle, stepOption) = emceeOption(qualifier("step", "stretch")); + + variable inputHandle, inputOption; + (inputHandle, inputOption) = emceeOption(qualifier("input", "fits")); + if (NULL != oInfile) inputOption = struct { @inputOption, filename=oInfile }; + + variable outputHandle, outputOption; + (outputHandle, outputOption) = emceeOption(qualifier("output", "fits")); + if (NULL != oOutfile) outputOption = struct { @outputOption, filename=oOutfile }; + + variable totalNumberWalkers = qualifier("walkers", 10)*num_free_params(); + variable ship = @(EmceeShipRegister[shipHandle])(;;shipOption); + emceeSetupEngine(ship, totalNumberWalkers, steps); + + variable options = struct { + init = @(EmceeInitRegister[initHandle])(;;initOption), + step = @(EmceeStepRegister[stepHandle])(;;stepOption), + output = NULL, + input = NULL, + urand = qualifier("urand", &rand_uniform), + upick = qualifier("upick", &rand_int), + continue = qualifier_exists("continue"), + }; + emceeSetupGears(ship.engine, options.urand, 
options.upick, options.step); + + if (0 == ship.engine.id) { + options.input = @(EmceeFileRegister[inputHandle])(;;inputOption); + options.output = @(EmceeFileRegister[outputHandle])(;;outputOption); + } + + emceeSetup(ship, steps, options); + + emceeLoop(ship, options.step, options.output); + + ship.enterHarbor(); } +%}}} -- GitLab From f27f34130a8e49335096532ba554f0b40cad3b98 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 11:22:37 +0100 Subject: [PATCH 75/89] Communication is working Uniform init working, others need testing. Things to change: Add function initialization method, add approx_inv initialization from valid chain file. Maybe change fork sockets to non-blocking. --- src/fitting/ensemble-samplers/emcee.sl | 79 +++++++++++++++----------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 42acc4a4..48117f64 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -3,6 +3,7 @@ require("rand"); require("fork"); require("socket"); +%require("select"); % Implementation of the emcee hammer () with the principle idea % that multiple nodes (engines) are responsible for a part of the @@ -67,7 +68,7 @@ private variable EmceeLeader = struct { walkersPerSet1, % number of walkers for each engine id in set 1 walkersPerSet2, % number of walkers for each engine id in set 2 walkersPerSet, % selected set walkers - totalOffset, % start of ALL walkers in set + totalOffset, % start of set in walker array writeBuffer, % total write buffer array inFile, % input file handle outFile, % output file handle @@ -92,22 +93,23 @@ private define emceeDrawSet (engine, set) %{{{ if (1 == set) { totalNumberSet = engine.totalNumberSet1; totalNumberComplement = engine.totalNumberSet2; - totalOffset = 0; engine.setOffset = 0; engine.setLength = engine.numberWalkersSet1; + totalOffset = 0; if (0 == engine.id) { - 
engine.leader.walkersPerSet = engine.leader.walkersPerSet1; engine.leader.totalOffset = totalOffset; + engine.leader.walkersPerSet = engine.leader.walkersPerSet1; } } else if (2 == set) { totalNumberSet = engine.totalNumberSet2; totalNumberComplement = engine.totalNumberSet1; - totalOffset = engine.totalNumberSet1; engine.setOffset = engine.numberWalkersSet1; engine.setLength = engine.numberWalkersSet2; + totalOffset = engine.totalNumberSet1; if (0 == engine.id) { - engine.leader.walkersPerSet = engine.leader.walkersPerSet2; engine.leader.totalOffset = totalOffset; + engine.leader.walkersPerSet = engine.leader.walkersPerSet2; + engine.setOffset = totalOffset; } } @@ -382,6 +384,12 @@ EmceeInitRegister["file"] = &emceeInitFile; % "method;parameter" % % Available methods: +% serial : The serial driver. No parallelization at all +% +% fork : The fork (& socket) parallel driver. Per default uses +% _num_cpus many tasks. +% ; tasks : [=_num_cpus] Number of total processes used +% % mpi : The mpi parallel driver using as many nodes as registered % in an mpi environment %!%- @@ -404,6 +412,35 @@ private variable EmceeShip = struct { % private data }; +%{{{ serial ship functions +private define void () %{{{ +{ + variable args = __pop_list(_NARGS); +} +%}}} + +private define emceeSerialInit (ship) %{{{ +{ + ship.engine.numberEngines = 1; + ship.engine.id = 1; +} +%}}} +%}}} +private define emceeShipSerial () %{{{ +{ + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeSerialInit; + ship.leaderSend = &void; + ship.memberSend = &void; + ship.leaderReceive = &void; + ship.memberReceive = &void; + ship.enterHarbor = &void; + + return ship; +} +%}}} +EmceeShipRegister["serial"] = &emceeShipSerial; + %{{{ fork ship functions private define elementType (t) %{{{ { @@ -471,7 +508,7 @@ private define emceeForkSetSail (ship) %{{{ } else { % set master pipes if (NULL == ship.socket) ship.socket = FD_Type[ship.tasks]; - flags = fcntl_getfd(sockWrite); +% flags = 
fcntl_getfd(sockWrite); % fcntl_setfd(sockWrite, flags | O_NONBLOCK); ship.socket[cid] = sockWrite; close(sockRead); @@ -557,7 +594,7 @@ private define emceeForkLeaderReceive (ship) %{{{ engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; - firstIndex += walkersPerSet[0]; + firstIndex += walkersPerSet[i]; } } %}}} @@ -682,8 +719,6 @@ private define emceeMPILeaderReceive (ship) %{{{ _for j (0, walkersPerSet[i]-1, 1) { () = rcl_mpi_org_recv_double(walker, length(walker), i, i); engine.walkers[j+firstIndex+totalOffset][*] = walker; -% vmessage("recv %d: (%g, %g)", j+firstIndex+totalOffset, -% engine.walkers[j+firstIndex+totalOffset][0], engine.walkers[j+firstIndex+totalOffset][1]); } () = rcl_mpi_org_recv_int(update, length(update), i, i); @@ -707,7 +742,6 @@ private define emceeMPIMemberSend (ship) %{{{ variable i; _for i (0, setLength-1, 1) { -% vmessage("send: (%g, %g)", engine.walkers[i+setOffset][0], engine.walkers[i+setOffset][1]); () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], length(engine.walkers[0]), 0, engine.id); } @@ -1207,15 +1241,6 @@ private define emceeLoop (ship, step, output) %{{{ variable engine = ship.engine; variable s, j, set, cycle, leader, size; - variable k; - if (engine.id==0) { - _for k (0, length(engine.walkers)-1) - writecol(stdout, Int_Type[engine.numberParameters]-1, - Int_Type[engine.numberParameters]+k, - engine.walkers[k]); - vmessage(""); - } - _for s (0, engine.numberSteps-1) { _for set (1, 2) { emceeDrawSet(engine, set); @@ -1233,20 +1258,6 @@ private define emceeLoop (ship, step, output) %{{{ ship.memberSend(); } - _for k (0, engine.numberWalkers-1) { - writecol(stdout, Int_Type[engine.numberParameters]+engine.id, - Int_Type[engine.numberParameters]+k, - engine.walkers[k]); - } - - if (engine.id==0) { - _for k (0, length(engine.walkers)-1) - writecol(stdout, Int_Type[engine.numberParameters]-1, - Int_Type[engine.numberParameters]+k, 
- engine.walkers[k]); - vmessage(""); - } - if (0 == engine.id) { leader = engine.leader; size = leader.writeBuffer.size; @@ -1372,7 +1383,7 @@ define emcee_hammer (steps) (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); variable shipHandle, shipOption; - (shipHandle, shipOption) = emceeOption(qualifier("driver", "mpi")); + (shipHandle, shipOption) = emceeOption(qualifier("driver", "serial")); variable stepHandle, stepOption; (stepHandle, stepOption) = emceeOption(qualifier("step", "stretch")); -- GitLab From 43a7a806dea26e8fdefde1836f106ae3e1f5b610 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 22 Feb 2021 12:32:10 +0100 Subject: [PATCH 76/89] Serial driver was setting ID wrong There was no node responsible for writing... --- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 48117f64..be2acd26 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -422,7 +422,7 @@ private define void () %{{{ private define emceeSerialInit (ship) %{{{ { ship.engine.numberEngines = 1; - ship.engine.id = 1; + ship.engine.id = 0; } %}}} %}}} -- GitLab From 4a981e0e050bfbd166a565b25235b3e13f1af2d5 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 23 Feb 2021 15:15:17 +0100 Subject: [PATCH 77/89] Emcee communication seems to work Added init methods for files Checked communication (hopefully) Remaining tasks: Check performance, add function initialization (how?) 
--- src/fitting/ensemble-samplers/emcee.sl | 87 +++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index be2acd26..77b075a0 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -241,6 +241,8 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ %\description % The initialization method can be set with the function string % "method;parameter" +% Initialization methods that read from file use the defined input +% method (default: fits). % % Available methods: % uniform : Draw initial walker positions from a uniform distribution @@ -253,15 +255,18 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ % % file : Load initial walkers from a valid chain file created by the % emcee method -% ; filename : The file to load +% +% chain : Draw initial walkers from an approximated CDF of an existing +% chain file. 
+% ; steps : [=10] The number of steps to concider for constructing the CDF +% (from the end of the chain) +% ; rng : [=&rand_uniform] uniform random number generator % %!%- % 1: pick - get walkers from parameters and distribution or file private variable EmceeInit = struct { pick, % function - filename, % the filename (if any) - % private data }; @@ -286,7 +291,6 @@ private define emceeInitUniform () %{{{ { variable init = struct { @EmceeInit }; init.pick = &emceeInitUniformPick; - init.filename = NULL; return init; } @@ -323,7 +327,6 @@ private define emceeInitGauss () %{{{ { variable init = struct { @EmceeInit, sigma }; init.pick = &emceeInitGaussPick; - init.filename = NULL; init.sigma = qualifier("sigma", 10); return init; @@ -331,7 +334,7 @@ private define emceeInitGauss () %{{{ %}}} EmceeInitRegister["gauss"] = &emceeInitGauss; -%{{{ file initialization function +%{{{ File initialization function private define fisher_yates (a, n) %{{{ { if (n>length(a)) @@ -350,6 +353,7 @@ private define emceeInitFilePick (init, engine) %{{{ variable file = engine.leader.inFile; variable walkerDistribution, readNumber; (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); + variable i,j; % randomize (and bootstrap if necessary) variable randomize = fisher_yates(walkerDistribution, readNumber); @@ -366,13 +370,77 @@ private define emceeInitFile () %{{{ { variable init = struct { @EmceeInit }; init.pick = &emceeInitFilePick; - init.filename = qualifier("filename"); return init; } %}}} EmceeInitRegister["file"] = &emceeInitFile; + +%{{{ Chain initialization function +define empiric_cdf_inverse (p, a, amin, amax) %{{{ +{ + variable s = array_sort(p); + + if (p[s][0]<0 || p[s][-1]>=1) + throw DomainError, "not in range 0<=p<1"; + + a = a[array_sort(a)]; + a = a[where(amin<=a<=amax)]; % restrict to cdf in range + + variable u = unique(a); + variable ecdf = [u/1./length(a), 1.]; + variable lo = [amin, a[u]]; + variable hi = [a[u], amax]; + + variable r = 
Double_Type[length(p)]; + variable k, i = 0; + variable m = .5*([(ecdf[[1:]]-ecdf[[:-2]])/(lo[[1:]]-lo[[:-2]]), 0.] + +[0., (ecdf[[1:]]-ecdf[[:-2]])/(hi[[1:]]-hi[[:-2]])]); + + variable hitsmin = (amin == a[0]); % gives NaN if true and p == 0 + _for k (0, length(p)-1) { + while (p[s[k]] > ecdf[i+1]) i++; + if (hitsmin && p[s[k]]==0) + r[s[k]] = amin; + else + r[s[k]] = (p[s[k]]-ecdf[i])/m[i]+lo[i]; + } + + return r; +} +%}}} + +private define emceeInitChainPick (init, engine) %{{{ +{ + variable file = engine.leader.inFile; + variable walkerDistribution, numberSteps; + (walkerDistribution, numberSteps) = file.read(engine, init.steps); + variable par = get_params(); + + variable parRand; + variable i,j; + _for i (0, engine.numberParameters-1) { + parRand = empiric_cdf_inverse(@(init.rng)(engine.totalNumberWalkers), + walkerDistribution[i], + par[i].min, + par[i].max); + _for j (0, engine.totalNumberWalkers-1) + engine.walkers[j][i] = parRand[j]; + } +} +%}}} +%}}} +private define emceeInitChain () %{{{ +{ + variable init = struct { @EmceeInit, rng, steps }; + init.pick = &emceeInitChainPick; + init.rng = qualifier("rng", &rand_uniform); + init.steps = qualifier("steps", 10); + + return init; +} %}}} +EmceeInitRegister["chain"] = &emceeInitChain; %{{{ Ship interface: %!%+ @@ -1062,7 +1130,7 @@ private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ variable totalNumberRecords = fits_get_num_rows(file.handle); variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each () = _fits_read_cols(file.handle, - [2:numberParameters], + [3:numberParameters], max([0, totalNumberRecords-numberWalkers]), numberWalkers, &walkerDistribution); @@ -1340,7 +1408,8 @@ define emcee_hammer (steps) %\qualifiers{ % Basic % \qualifier{walkers}{[=10]: Number of walkers per parameter} -% \qualifier{continue}{: If given (and possible set to a file) continue chain from this file} +% \qualifier{continue}{: If given (and possible set to a 
file) continue chain from this file +% (using init="file", file="fits" per default)} % \qualifier{infile}{: Set the input file name for reading and continuing} % \qualifier{outfile}{: Set the output file name} % Advanced -- GitLab From d97147ed668a345045c0b0cae829bf35cceed393 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 24 Feb 2021 00:36:02 +0100 Subject: [PATCH 78/89] Fix fork driver issue close return value not catched, and setup was not done correctly --- src/fitting/ensemble-samplers/emcee.sl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 77b075a0..56a8c9cb 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -441,6 +441,7 @@ private define emceeInitChain () %{{{ } %}}} EmceeInitRegister["chain"] = &emceeInitChain; +%}}} %{{{ Ship interface: %!%+ @@ -564,13 +565,14 @@ private define emceeForkSetSail (ship) %{{{ variable pid, cid=0, t; variable flags; - _for cid (1, ship.tasks-1) { + _for t (1, ship.tasks-1) { (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); + cid++; pid = fork(); if (pid == -1) throw InternalError, sprintf("Unable to fork process %d", cid); else if (pid == 0) { - close(sockWrite); + () = close(sockWrite); ship.socket = sockRead; break; } else { % set master pipes @@ -579,7 +581,7 @@ private define emceeForkSetSail (ship) %{{{ % flags = fcntl_getfd(sockWrite); % fcntl_setfd(sockWrite, flags | O_NONBLOCK); ship.socket[cid] = sockWrite; - close(sockRead); + () = close(sockRead); } } @@ -697,9 +699,9 @@ private define emceeForkEnterHarbor (ship) %{{{ variable id; if (ship.engine.id == 0) { _for id (1, ship.engine.numberEngines-1) - close(ship.socket[id]); + () = close(ship.socket[id]); } else { - close(ship.socket); + () = close(ship.socket); exit(); } } @@ -1377,7 +1379,7 @@ private define emceeSetup (ship, steps, options) %{{{ if (0 == ship.engine.id) { if 
(options.continue) - options.output.open(engine); + options.output.open(engine); else options.output.create(engine); -- GitLab From 7c17cce66c5b7949c074bbb86b4252cd678a86ab Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 2 Jun 2021 23:26:11 +0200 Subject: [PATCH 79/89] Add emcee writout every 10 min If a model is very slow it might happen that we have just not enough output. This is not a problem if everything works just fine, but if we stop early we might not get a single evaluation of walkers written to output. To overcome this we add a write every 10 min from the last write. Maybe make the time interval an option? --- src/fitting/ensemble-samplers/emcee.sl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 56a8c9cb..5047039c 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1309,8 +1309,10 @@ private define emceeOption (str) %{{{ private define emceeLoop (ship, step, output) %{{{ { variable engine = ship.engine; - variable s, j, set, cycle, leader, size; + variable s, j, set, cycle, leader, size, offset, timer; + offset = 0; + tic; % start timer _for s (0, engine.numberSteps-1) { _for set (1, 2) { emceeDrawSet(engine, set); @@ -1327,11 +1329,12 @@ private define emceeLoop (ship, step, output) %{{{ else ship.memberSend(); } + timer = toc; % get elapsed time if (0 == engine.id) { leader = engine.leader; size = leader.writeBuffer.size; - cycle = s mod leader.writeBuffer.cycle; + cycle = (s-offset) mod leader.writeBuffer.cycle; % write to buffer _for j (0, engine.totalNumberWalkers-1) { leader.writeBuffer.walkers[j+cycle*engine.totalNumberWalkers][*] = @(engine.walkers[j]); @@ -1339,8 +1342,15 @@ private define emceeLoop (ship, step, output) %{{{ leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; } - if (cycle == (leader.writeBuffer.cycle-1)) + if (cycle == 
(leader.writeBuffer.cycle-1)) { output.write(engine, size); + offset = 0; % we write full buffer, so no offset at all + tic; % restart timer + } else if (timer >= 6e2) { % write every 10 min + output.write(engine, (cycle+1)*engine.totalNumberWalkers); + offset = cycle mod leader.writeBuffer.cycle; + tic; % restart timer + } } } -- GitLab From 5a6c0797b413da874e77fa26c0374edbacc491e1 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Fri, 4 Jun 2021 16:24:01 +0200 Subject: [PATCH 80/89] Halfway par;par for emcee For more flexibility and compatibility with mikes code --- src/fitting/ensemble-samplers/emcee.sl | 1607 +++++++++++++----------- 1 file changed, 851 insertions(+), 756 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 5047039c..8f418d79 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -233,992 +233,1080 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ %}}} %}}} -%{{{ Init interface +%{{{ File interface: %!%+ -%\function{emcee--init} -%\synopsis{Set emcee initialization function} -%\usage{init="method;parameters";} +%\function{emcee--file} +%\synopsis{Set emcee file input and output methods} +%\usage{input="method;options" +% \altusage{output="method;options"}} %\description -% The initialization method can be set with the function string +% The file input/output methods can be set with the function string % "method;parameter" -% Initialization methods that read from file use the defined input -% method (default: fits). % % Available methods: -% uniform : Draw initial walker positions from a uniform distribution -% within the parameter ranges. -% -% gauss : Draw initial walker positions from a gaussian distribution -% within parameter ranges. -% ; sigma : [=10.] Sigma of the gauss function in terms of the -% parameter range. 
-% -% file : Load initial walkers from a valid chain file created by the -% emcee method -% -% chain : Draw initial walkers from an approximated CDF of an existing -% chain file. -% ; steps : [=10] The number of steps to concider for constructing the CDF -% (from the end of the chain) -% ; rng : [=&rand_uniform] uniform random number generator +% fits : Fits file interface to write the chain as fits table extension +% ; filename : [emcee-.fits] The input/output file name. +% ; parameter : If given, on read we draw new starting positions from the +% parameter settings stored in the file instead of reading +% the last iterations. % +% par : Parameter file interface to draw initial walkers from parameter +% files. +% ; filename : [emcee-.fits] Multiple parameter files can be separated +% by a semi colon with an optional additional multiplier +% (separated by a colon). A string of the form +% 'file1.par:2;file2.par' means that 2/3 of all walkers are +% drawn from file1.par and 1/3 is drawn from file2.par. +% Files are processed first to last, last files are ignored if +% all walkers have been determined. 
%!%- -% 1: pick - get walkers from parameters and distribution or file -private variable EmceeInit = struct { - pick, % function +% 1: create - open new file pointer and write necessary intial values +% 2: open - open existing file for read/write +% 3: read - open file and return n walkers and how many walkers were used +% 4: write - write cycle steps to the file (n) +% 5: close - close open file at end +private variable EMCEE_FILE_READ = 0x1, EMCEE_FILE_WRITE=0x2, EMCEE_FILE_RANGE = 0x4; +private variable EmceeFile = struct { + create, % function + open, % function + read, % function + write, % function + close, % function - % private data + mode = 0, % 1 read, 2 write, 4 range bit (read parameter range instead of position) + handle, % file handle + filename, % full file name + cycle, % number of steps before file gets written + + % additional private data }; -%{{{ Uniform initialization function -% pick random parameter values within the boundaries -private define emceeInitUniformPick (init, engine) %{{{ +%{{{ Fits file functions + +% Create function %{{{ +private define __emceeFitsWriteT1(handle, engine) %{{{ { - variable i; + variable dataInfo; + list_data(&dataInfo); + variable par = __parameters(engine.fit.object); - variable numParameter = length(par.value); + variable params = get_params(); + variable numberTotalParams = length(params); - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges, unable to set inital walkers"; + %variable parNames = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; - _for i (0, engine.totalNumberWalkers-1) - engine.walkers[i] = rand_uniform(numParameter)*(par.max-par.min)+par.min; -} -%}}} -%}}} -private define emceeInitUniform () %{{{ -{ - variable init = struct { @EmceeInit }; - init.pick = &emceeInitUniformPick; +% fits_create_binary_table(handle, "PARAMETERS", 
num_free_params(), +% ["FREE_PAR", "FREE_PAR_NAME"], +% ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parNames)))], +% [" parameter indices", " parameter names"]); + variable paramsTable = struct { + name=String_Type[numberTotalParams], + index=Int_Type[numberTotalParams], + value=Double_Type[numberTotalParams], + min=Double_Type[numberTotalParams], + max=Double_Type[numberTotalParams], + hard_min=Double_Type[numberTotalParams], + hard_max=Double_Type[numberTotalParams], + freeze=Int_Type[numberTotalParams], + tie=String_Type[numberTotalParams], + units=String_Type[numberTotalParams], + fun=String_Type[numberTotalParams], + free=Int_Type[numberTotalParams], % combines freeze, fun and tie + }; + variable j; + _for j (0, numberTotalParams-1) { + paramsTable.name[j] = params[j].name; + paramsTable.index[j] = params[j].index; + paramsTable.value[j] = params[j].value; + paramsTable.min[j] = params[j].min; + paramsTable.max[j] = params[j].max; + paramsTable.hard_min[j] = params[j].hard_min; + paramsTable.hard_max[j] = params[j].hard_max; + paramsTable.freeze[j] = params[j].freeze; + paramsTable.tie[j] = (params[j].tie == NULL) ? "" : params[j].tie; + paramsTable.units[j] = params[j].units; + paramsTable.fun[j] = (params[j].fun == NULL) ? 
"" : params[j].fun; + paramsTable.free[j] = (not params[j].freeze) and (params[j].fun == NULL) and (params[j].tie == NULL); + } + fits_write_binary_table(handle, "PARAMETERS", paramsTable); - return init; -} -%}}} -EmceeInitRegister["uniform"] = &emceeInitUniform; + fits_update_key(handle, "MODEL", get_fit_fun(), "model function"); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + fits_update_key(handle, "SLOPPY", 0, " sloppy level"); -%{{{ Gauss initialization function -private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ -{ - variable upper = Real(cerf((bmax-v)/sqrt(2.)/sigma)); - variable lower = Real(cerf((bmin-v)/sqrt(2.)/sigma)); + array_map(&fits_write_comment, handle, strchop(dataInfo, '\n', 0)); - return sqrt(2)*erfinv(rand_uniform(length(v))*(upper-lower)+lower)*sigma+v; + % sort to index order here +% if (_fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)]) +% && _fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR_NAME"), 1, 1, parNames)) +% throw IOError; } %}}} -private define emceeInitGaussPick (init, engine) %{{{ +private define __emceeFitsWriteT2(handle, engine) %{{{ { - variable i,w; variable par = __parameters(engine.fit.object); - variable numParameter = length(par.value); - - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges, unable to set initial walkers"; - - variable sigma = (par.max-par.min)/init.sigma; - _for i (0, engine.totalNumberWalkers-1) - engine.walkers[i] = rand_gauss_cut(sigma, par.value, par.min, par.max); -} -%}}} -%}}} -private define emceeInitGauss () %{{{ -{ - variable init = struct { @EmceeInit, sigma }; - init.pick = &emceeInitGaussPick; - init.sigma = qualifier("sigma", 10); - return init; + fits_create_binary_table(handle, "MCMCCHAIN", 0, + ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, 
"CHAINS%d", par.index)], + ["D", "J", ["D"][par.index*0]], + [" fit statistics", " update indicator", [" parameter values"][par.index*0]]); + fits_update_key(handle, "NWALKERS", engine.totalNumberWalkers/engine.numberParameters, " Number of walkers per free parameter"); + fits_update_key(handle, "NFREEPAR", engine.numberParameters, " Number of free parameters"); + fits_update_key(handle, "NSTEPS", engine.numberSteps, " Numer of iteration steps done"); } %}}} -EmceeInitRegister["gauss"] = &emceeInitGauss; -%{{{ File initialization function -private define fisher_yates (a, n) %{{{ +private define __emceeFitsWriteT3(handle, engine) %{{{ { - if (n>length(a)) - return rand_int(0, length(a)-1, n); - - variable p = [length(a)-n:length(a)-1]; - variable j; - _for j (0, n-2) - array_swap(p, j, j+rand_int(0, n-1)); - return p; + fits_create_binary_table(handle, "CHAINSTATS", 0, + ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], + [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); } %}}} -private define emceeInitFilePick (init, engine) %{{{ -{ - variable file = engine.leader.inFile; - variable walkerDistribution, readNumber; - (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); - - variable i,j; - % randomize (and bootstrap if necessary) - variable randomize = fisher_yates(walkerDistribution, readNumber); - variable parV = Double_Type[engine.numberParameters]; - _for i (0, length(engine.walkers)-1) { - _for j (0, length(parV)-1) - parV[j] = walkerDistribution[randomize[i]][j]; - engine.walkers[i] = @parV; - } -} -%}}} -%}}} -private define emceeInitFile () %{{{ +private define emceeFileFitsCreate (file, engine) %{{{ { - variable init = struct { @EmceeInit }; - init.pick = &emceeInitFilePick; - - return init; -} -%}}} -EmceeInitRegister["file"] = &emceeInitFile; + file.mode |= EMCEE_FILE_WRITE; + file.mode &= ~EMCEE_FILE_READ; 
-%{{{ Chain initialization function -define empiric_cdf_inverse (p, a, amin, amax) %{{{ -{ - variable s = array_sort(p); + % Create fits file and write headers + file.handle = fits_open_file(file.filename, "c"); - if (p[s][0]<0 || p[s][-1]>=1) - throw DomainError, "not in range 0<=p<1"; + % write first table + __emceeFitsWriteT1(file.handle, engine); - a = a[array_sort(a)]; - a = a[where(amin<=a<=amax)]; % restrict to cdf in range + % write second table + __emceeFitsWriteT2(file.handle, engine); - variable u = unique(a); - variable ecdf = [u/1./length(a), 1.]; - variable lo = [amin, a[u]]; - variable hi = [a[u], amax]; + % write third table + %__emceeFitsWriteT3(file.handle, engine); - variable r = Double_Type[length(p)]; - variable k, i = 0; - variable m = .5*([(ecdf[[1:]]-ecdf[[:-2]])/(lo[[1:]]-lo[[:-2]]), 0.] - +[0., (ecdf[[1:]]-ecdf[[:-2]])/(hi[[1:]]-hi[[:-2]])]); + % move back to chain table + () = _fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); - variable hitsmin = (amin == a[0]); % gives NaN if true and p == 0 - _for k (0, length(p)-1) { - while (p[s[k]] > ecdf[i+1]) i++; - if (hitsmin && p[s[k]]==0) - r[s[k]] = amin; - else - r[s[k]] = (p[s[k]]-ecdf[i])/m[i]+lo[i]; - } + % set write cycle + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/engine.totalNumberWalkers; + if (file.cycle < 1) + file.cycle = 1; - return r; + % fits routine customs + file.numberSteps = 0; + file.sloppy = 0; } %}}} +%}}} -private define emceeInitChainPick (init, engine) %{{{ +% Open function %{{{ + +private define __emceeFitsReadChecks (file, engine) %{{{ { - variable file = engine.leader.inFile; - variable walkerDistribution, numberSteps; - (walkerDistribution, numberSteps) = file.read(engine, init.steps); - variable par = get_params(); + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } - variable 
parRand; - variable i,j; - _for i (0, engine.numberParameters-1) { - parRand = empiric_cdf_inverse(@(init.rng)(engine.totalNumberWalkers), - walkerDistribution[i], - par[i].min, - par[i].max); - _for j (0, engine.totalNumberWalkers-1) - engine.walkers[j][i] = parRand[j]; + if (fits_read_key(handle, "MODEL") != get_fit_fun()) { + fits_close_file(handle); + handle = NULL; + throw IsisError, "Current model and chain model do not match"; } -} -%}}} -%}}} -private define emceeInitChain () %{{{ -{ - variable init = struct { @EmceeInit, rng, steps }; - init.pick = &emceeInitChainPick; - init.rng = qualifier("rng", &rand_uniform); - init.steps = qualifier("steps", 10); - return init; + variable tab = fits_read_table(handle); + ifnot (struct_field_exists(tab, "free") + || struct_field_exists(tab, "value")) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable par = __parameters(engine.fit.object); + if ((length(where(tab.free)) != num_free_params()) + || any(tab.index[where(tab.free)] != par.index[array_sort(par.index)])) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Free parameters and chain parameters differ"; + } } %}}} -EmceeInitRegister["chain"] = &emceeInitChain; -%}}} -%{{{ Ship interface: -%!%+ -%\function{emcee--driver} -%\synopsis{Set emcee parallel computation method} -%\usage{driver="method;options"} -%\description -% The driver method can be set with the function string -% "method;parameter" -% -% Available methods: -% serial : The serial driver. No parallelization at all -% -% fork : The fork (& socket) parallel driver. Per default uses -% _num_cpus many tasks. 
-% ; tasks : [=_num_cpus] Number of total processes used -% -% mpi : The mpi parallel driver using as many nodes as registered -% in an mpi environment -%!%- -% 1: setSail - set id for engines and how many there are -% 2: leader_send - leader sends to members -% 3: member_send - members send to leader -% 4: leader_receive - leader receives members -% 5: member_receive - member receive leader -% 6: enterHarbor - cleanup if necessary -private variable EmceeShip = struct { - setSail, % function - leaderSend, % function - memberSend, % function - leaderReceive, % function - memberReceive, % function - enterHarbor, % function +private define __emceeFitsWriteChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } - engine, % the working horse + if ((fits_read_key(handle, "STATISTIC") != get_fit_statistic()) && (file.sloppy<2)) { + fits_close_file(handle); + handle = NULL; + throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level (at least 2) to continue anyway"; + } + fits_update_key(handle, "STATISTIC", get_fit_statistic()); - % private data -}; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } -%{{{ serial ship functions -private define void () %{{{ -{ - variable args = __pop_list(_NARGS); + if ((fits_read_key(handle, "NWALKERS")*fits_read_key(handle, "NFREEPAR")) != length(engine.walkers) + && (handle.sloppy<1)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Number of walkers differs from number used in chain file, increase sloppy level to continue"; + } } %}}} -private define emceeSerialInit (ship) %{{{ -{ - ship.engine.numberEngines = 1; - ship.engine.id = 0; -} -%}}} -%}}} -private define emceeShipSerial () %{{{ +private define 
emceeFileFitsOpen (file, engine) %{{{ { - variable ship = struct { @EmceeShip }; - ship.setSail = &emceeSerialInit; - ship.leaderSend = &void; - ship.memberSend = &void; - ship.leaderReceive = &void; - ship.memberReceive = &void; - ship.enterHarbor = &void; + file.mode |= EMCEE_FILE_READ | EMCEE_FILE_WRITE; - return ship; -} -%}}} -EmceeShipRegister["serial"] = &emceeShipSerial; + file.handle = fits_open_file(file.filename, "w"); -%{{{ fork ship functions -private define elementType (t) %{{{ -{ - switch (t) - { case Char_Type: "c"; } - { case UChar_Type: "C"; } - { case Short_Type: "h"; } - { case UShort_Type: "H"; } - { case Int_Type: "i"; } - { case UInt_Type: "I"; } - { case Long_Type: "l"; } - { case ULong_Type: "L"; } - { case LLong_Type: "m"; } - { case ULLong_Type: "M"; } - { case Int16_Type: "j"; } - { case UInt16_Type: "J"; } - { case Int32_Type: "k"; } - { case UInt32_Type: "K"; } - { case Int64_Type: "q"; } - { case UInt64_Type: "Q"; } - { case Float_Type: "f"; } - { case Double_Type: "d"; } - { case Float32_Type: "F"; } - { case Float64_Type: "D"; } - { case String_Type: "s"; } - { case Null_Type: "x"; } -} -%}}} + __emceeFitsReadChecks(file, engine); + __emceeFitsWriteChecks(file, engine); -private define writeArray (fp, array) %{{{ -{ - variable bytes, msg, fmt; - fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); - msg = pack(fmt, array); - bytes = write(fp, msg); - return bstrlen(msg)-bytes; -} -%}}} + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; + } -private define readArray (fp, array) %{{{ -{ - variable i, bytes, msg, fmt; - fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); - bytes = read(fp, &msg, sizeof_pack(fmt)); - array[*] = unpack(fmt, msg); - return bstrlen(msg)-bytes; + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/length(engine.walkers); + if 
(file.cycle < 1) + file.cycle = 1; + + file.numberSteps = fits_get_num_rows(file.handle); } %}}} +%}}} -private define emceeForkSetSail (ship) %{{{ +% Read function %{{{ +private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ { - variable sockRead, sockWrite; - variable pid, cid=0, t; - variable flags; + file.mode |= EMCEE_FILE_READ; + file.mode &= ~EMCEE_FILE_WRITE; - _for t (1, ship.tasks-1) { - (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); - cid++; - pid = fork(); - if (pid == -1) - throw InternalError, sprintf("Unable to fork process %d", cid); - else if (pid == 0) { - () = close(sockWrite); - ship.socket = sockRead; - break; - } else { % set master pipes - if (NULL == ship.socket) - ship.socket = FD_Type[ship.tasks]; -% flags = fcntl_getfd(sockWrite); -% fcntl_setfd(sockWrite, flags | O_NONBLOCK); - ship.socket[cid] = sockWrite; - () = close(sockRead); + file.handle = fits_open_file(file.filename, "r"); + + __emceeFitsReadChecks(file, engine); + + if (file.mode & EMCEE_FILE_RANGE) { + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; } - } - ship.engine.id = (pid == 0) ? 
cid : 0; - ship.engine.numberEngines = ship.tasks; + variable params = fits_read_table(file.handle); + return struct { + name = params.name, + value = params.value, + min = params.min, + max = params.max + }, numberWalkers; + } else { + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + variable totalNumberWalkers = fits_read_key(file.handle, "NWALKERS") + *fits_read_key(file.handle, "NFREEPAR"); + variable numberParameters = fits_get_num_cols(file.handle); + variable totalNumberRecords = fits_get_num_rows(file.handle); + variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each + () = _fits_read_cols(file.handle, + [3:numberParameters], + max([0, totalNumberRecords-numberWalkers]), + numberWalkers, + &walkerDistribution); + + fits_close_file(file.handle); + + return walkerDistribution, totalNumberWalkers; + } } %}}} +%}}} -private define emceeForkLeaderSend (ship) %{{{ +% Write function %{{{ +private define emceeFileFitsWrite (file, engine, numberWalkersSteps) %{{{ { - variable walkers, - pivots, - rolls; - variable engine = ship.engine; - variable totalOffset = engine.leader.totalOffset; - variable walkersPerSet = engine.leader.walkersPerSet; + if (numberWalkersSteps > engine.leader.writeBuffer.size) + throw InternalError, "Trying to write more than accessible"; + + variable par = __parameters(engine.fit.object); + variable npar = engine.numberParameters; + %variable walkersPerCycle = engine.leader.writeBuffer.size; % total_walkers*steps_per_cycle variable i,j; - variable firstIndex = walkersPerSet[0]; - _for i (1, engine.numberEngines-1) { - % set the walkers for node i - walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; - % pick the pivots for node i - pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; - % set the randoms for node i - rolls = 
engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] - +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + variable firstIndex = fits_get_num_rows(file.handle)+1; % first index of this cycle + variable parCycle = Double_Type[numberWalkersSteps]; - _for j (0, walkersPerSet[i]-1, 1) { - () = writeArray(ship.socket[i], walkers[j]); - () = writeArray(ship.socket[i], pivots[j]); - } - () = writeArray(ship.socket[i], rolls); - firstIndex += walkersPerSet[i]; + _for j (0, npar-1, 1) { + _for i (0, numberWalkersSteps-1, 1) + parCycle[i] = engine.leader.writeBuffer.walkers[i][j]; + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, sprintf("CHAINS%d", par.index[j])), + firstIndex, + 1, + parCycle); } + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "FITSTAT"), + firstIndex, + 1, + engine.leader.writeBuffer.stat[[:numberWalkersSteps-1]]); + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "UPDATE"), + firstIndex, + 1, + engine.leader.writeBuffer.update[[:numberWalkersSteps-1]]); } %}}} +%}}} -private define emceeForkMemberSend (ship) %{{{ +% Close function %{{{ +private define emceeFileFitsClose (file, engine) %{{{ { - variable engine = ship.engine; - variable setOffset = engine.setOffset; - variable setLength = engine.setLength; - + % todo: write fitstat table + variable nHDUs = fits_get_num_hdus(file.handle); variable i; - _for i (0, setLength-1, 1) - () = writeArray(ship.socket, engine.walkers[i+setOffset]); - () = writeArray(ship.socket, engine.update[[0:setLength-1]+setOffset]); - () = writeArray(ship.socket, engine.stat[[0:setLength-1]+setOffset]); + if (file.mode) { + _for i (1, nHDUs) { + () = _fits_movabs_hdu(file.handle, i); + fits_write_chksum(file.handle); + } + } + + fits_close_file(file.handle); } %}}} - -private define emceeForkLeaderReceive (ship) %{{{ +%}}} +%}}} +private define emceeFileFits () %{{{ { - variable walker, - stat, - update; - - variable engine = ship.engine; - variable 
totalOffset = engine.leader.totalOffset; - variable walkersPerSet = engine.leader.walkersPerSet; + variable file = struct { @EmceeFile, numberSteps, sloppy }; + file.create = &emceeFileFitsCreate; + file.open = &emceeFileFitsOpen; + file.read = &emceeFileFitsRead; + file.write = &emceeFileFitsWrite; + file.close = &emceeFileFitsClose; - variable i,j; - variable firstIndex = walkersPerSet[0]; % skip master walkers + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + file.mode |= qualifier_exists("parameter") ? EMCEE_FILE_RANGE : 0; + file.cycle = 1; - walker = Double_Type[engine.numberParameters]; - _for i (1, engine.numberEngines-1) { - stat = Double_Type[walkersPerSet[i]]; - update = Int_Type[walkersPerSet[i]]; + file.numberSteps = 0; + file.sloppy = 0; - _for j (0, walkersPerSet[i]-1, 1) { - () = readArray(ship.socket[i], walker); - engine.walkers[j+firstIndex+totalOffset][*] = walker; - } + return file; +} +%}}} +EmceeFileRegister["fits"] = &emceeFileFits; - () = readArray(ship.socket[i], update); - () = readArray(ship.socket[i], stat); +%{{{ Par file functions - engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; - engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; +%{{{ create function +%}}} - firstIndex += walkersPerSet[i]; - } -} +%{{{ open function %}}} -private define emceeForkMemberReceive (ship) %{{{ +%{{{ read function +private define emceeFileParRead (file, engine, numberWalkers) %{{{ { - variable rolls, - param; - - variable engine = ship.engine; - variable setOffset = engine.setOffset; - variable setLength = engine.setLength; - variable nRolls = engine.gears.step.numberRandoms; - - variable j; - rolls = Double_Type[setLength*nRolls]; - param = Double_Type[engine.numberParameters]; - _for j (0, setLength-1, 1) { - () = readArray(ship.socket, param); - engine.walkers[j+setOffset][*] = param; - () = readArray(ship.socket, param); - engine.pivots[j+setOffset][*] = param; + % we read a number 
of par files (eventually with multipliers) + variable file_list = strchop(file.filename, ';', 0); + variable weights = UInt_Type[length(file_list)]; + variable i, tmp; + + _for i (0, length(file_list)-1) { + tmp = strchop(file_list[i], ':', 0); + if (length(tmp)>1) weights[i] = atoi(tmp[1]); % zero, if error + else weights[i] = 1; } - () = readArray(ship.socket, rolls); - engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; + } %}}} +%}}} -private define emceeForkEnterHarbor (ship) %{{{ -{ - variable id; - if (ship.engine.id == 0) { - _for id (1, ship.engine.numberEngines-1) - () = close(ship.socket[id]); - } else { - () = close(ship.socket); - exit(); - } -} +%{{{ write function %}}} + +%{{{ close function %}}} -private define emceeShipFork () %{{{ -{ - variable ship = struct { @EmceeShip, socket, tasks }; - ship.setSail = &emceeForkSetSail; - ship.leaderSend = &emceeForkLeaderSend; - ship.memberSend = &emceeForkMemberSend; - ship.leaderReceive = &emceeForkLeaderReceive; - ship.memberReceive = &emceeForkMemberReceive; - ship.enterHarbor = &emceeForkEnterHarbor; - ship.tasks = qualifier("tasks", _num_cpus()); - return ship; -} %}}} -EmceeShipRegister["fork"] = &emceeShipFork; -%{{{ MPI Ship functions -private define emceeMPISetSail (ship) %{{{ +private define emceeFilePar () %{{{ { - variable engine = ship.engine; - engine.id = rcl_mpi_init(); - engine.numberEngines = rcl_mpi_numtasks(); - rcl_init_mpi_request(engine.numberEngines); + variable file = struct { @EmceeFile }; + file.create = &NULL; + file.open = &NULL; + file.read = &emceeFileParRead; + file.write = &NULL; + file.close = &NULL; + + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + % there is no other mode + file.mode = EMCEE_FILE_READ | EMCEE_FILE_RANGE; + file.cycle = 1; + + return file; } %}}} +EmceeFileRegister["par"] = &emceeFilePar; +%}}} +%}}} -private define emceeMPILeaderSend (ship) %{{{ +%{{{ Init interface +%!%+ +%\function{emcee--init} +%\synopsis{Set 
emcee initialization function} +%\usage{init="method;parameters";} +%\description +% The initialization method can be set with the function string +% "method;parameter" +% Initialization methods that read from file use the defined input +% method (default: fits). +% +% Available methods: +% uniform : Draw initial walker positions from a uniform distribution +% within the parameter ranges. +% +% gauss : Draw initial walker positions from a gaussian distribution +% within parameter ranges. +% ; sigma : [=10.] Sigma of the gauss function in terms of the +% parameter range. +% +% file : Load initial walkers from a valid chain file created by the +% emcee method +% +% chain : Draw initial walkers from an approximated CDF of an existing +% chain file. +% ; steps : [=10] The number of steps to concider for constructing the CDF +% (from the end of the chain) +% ; rng : [=&rand_uniform] uniform random number generator +% +%!%- +% 1: pick - get walkers from parameters and distribution or file +private variable EmceeInit = struct { + pick, % function + + % private data +}; + +%{{{ Uniform initialization function +% pick random parameter values within the boundaries +private define emceeInitUniformPick (init, engine) %{{{ { -#ifexists rcl_mpi_init - variable walkers, - pivots, - rolls; - variable engine = ship.engine; - variable totalOffset = engine.leader.totalOffset; - variable walkersPerSet = engine.leader.walkersPerSet; + variable i; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); - variable i,j; - variable firstIndex = walkersPerSet[0]; % skip master walkers + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set inital walkers"; - _for i (1, engine.numberEngines-1) { % loop over the slave nodes and send relevant data - % set the walkers for node i - walkers = 
engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; - % pick the pivots for node i - pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; - % set the randoms for node i - rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] - +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_uniform(numParameter)*(par.max-par.min)+par.min; +} +%}}} +%}}} +private define emceeInitUniform () %{{{ +{ + variable init = struct { @EmceeInit }; + init.pick = &emceeInitUniformPick; - _for j (0, walkersPerSet[i]-1, 1) { - () = rcl_mpi_org_isend_double(walkers[j], length(walkers[j]), i, 0); % send current walkers with tag 0 - () = rcl_mpi_org_isend_double(pivots[j], length(pivots[j]), i, 1); % send pivots from other set with tag 1 - } + return init; +} +%}}} +EmceeInitRegister["uniform"] = &emceeInitUniform; - () = rcl_mpi_org_isend_double(rolls, length(rolls), i, 2); % send random numbers with tag 2 - firstIndex += walkersPerSet[i]; - } -#endif +%{{{ Gauss initialization function +private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ +{ + variable upper = Real(cerf((bmax-v)/sqrt(2.)/sigma)); + variable lower = Real(cerf((bmin-v)/sqrt(2.)/sigma)); + + return sqrt(2)*erfinv(rand_uniform(length(v))*(upper-lower)+lower)*sigma+v; } %}}} -private define emceeMPILeaderReceive (ship) %{{{ +private define emceeInitGaussPick (init, engine) %{{{ { -#ifexists rcl_mpi_init - variable walker, - stat, - update; - - variable engine = ship.engine; - variable totalOffset = engine.leader.totalOffset; - variable walkersPerSet = engine.leader.walkersPerSet; - - variable i,j; - variable firstIndex = walkersPerSet[0]; % skip master walkers + variable i,w; + variable par = __parameters(engine.fit.object); + variable numParameter = length(par.value); - walker = Double_Type[engine.numberParameters]; - _for i (1, engine.numberEngines-1) { - stat = Double_Type[walkersPerSet[i]]; - 
update = Int_Type[walkersPerSet[i]]; + % throw an error on unspecified bounds + if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) + throw UsageError, "Some parameters have unspecified parameter ranges, unable to set initial walkers"; - _for j (0, walkersPerSet[i]-1, 1) { - () = rcl_mpi_org_recv_double(walker, length(walker), i, i); - engine.walkers[j+firstIndex+totalOffset][*] = walker; - } + variable sigma = (par.max-par.min)/init.sigma; + _for i (0, engine.totalNumberWalkers-1) + engine.walkers[i] = rand_gauss_cut(sigma, par.value, par.min, par.max); +} +%}}} +%}}} +private define emceeInitGauss () %{{{ +{ + variable init = struct { @EmceeInit, sigma }; + init.pick = &emceeInitGaussPick; + init.sigma = qualifier("sigma", 10); - () = rcl_mpi_org_recv_int(update, length(update), i, i); - () = rcl_mpi_org_recv_double(stat, length(stat), i, i); + return init; +} +%}}} +EmceeInitRegister["gauss"] = &emceeInitGauss; - engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; - engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; +%{{{ File initialization function +private define fisher_yates (a, n) %{{{ +{ + if (n>length(a)) + return rand_int(0, length(a)-1, n); - firstIndex += walkersPerSet[i]; - } -#endif + variable p = [length(a)-n:length(a)-1]; + variable j; + _for j (0, n-2) + array_swap(p, j, j+rand_int(0, n-1)); + return p; } %}}} -private define emceeMPIMemberSend (ship) %{{{ +private define emceeInitFilePick (init, engine) %{{{ { -#ifexists rcl_mpi_init - variable engine = ship.engine; - variable setOffset = engine.setOffset; - variable setLength = engine.setLength; + variable file = engine.leader.inFile; + variable walkerDistribution, readNumber; + (walkerDistribution, readNumber) = file.read(engine, engine.totalNumberWalkers); - variable i; - _for i (0, setLength-1, 1) { - () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], - length(engine.walkers[0]), 0, engine.id); + variable i,j; + % randomize (and bootstrap 
if necessary) + variable randomize = fisher_yates(walkerDistribution, readNumber); + variable parV = Double_Type[engine.numberParameters]; + _for i (0, length(engine.walkers)-1) { + _for j (0, length(parV)-1) + parV[j] = walkerDistribution[randomize[i]][j]; + engine.walkers[i] = @parV; } +} +%}}} +%}}} +private define emceeInitFile () %{{{ +{ + variable init = struct { @EmceeInit }; + init.pick = &emceeInitFilePick; - () = rcl_mpi_org_isend_int(engine.update[[0:setLength-1]+setOffset], setLength, 0, engine.id); - () = rcl_mpi_org_isend_double(engine.stat[[0:setLength-1]+setOffset], setLength, 0, engine.id); -#endif + return init; } %}}} +EmceeInitRegister["file"] = &emceeInitFile; -private define emceeMPIMemberReceive (ship) %{{{ +%{{{ Chain initialization function +define empiric_cdf_inverse (p, a, amin, amax) %{{{ { -#ifexists rcl_mpi_init - variable rolls; - variable engine = ship.engine; - variable setOffset = engine.setOffset; - variable setLength = engine.setLength; - variable nRolls = engine.gears.step.numberRandoms; + variable s = array_sort(p); - variable j; - rolls = Double_Type[setLength*nRolls]; - _for j (0, setLength-1, 1) { - () = rcl_mpi_org_recv_double(engine.walkers[j+setOffset], - length(engine.walkers[j+setOffset]), 0, 0); % receive walkers (tag 0) - () = rcl_mpi_org_recv_double(engine.pivots[j+setOffset], - length(engine.pivots[j+setOffset]), 0, 1); % receive pivot points (tag 1) + if (p[s][0]<0 || p[s][-1]>=1) + throw DomainError, "not in range 0<=p<1"; + + a = a[array_sort(a)]; + a = a[where(amin<=a<=amax)]; % restrict to cdf in range + + variable u = unique(a); + variable ecdf = [u/1./length(a), 1.]; + variable lo = [amin, a[u]]; + variable hi = [a[u], amax]; + + variable r = Double_Type[length(p)]; + variable k, i = 0; + variable m = .5*([(ecdf[[1:]]-ecdf[[:-2]])/(lo[[1:]]-lo[[:-2]]), 0.] 
+ +[0., (ecdf[[1:]]-ecdf[[:-2]])/(hi[[1:]]-hi[[:-2]])]); + + variable hitsmin = (amin == a[0]); % gives NaN if true and p == 0 + _for k (0, length(p)-1) { + while (p[s[k]] > ecdf[i+1]) i++; + if (hitsmin && p[s[k]]==0) + r[s[k]] = amin; + else + r[s[k]] = (p[s[k]]-ecdf[i])/m[i]+lo[i]; } - () = rcl_mpi_org_recv_double(rolls, - setLength*engine.gears.step.numberRandoms, 0, 2); % receive random numbers (tag 2) - engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; -#endif + return r; } %}}} -private define emceeMPIEnterHarbor (ship) %{{{ +private define emceeInitChainPick (init, engine) %{{{ { - + variable file = engine.leader.inFile; + variable walkerDistribution, numberSteps; + (walkerDistribution, numberSteps) = file.read(engine, init.steps); + variable par = get_params(); + + variable parRand; + variable i,j; + _for i (0, engine.numberParameters-1) { + parRand = empiric_cdf_inverse(@(init.rng)(engine.totalNumberWalkers), + walkerDistribution[i], + par[i].min, + par[i].max); + _for j (0, engine.totalNumberWalkers-1) + engine.walkers[j][i] = parRand[j]; + } } %}}} %}}} -private define emceeShipMPI () %{{{ +private define emceeInitChain () %{{{ { - variable ship = struct { @EmceeShip }; - ship.setSail = &emceeMPISetSail; - ship.leaderSend = &emceeMPILeaderSend; - ship.memberSend = &emceeMPIMemberSend; - ship.leaderReceive = &emceeMPILeaderReceive; - ship.memberReceive = &emceeMPIMemberReceive; - ship.enterHarbor = &emceeMPIEnterHarbor; + variable init = struct { @EmceeInit, rng, steps }; + init.pick = &emceeInitChainPick; + init.rng = qualifier("rng", &rand_uniform); + init.steps = qualifier("steps", 10); - return ship; + return init; } %}}} -EmceeShipRegister["mpi"] = &emceeShipMPI; +EmceeInitRegister["chain"] = &emceeInitChain; %}}} -%{{{ File interface: +%{{{ Ship interface: %!%+ -%\function{emcee--file} -%\synopsis{Set emcee file input and output methods} -%\usage{input="method;options" -% \altusage{output="method;options"}} +%\function{emcee--driver} 
+%\synopsis{Set emcee parallel computation method} +%\usage{driver="method;options"} %\description -% The file inpu/output methods can be set with the function string -% "method;parameter" -% -% Available methods: -% fit : Fits file interface to write the chain as fits table extension -%!%- -% 1: create - open new file pointer and write necessary intial values -% 2: open - open existing file for read/write -% 3: read - open file and return n walkers and how many walkers were used -% 4: write - write cycle steps to the file (n) -% 5: close - close open file at end -private variable EmceeFile = struct { - create, % function - open, % function - read, % function - write, % function - close, % function - - mode, % 0 read, 1 write, 2 read | write - handle, % file handle - filename, % full file name - cycle, % number of steps before file gets written - - % additional private data -}; - -%{{{ Fits file functions - -% Create function %{{{ -private define __emceeFitsWriteT1(handle, engine) %{{{ -{ - variable dataInfo; - list_data(&dataInfo); - - variable par = __parameters(engine.fit.object); - variable params = get_params(); - variable numberTotalParams = length(params); - - %variable parNames = array_map(String_Type, &get_struct_field, get_params(), "name")[par.index-1]; - -% fits_create_binary_table(handle, "PARAMETERS", num_free_params(), -% ["FREE_PAR", "FREE_PAR_NAME"], -% ["J", sprintf("%dA", max(array_map(Int_Type, &strlen, parNames)))], -% [" parameter indices", " parameter names"]); - variable paramsTable = struct { - name=String_Type[numberTotalParams], - index=Int_Type[numberTotalParams], - value=Double_Type[numberTotalParams], - min=Double_Type[numberTotalParams], - max=Double_Type[numberTotalParams], - hard_min=Double_Type[numberTotalParams], - hard_max=Double_Type[numberTotalParams], - freeze=Int_Type[numberTotalParams], - tie=String_Type[numberTotalParams], - units=String_Type[numberTotalParams], - fun=String_Type[numberTotalParams], - 
free=Int_Type[numberTotalParams], % combines freeze, fun and tie - }; - variable j; - _for j (0, numberTotalParams-1) { - paramsTable.name[j] = params[j].name; - paramsTable.index[j] = params[j].index; - paramsTable.value[j] = params[j].value; - paramsTable.min[j] = params[j].min; - paramsTable.max[j] = params[j].max; - paramsTable.hard_min[j] = params[j].hard_min; - paramsTable.hard_max[j] = params[j].hard_max; - paramsTable.freeze[j] = params[j].freeze; - paramsTable.tie[j] = (params[j].tie == NULL) ? "" : params[j].tie; - paramsTable.units[j] = params[j].units; - paramsTable.fun[j] = (params[j].fun == NULL) ? "" : params[j].fun; - paramsTable.free[j] = (not params[j].freeze) and (params[j].fun == NULL) and (params[j].tie == NULL); - } - fits_write_binary_table(handle, "PARAMETERS", paramsTable); +% The driver method can be set with the function string +% "method;parameter" +% +% Available methods: +% serial : The serial driver. No parallelization at all +% +% fork : The fork (& socket) parallel driver. Per default uses +% _num_cpus many tasks. 
+% ; tasks : [=_num_cpus] Number of total processes used +% +% mpi : The mpi parallel driver using as many nodes as registered +% in an mpi environment +%!%- +% 1: setSail - set id for engines and how many there are +% 2: leader_send - leader sends to members +% 3: member_send - members send to leader +% 4: leader_receive - leader receives members +% 5: member_receive - member receive leader +% 6: enterHarbor - cleanup if necessary +private variable EmceeShip = struct { + setSail, % function + leaderSend, % function + memberSend, % function + leaderReceive, % function + memberReceive, % function + enterHarbor, % function - fits_update_key(handle, "MODEL", get_fit_fun(), "model function"); - fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); - fits_update_key(handle, "SLOPPY", 0, " sloppy level"); + engine, % the working horse - array_map(&fits_write_comment, handle, strchop(dataInfo, '\n', 0)); + % private data +}; - % sort to index order here -% if (_fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR"), 1, 1, par.index[array_sort(par.index)]) -% && _fits_write_col(handle, fits_get_colnum(handle, "FREE_PAR_NAME"), 1, 1, parNames)) -% throw IOError; +%{{{ serial ship functions +private define void () %{{{ +{ + variable args = __pop_list(_NARGS); } %}}} -private define __emceeFitsWriteT2(handle, engine) %{{{ +private define emceeSerialInit (ship) %{{{ { - variable par = __parameters(engine.fit.object); + ship.engine.numberEngines = 1; + ship.engine.id = 0; +} +%}}} +%}}} +private define emceeShipSerial () %{{{ +{ + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeSerialInit; + ship.leaderSend = &void; + ship.memberSend = &void; + ship.leaderReceive = &void; + ship.memberReceive = &void; + ship.enterHarbor = &void; - fits_create_binary_table(handle, "MCMCCHAIN", 0, - ["FITSTAT", "UPDATE", array_map(String_Type, &sprintf, "CHAINS%d", par.index)], - ["D", "J", ["D"][par.index*0]], - [" fit statistics", " update 
indicator", [" parameter values"][par.index*0]]); - fits_update_key(handle, "NWALKERS", engine.totalNumberWalkers/engine.numberParameters, " Number of walkers per free parameter"); - fits_update_key(handle, "NFREEPAR", engine.numberParameters, " Number of free parameters"); - fits_update_key(handle, "NSTEPS", engine.numberSteps, " Numer of iteration steps done"); + return ship; } %}}} +EmceeShipRegister["serial"] = &emceeShipSerial; -private define __emceeFitsWriteT3(handle, engine) %{{{ +%{{{ fork ship functions +private define elementType (t) %{{{ { - fits_create_binary_table(handle, "CHAINSTATS", 0, - ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], - [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + switch (t) + { case Char_Type: "c"; } + { case UChar_Type: "C"; } + { case Short_Type: "h"; } + { case UShort_Type: "H"; } + { case Int_Type: "i"; } + { case UInt_Type: "I"; } + { case Long_Type: "l"; } + { case ULong_Type: "L"; } + { case LLong_Type: "m"; } + { case ULLong_Type: "M"; } + { case Int16_Type: "j"; } + { case UInt16_Type: "J"; } + { case Int32_Type: "k"; } + { case UInt32_Type: "K"; } + { case Int64_Type: "q"; } + { case UInt64_Type: "Q"; } + { case Float_Type: "f"; } + { case Double_Type: "d"; } + { case Float32_Type: "F"; } + { case Float64_Type: "D"; } + { case String_Type: "s"; } + { case Null_Type: "x"; } } %}}} -private define emceeFileFitsCreate (file, engine) %{{{ +private define writeArray (fp, array) %{{{ { - file.mode = 1; + variable bytes, msg, fmt; + fmt = sprintf("%s%d", elementType(_typeof(array)), length(array)); + msg = pack(fmt, array); + bytes = write(fp, msg); + return bstrlen(msg)-bytes; +} +%}}} - % Create fits file and write headers - file.handle = fits_open_file(file.filename, "c"); +private define readArray (fp, array) %{{{ +{ + variable i, bytes, msg, fmt; + fmt = sprintf("%s%d", 
elementType(_typeof(array)), length(array)); + bytes = read(fp, &msg, sizeof_pack(fmt)); + array[*] = unpack(fmt, msg); + return bstrlen(msg)-bytes; +} +%}}} - % write first table - __emceeFitsWriteT1(file.handle, engine); +private define emceeForkSetSail (ship) %{{{ +{ + variable sockRead, sockWrite; + variable pid, cid=0, t; + variable flags; - % write second table - __emceeFitsWriteT2(file.handle, engine); + _for t (1, ship.tasks-1) { + (sockRead, sockWrite) = socketpair(AF_UNIX, SOCK_STREAM, 0); + cid++; + pid = fork(); + if (pid == -1) + throw InternalError, sprintf("Unable to fork process %d", cid); + else if (pid == 0) { + () = close(sockWrite); + ship.socket = sockRead; + break; + } else { % set master pipes + if (NULL == ship.socket) + ship.socket = FD_Type[ship.tasks]; +% flags = fcntl_getfd(sockWrite); +% fcntl_setfd(sockWrite, flags | O_NONBLOCK); + ship.socket[cid] = sockWrite; + () = close(sockRead); + } + } - % write third table - %__emceeFitsWriteT3(file.handle, engine); + ship.engine.id = (pid == 0) ? 
cid : 0; + ship.engine.numberEngines = ship.tasks; +} +%}}} - % move back to chain table - () = _fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); +private define emceeForkLeaderSend (ship) %{{{ +{ + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; - % set write cycle - () = _fits_get_rowsize(file.handle, &(file.cycle)); - file.cycle = file.cycle/engine.totalNumberWalkers; - if (file.cycle < 1) - file.cycle = 1; + variable i,j; + variable firstIndex = walkersPerSet[0]; + _for i (1, engine.numberEngines-1) { + % set the walkers for node i + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % pick the pivots for node i + pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % set the randoms for node i + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; - % fits routine customs - file.numberSteps = 0; - file.sloppy = 0; + _for j (0, walkersPerSet[i]-1, 1) { + () = writeArray(ship.socket[i], walkers[j]); + () = writeArray(ship.socket[i], pivots[j]); + } + () = writeArray(ship.socket[i], rolls); + firstIndex += walkersPerSet[i]; + } } %}}} -%}}} - -% Open function %{{{ -private define __emceeFitsReadChecks (file, engine) %{{{ +private define emceeForkMemberSend (ship) %{{{ { - variable handle = file.handle; - if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { - fits_close_file(handle); - handle = NULL; - throw IOError, "Not a emcee chain file"; - } - - if (fits_read_key(handle, "MODEL") != get_fit_fun()) { - fits_close_file(handle); - handle = NULL; - throw IsisError, "Current model and chain model do not match"; - } + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; - variable tab = fits_read_table(handle); - ifnot 
(struct_field_exists(tab, "free") - || struct_field_exists(tab, "value")) { - fits_close_file(handle); - handle = NULL; - throw IOError, "Not a emcee chain file"; - } + variable i; + _for i (0, setLength-1, 1) + () = writeArray(ship.socket, engine.walkers[i+setOffset]); - variable par = __parameters(engine.fit.object); - if ((length(where(tab.free)) != num_free_params()) - || any(tab.index[where(tab.free)] != par.index[array_sort(par.index)])) { - fits_close_file(handle); - handle = NULL; - throw UsageError, "Free parameters and chain parameters differ"; - } + () = writeArray(ship.socket, engine.update[[0:setLength-1]+setOffset]); + () = writeArray(ship.socket, engine.stat[[0:setLength-1]+setOffset]); } %}}} -private define __emceeFitsWriteChecks (file, engine) %{{{ +private define emceeForkLeaderReceive (ship) %{{{ { - variable handle = file.handle; - if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { - fits_close_file(handle); - handle = NULL; - throw IOError, "Not a emcee chain file"; - } + variable walker, + stat, + update; - if ((fits_read_key(handle, "STATISTIC") != get_fit_statistic()) && (file.sloppy<2)) { - fits_close_file(handle); - handle = NULL; - throw UsageError, "Current fit statistic and chain fit statistic differ, increase sloppy level (at least 2) to continue anyway"; - } - fits_update_key(handle, "STATISTIC", get_fit_statistic()); + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; + + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers + + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = readArray(ship.socket[i], walker); + engine.walkers[j+firstIndex+totalOffset][*] = walker; + } - if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - 
fits_close_file(handle); - handle = NULL; - throw IOError, "Not a emcee chain file"; - } + () = readArray(ship.socket[i], update); + () = readArray(ship.socket[i], stat); - if ((fits_read_key(handle, "NWALKERS")*fits_read_key(handle, "NFREEPAR")) != length(engine.walkers) - && (handle.sloppy<1)) { - fits_close_file(handle); - handle = NULL; - throw IOError, "Number of walkers differs from number used in chain file, increase sloppy level to continue"; + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[i]; } } %}}} -private define emceeFileFitsOpen (file, engine) %{{{ +private define emceeForkMemberReceive (ship) %{{{ { - file.mode = 2; - - file.handle = fits_open_file(file.filename, "w"); + variable rolls, + param; - __emceeFitsReadChecks(file, engine); - __emceeFitsWriteChecks(file, engine); + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; - if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(file.handle); - file.handle = NULL; - throw IOError, "Not a emcee chain file"; + variable j; + rolls = Double_Type[setLength*nRolls]; + param = Double_Type[engine.numberParameters]; + _for j (0, setLength-1, 1) { + () = readArray(ship.socket, param); + engine.walkers[j+setOffset][*] = param; + () = readArray(ship.socket, param); + engine.pivots[j+setOffset][*] = param; } + () = readArray(ship.socket, rolls); - () = _fits_get_rowsize(file.handle, &(file.cycle)); - file.cycle = file.cycle/length(engine.walkers); - if (file.cycle < 1) - file.cycle = 1; + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; +} +%}}} - file.numberSteps = fits_get_num_rows(file.handle); +private define emceeForkEnterHarbor (ship) %{{{ +{ + variable id; + if (ship.engine.id == 0) { + _for id (1, 
ship.engine.numberEngines-1) + () = close(ship.socket[id]); + } else { + () = close(ship.socket); + exit(); + } } %}}} +%}}} +private define emceeShipFork () %{{{ +{ + variable ship = struct { @EmceeShip, socket, tasks }; + ship.setSail = &emceeForkSetSail; + ship.leaderSend = &emceeForkLeaderSend; + ship.memberSend = &emceeForkMemberSend; + ship.leaderReceive = &emceeForkLeaderReceive; + ship.memberReceive = &emceeForkMemberReceive; + ship.enterHarbor = &emceeForkEnterHarbor; + ship.tasks = qualifier("tasks", _num_cpus()); + return ship; +} %}}} +EmceeShipRegister["fork"] = &emceeShipFork; -% Read function %{{{ -private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ +%{{{ MPI Ship functions +private define emceeMPISetSail (ship) %{{{ { - file.mode = 0; - - file.handle = fits_open_file(file.filename, "r"); + variable engine = ship.engine; + engine.id = rcl_mpi_init(); + engine.numberEngines = rcl_mpi_numtasks(); + rcl_init_mpi_request(engine.numberEngines); +} +%}}} - __emceeFitsReadChecks(file, engine); +private define emceeMPILeaderSend (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable walkers, + pivots, + rolls; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; - if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { - fits_close_file(file.handle); - file.handle = NULL; - throw IOError, "Not a emcee chain file"; - } + variable i,j; + variable firstIndex = walkersPerSet[0]; % skip master walkers - variable totalNumberWalkers = fits_read_key(file.handle, "NWALKERS") - *fits_read_key(file.handle, "NFREEPAR"); - variable numberParameters = fits_get_num_cols(file.handle); - variable totalNumberRecords = fits_get_num_rows(file.handle); - variable walkerDistribution; % this has to be an array of arrays with the parameter distribution in each - () = _fits_read_cols(file.handle, - [3:numberParameters], - max([0, totalNumberRecords-numberWalkers]), - 
numberWalkers, - &walkerDistribution); + _for i (1, engine.numberEngines-1) { % loop over the slave nodes and send relevant data + % set the walkers for node i + walkers = engine.walkers[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % pick the pivots for node i + pivots = engine.pivots[[0:walkersPerSet[i]-1]+firstIndex+totalOffset]; + % set the randoms for node i + rolls = engine.rolls[[0:walkersPerSet[i]*engine.gears.step.numberRandoms-1] + +(firstIndex+totalOffset)*engine.gears.step.numberRandoms]; - fits_close_file(file.handle); + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_isend_double(walkers[j], length(walkers[j]), i, 0); % send current walkers with tag 0 + () = rcl_mpi_org_isend_double(pivots[j], length(pivots[j]), i, 1); % send pivots from other set with tag 1 + } - return walkerDistribution, totalNumberWalkers; + () = rcl_mpi_org_isend_double(rolls, length(rolls), i, 2); % send random numbers with tag 2 + firstIndex += walkersPerSet[i]; + } +#endif } %}}} -%}}} -% Write function %{{{ -private define emceeFileFitsWrite (file, engine, numberWalkersSteps) %{{{ +private define emceeMPILeaderReceive (ship) %{{{ { - if (numberWalkersSteps > engine.leader.writeBuffer.size) - throw InternalError, "Trying to write more than accessible"; +#ifexists rcl_mpi_init + variable walker, + stat, + update; - variable par = __parameters(engine.fit.object); - variable npar = engine.numberParameters; + variable engine = ship.engine; + variable totalOffset = engine.leader.totalOffset; + variable walkersPerSet = engine.leader.walkersPerSet; - %variable walkersPerCycle = engine.leader.writeBuffer.size; % total_walkers*steps_per_cycle variable i,j; - variable firstIndex = fits_get_num_rows(file.handle)+1; % first index of this cycle - variable parCycle = Double_Type[numberWalkersSteps]; + variable firstIndex = walkersPerSet[0]; % skip master walkers - _for j (0, npar-1, 1) { - _for i (0, numberWalkersSteps-1, 1) - parCycle[i] = 
engine.leader.writeBuffer.walkers[i][j]; - () = _fits_write_col(file.handle, - fits_get_colnum(file.handle, sprintf("CHAINS%d", par.index[j])), - firstIndex, - 1, - parCycle); + walker = Double_Type[engine.numberParameters]; + _for i (1, engine.numberEngines-1) { + stat = Double_Type[walkersPerSet[i]]; + update = Int_Type[walkersPerSet[i]]; + + _for j (0, walkersPerSet[i]-1, 1) { + () = rcl_mpi_org_recv_double(walker, length(walker), i, i); + engine.walkers[j+firstIndex+totalOffset][*] = walker; + } + + () = rcl_mpi_org_recv_int(update, length(update), i, i); + () = rcl_mpi_org_recv_double(stat, length(stat), i, i); + + engine.update[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = update; + engine.stat[[0:walkersPerSet[i]-1]+firstIndex+totalOffset] = stat; + + firstIndex += walkersPerSet[i]; } - () = _fits_write_col(file.handle, - fits_get_colnum(file.handle, "FITSTAT"), - firstIndex, - 1, - engine.leader.writeBuffer.stat[[:numberWalkersSteps-1]]); - () = _fits_write_col(file.handle, - fits_get_colnum(file.handle, "UPDATE"), - firstIndex, - 1, - engine.leader.writeBuffer.update[[:numberWalkersSteps-1]]); +#endif } %}}} -%}}} -% Close function %{{{ -private define emceeFileFitsClose (file, engine) %{{{ +private define emceeMPIMemberSend (ship) %{{{ { - % todo: write fitstat table - variable nHDUs = fits_get_num_hdus(file.handle); +#ifexists rcl_mpi_init + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable i; + _for i (0, setLength-1, 1) { + () = rcl_mpi_org_isend_double(engine.walkers[i+setOffset], + length(engine.walkers[0]), 0, engine.id); + } - if (file.mode) { - _for i (1, nHDUs) { - () = _fits_movabs_hdu(file.handle, i); - fits_write_chksum(file.handle); - } + () = rcl_mpi_org_isend_int(engine.update[[0:setLength-1]+setOffset], setLength, 0, engine.id); + () = rcl_mpi_org_isend_double(engine.stat[[0:setLength-1]+setOffset], setLength, 0, engine.id); +#endif +} +%}}} + +private define 
emceeMPIMemberReceive (ship) %{{{ +{ +#ifexists rcl_mpi_init + variable rolls; + variable engine = ship.engine; + variable setOffset = engine.setOffset; + variable setLength = engine.setLength; + variable nRolls = engine.gears.step.numberRandoms; + + variable j; + rolls = Double_Type[setLength*nRolls]; + _for j (0, setLength-1, 1) { + () = rcl_mpi_org_recv_double(engine.walkers[j+setOffset], + length(engine.walkers[j+setOffset]), 0, 0); % receive walkers (tag 0) + () = rcl_mpi_org_recv_double(engine.pivots[j+setOffset], + length(engine.pivots[j+setOffset]), 0, 1); % receive pivot points (tag 1) } + () = rcl_mpi_org_recv_double(rolls, + setLength*engine.gears.step.numberRandoms, 0, 2); % receive random numbers (tag 2) - fits_close_file(file.handle); + engine.rolls[[0:setLength*nRolls-1]+setOffset*nRolls] = @rolls; +#endif } %}}} + +private define emceeMPIEnterHarbor (ship) %{{{ +{ + +} %}}} %}}} -private define emceeFileFits () %{{{ +private define emceeShipMPI () %{{{ { - variable file = struct { @EmceeFile, numberSteps, sloppy }; - file.create = &emceeFileFitsCreate; - file.open = &emceeFileFitsOpen; - file.read = &emceeFileFitsRead; - file.write = &emceeFileFitsWrite; - file.close = &emceeFileFitsClose; - - file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); - file.cycle = 1; - - file.numberSteps = 0; - file.sloppy = 0; + variable ship = struct { @EmceeShip }; + ship.setSail = &emceeMPISetSail; + ship.leaderSend = &emceeMPILeaderSend; + ship.memberSend = &emceeMPIMemberSend; + ship.leaderReceive = &emceeMPILeaderReceive; + ship.memberReceive = &emceeMPIMemberReceive; + ship.enterHarbor = &emceeMPIEnterHarbor; - return file; + return ship; } %}}} -EmceeFileRegister["fits"] = &emceeFileFits; +EmceeShipRegister["mpi"] = &emceeShipMPI; %}}} %{{{ Step interface: @@ -1387,7 +1475,7 @@ private define emceeSetup (ship, steps, options) %{{{ _for j (0, length(engine.walkers)-1) engine.stat[j] = engine.fit.eval_statistic(engine.walkers[j]; nocopy); 
- if (0 == ship.engine.id) { + if (0 == engine.id) { if (options.continue) options.output.open(engine); else @@ -1495,6 +1583,13 @@ define emcee_hammer (steps) if (0 == ship.engine.id) { options.input = @(EmceeFileRegister[inputHandle])(;;inputOption); options.output = @(EmceeFileRegister[outputHandle])(;;outputOption); + + if ( not (options.input.mode & EMCEE_FILE_READ) + || ( options.input.read == &NULL ) ) + throw UsageError, sprintf("File handle '%s' can not be used for input", inputHandle); + if ( not (options.output.mode & EMCEE_FILE_WRITE) + || ( options.output.write == &NULL ) ) + throw UsageError, sprintf("File handle '%s' can not be used for output", outputHandle); } emceeSetup(ship, steps, options); -- GitLab From 4a933315a52042cf175fe1bfb115891cffe36ab4 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 8 Jun 2021 13:09:28 +0200 Subject: [PATCH 81/89] emcee par1;par2 part 2 Might work, but it is a bit messy because the init and file interfaces now interact in both directions. On the one side there is the init from file, i.e., using the last walkers from a chain either directly or as the empirical CDF; on the other side, ranges specified in a file can be used to draw walkers with the init methods (currently gauss and uniform). --- src/fitting/ensemble-samplers/emcee.sl | 104 ++++++++++++++++++------- 1 file changed, 77 insertions(+), 27 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 8f418d79..8b119c16 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -257,8 +257,6 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ % (separated by a colon). A string of the form % 'file1.par:2;file2.par' means that 2/3 of all walkers are % drawn from file1.par and 1/3 is drawn from file2.par. -% Files are processed first to last, last files are ignored if -% all walkers have been determined. 
%!%- % 1: create - open new file pointer and write necessary intial values % 2: open - open existing file for read/write @@ -508,12 +506,13 @@ private define emceeFileFitsRead (file, engine, numberWalkers) %{{{ } variable params = fits_read_table(file.handle); - return struct { + return [struct { + weight = 1, name = params.name, value = params.value, min = params.min, max = params.max - }, numberWalkers; + }], numberWalkers; } else { if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { fits_close_file(file.handle); @@ -618,7 +617,6 @@ private define emceeFileFits () %{{{ EmceeFileRegister["fits"] = &emceeFileFits; %{{{ Par file functions - %{{{ create function %}}} @@ -630,16 +628,28 @@ private define emceeFileParRead (file, engine, numberWalkers) %{{{ { % we read a number of par files (eventually with multipliers) variable file_list = strchop(file.filename, ';', 0); - variable weights = UInt_Type[length(file_list)]; - variable i, tmp; + variable params = Struct_Type[length(file_list)]; + variable weight; + variable i, s, p; _for i (0, length(file_list)-1) { - tmp = strchop(file_list[i], ':', 0); - if (length(tmp)>1) weights[i] = atoi(tmp[1]); % zero, if error - else weights[i] = 1; + s = strchop(file_list[i], ':', 0); + if (length(s)>1) weight = atoi(s[1]); % zero, if error, effectively disabling that file + else weight = 1; + + p = read_par(s[0]); + if (weight>0) + params[i] = struct { + weight = weight, + name = p.name, + value = p.value, + min = p.min, + max = p.max + }; + } - + return params, numberWalkers; } %}}} %}}} @@ -649,7 +659,6 @@ private define emceeFileParRead (file, engine, numberWalkers) %{{{ %{{{ close function %}}} - %}}} private define emceeFilePar () %{{{ @@ -661,7 +670,7 @@ private define emceeFilePar () %{{{ file.write = &NULL; file.close = &NULL; - file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.par")); % there is no other 
mode file.mode = EMCEE_FILE_READ | EMCEE_FILE_RANGE; file.cycle = 1; @@ -671,7 +680,6 @@ private define emceeFilePar () %{{{ %}}} EmceeFileRegister["par"] = &emceeFilePar; %}}} -%}}} %{{{ Init interface %!%+ @@ -710,20 +718,53 @@ private variable EmceeInit = struct { % private data }; +private define __emcee_params (par, feed, total) %{{{ +{ + variable n = UInt_Type[length(feed)]; + variable c = 0; + variable i,j,w; + _for i (0, length(feed)-1) + c += (feed[i].weight>0) ? feed[i].weight : 0; + _for i (0, length(feed)-1) { + _for j (0, length(par.name)-1) { + w = where(par.name[j] == feed[i].name); + if (length(w) != 1) % also errors if for whatever reason the parameter is twice in feed + throw UsageError, sprintf("Unspecified parameter '%s'", par.name[j]); + if ((par.min[j] > feed[i].min[w]) || (par.max[j] < feed[i].max[w])) + throw UsageError, sprintf("Parameter ranges for '%s' not containing all settings", par.name[j]); + if ((feed[i].min[w] == -DOUBLE_MAX) || (feed[i].max[w] == DOUBLE_MAX)) + throw UsageError, sprintf("Unspecified ranges for '%s'", par.name[j]); + n[i] = int(round(1.*feed[i].weight/c*total)); + } + } + + n[0] += sum(n)-total; + return n; +} +%}}} + %{{{ Uniform initialization function % pick random parameter values within the boundaries private define emceeInitUniformPick (init, engine) %{{{ { - variable i; + variable i, j, k, c, feed; variable par = __parameters(engine.fit.object); variable numParameter = length(par.value); - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges, unable to set inital walkers"; + if (engine.input.mode & EMCEE_FILE_RANGE) + feed = engine.input.read(engine, engine.totalNumberWalkers); + else + feed = [struct { @par, weight=1 }]; - _for i (0, engine.totalNumberWalkers-1) - engine.walkers[i] = rand_uniform(numParameter)*(par.max-par.min)+par.min; + c = __emcee_params(par, feed); + + k = 0; + _for 
i (0, length(c)-1) { + _for j (0, c[i]-1){ + k += j; + engine.walkers[k] = rand_uniform(numParameter)*(feed[i].max-feed[i].min)+feed[i].min; + } + } } %}}} %}}} @@ -749,17 +790,26 @@ private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ private define emceeInitGaussPick (init, engine) %{{{ { - variable i,w; + variable i,j,k,c,feed; variable par = __parameters(engine.fit.object); variable numParameter = length(par.value); + variable sigma; + + if (engine.input.mode & EMCEE_FILE_RANGE) + feed = engine.input.read(engine, engine.totalNumberWalkers); + else + feed = [struct { @par, weight=1}]; - % throw an error on unspecified bounds - if (any(par.min == -DOUBLE_MAX) || any(par.max == DOUBLE_MAX)) - throw UsageError, "Some parameters have unspecified parameter ranges, unable to set initial walkers"; + c = __emcee_params(par, feed); - variable sigma = (par.max-par.min)/init.sigma; - _for i (0, engine.totalNumberWalkers-1) - engine.walkers[i] = rand_gauss_cut(sigma, par.value, par.min, par.max); + k = 0; + _for i (0, length(c)-1) { + _for j (0, c[i]-1) { + k += j; + sigma = (feed[i].max-feed[i].min)/init.sigma; + engine.walkers[k] = rand_gauss_cut(sigma, feed[i].value, feed[i].min, feed[i].max); + } + } } %}}} %}}} -- GitLab From 0dfc878bafa9a72da8815700290478317279017b Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 9 Jun 2021 01:59:39 +0200 Subject: [PATCH 82/89] Fix bug in init pick from file Parameters got wrongly assigned. Index mix-up. 
--- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 8b119c16..0ad311e6 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -850,7 +850,7 @@ private define emceeInitFilePick (init, engine) %{{{ variable parV = Double_Type[engine.numberParameters]; _for i (0, length(engine.walkers)-1) { _for j (0, length(parV)-1) - parV[j] = walkerDistribution[randomize[i]][j]; + parV[j] = walkerDistribution[j][randomize[i]]; engine.walkers[i] = @parV; } } -- GitLab From ad406338db35360a4d696d062b556e705cfdfa34 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 9 Jun 2021 02:03:56 +0200 Subject: [PATCH 83/89] More par;par --- src/fitting/ensemble-samplers/emcee.sl | 154 +++++++++++++++++-------- 1 file changed, 103 insertions(+), 51 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 0ad311e6..76553dc0 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -272,6 +272,7 @@ private variable EmceeFile = struct { close, % function mode = 0, % 1 read, 2 write, 4 range bit (read parameter range instead of position) + has = 0, % same as mode, but lists all available (if FILE_RANGE is given it means it is the prevered method) handle, % file handle filename, % full file name cycle, % number of steps before file gets written @@ -606,6 +607,7 @@ private define emceeFileFits () %{{{ file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); file.mode |= qualifier_exists("parameter") ? 
EMCEE_FILE_RANGE : 0; + file.has |= EMCEE_FILE_READ | EMCEE_FILE_WRITE; file.cycle = 1; file.numberSteps = 0; @@ -630,7 +632,7 @@ private define emceeFileParRead (file, engine, numberWalkers) %{{{ variable file_list = strchop(file.filename, ';', 0); variable params = Struct_Type[length(file_list)]; variable weight; - variable i, s, p; + variable i, j, s, p; _for i (0, length(file_list)-1) { s = strchop(file_list[i], ':', 0); @@ -638,15 +640,21 @@ private define emceeFileParRead (file, engine, numberWalkers) %{{{ else weight = 1; p = read_par(s[0]); - if (weight>0) + if (weight>0) { params[i] = struct { weight = weight, - name = p.name, - value = p.value, - min = p.min, - max = p.max + name = String_Type[length(p)], + value = Double_Type[length(p)], + min = Double_Type[length(p)], + max = Double_Type[length(p)], }; - + _for j (0, length(p)-1) { + params[i].name = p[j].name; + params[i].value = p[j].value; + params[i].min = p[j].min; + params[i].max = p[j].max; + } + } } return params, numberWalkers; @@ -673,6 +681,7 @@ private define emceeFilePar () %{{{ file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.par")); % there is no other mode file.mode = EMCEE_FILE_READ | EMCEE_FILE_RANGE; + file.has = EMCEE_FILE_READ | EMCEE_FILE_RANGE; file.cycle = 1; return file; @@ -720,26 +729,45 @@ private variable EmceeInit = struct { private define __emcee_params (par, feed, total) %{{{ { + variable params = Assoc_Type[UInt_Type, 0]; % need full information to match names == index variable n = UInt_Type[length(feed)]; + variable eparams = Struct_Type[length(feed)]; variable c = 0; - variable i,j,w; - _for i (0, length(feed)-1) + variable i,j,w,p; + + _for i (0, length(feed)-1) { + if (feed[i] == NULL) continue; c += (feed[i].weight>0) ? 
feed[i].weight : 0; + } + + foreach p (get_params) + params[p.name] = p.index; + _for i (0, length(feed)-1) { - _for j (0, length(par.name)-1) { - w = where(par.name[j] == feed[i].name); + if (feed[i] == NULL) continue; + eparams[i] = @par; % as prototype + + _for j (0, length(par.index)-1) { + w = where(par.index[j] == params[feed[i].name]); if (length(w) != 1) % also errors if for whatever reason the parameter is twice in feed - throw UsageError, sprintf("Unspecified parameter '%s'", par.name[j]); - if ((par.min[j] > feed[i].min[w]) || (par.max[j] < feed[i].max[w])) - throw UsageError, sprintf("Parameter ranges for '%s' not containing all settings", par.name[j]); - if ((feed[i].min[w] == -DOUBLE_MAX) || (feed[i].max[w] == DOUBLE_MAX)) - throw UsageError, sprintf("Unspecified ranges for '%s'", par.name[j]); - n[i] = int(round(1.*feed[i].weight/c*total)); + throw UsageError, sprintf("Unspecified parameter %d", par.index[j]); + if ((par.min[j] > feed[i].min[w[0]]) || (par.max[j] < feed[i].max[w[0]])) { + vmessage("%f vs %f or %f vs %f", par.min[j], feed[i].min[w[0]], par.max[j], feed[i].max[w[0]]); + throw UsageError, sprintf("Parameter ranges for paramter %d not enclosed", par.index[j]); + } + + if ((feed[i].min[w[0]] == -DOUBLE_MAX) || (feed[i].max[w[0]] == DOUBLE_MAX)) + throw UsageError, sprintf("Unspecified ranges for parameter %d", par.index[j]); + eparams[i].value[w] = feed[i].value[j]; + eparams[i].min[w] = feed[i].min[j]; + eparams[i].max[w] = feed[i].max[j]; } + + n[i] = int(round(1.*feed[i].weight/c*total)); } n[0] += sum(n)-total; - return n; + return eparams, n; } %}}} @@ -747,22 +775,24 @@ private define __emcee_params (par, feed, total) %{{{ % pick random parameter values within the boundaries private define emceeInitUniformPick (init, engine) %{{{ { - variable i, j, k, c, feed; + variable i, j, k, feed; + variable c, p; + variable file = engine.leader.inFile; variable par = __parameters(engine.fit.object); variable numParameter = length(par.value); - 
if (engine.input.mode & EMCEE_FILE_RANGE) - feed = engine.input.read(engine, engine.totalNumberWalkers); + if (file.mode & EMCEE_FILE_RANGE) + (feed, ) = file.read(engine, engine.totalNumberWalkers); else feed = [struct { @par, weight=1 }]; - c = __emcee_params(par, feed); + (p,c) = __emcee_params(par, feed, engine.totalNumberWalkers); k = 0; _for i (0, length(c)-1) { _for j (0, c[i]-1){ k += j; - engine.walkers[k] = rand_uniform(numParameter)*(feed[i].max-feed[i].min)+feed[i].min; + engine.walkers[k] = rand_uniform(numParameter)*(p[i].max-p[i].min)+p[i].min; } } } @@ -790,24 +820,26 @@ private define rand_gauss_cut (sigma, v, bmin, bmax) %{{{ private define emceeInitGaussPick (init, engine) %{{{ { - variable i,j,k,c,feed; + variable i,j,k,feed; + variable c,p; + variable file = engine.leader.inFile; variable par = __parameters(engine.fit.object); variable numParameter = length(par.value); variable sigma; - if (engine.input.mode & EMCEE_FILE_RANGE) - feed = engine.input.read(engine, engine.totalNumberWalkers); + if (file.mode & EMCEE_FILE_RANGE) + (feed, ) = file.read(engine, engine.totalNumberWalkers); else feed = [struct { @par, weight=1}]; - c = __emcee_params(par, feed); + (p,c) = __emcee_params(par, feed, engine.totalNumberWalkers); k = 0; _for i (0, length(c)-1) { _for j (0, c[i]-1) { k += j; - sigma = (feed[i].max-feed[i].min)/init.sigma; - engine.walkers[k] = rand_gauss_cut(sigma, feed[i].value, feed[i].min, feed[i].max); + sigma = (p[i].max-p[i].min)/init.sigma; + engine.walkers[k] = rand_gauss_cut(sigma, p[i].value, p[i].min, p[i].max); } } } @@ -1547,6 +1579,19 @@ private define emceeSetup (ship, steps, options) %{{{ } %}}} +private define emceeDefaultFile (file) %{{{ +{ + variable default = "fits"; + if (NULL != file) { + variable ext = path_extname(file); + if (strlen(ext)) + return ext[[1:]]; + } + + return default; +} +%}}} + %%%%%%%%%%%%%%%%%%%%%%%%%%% define emcee_hammer (steps) %%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1593,14 +1638,10 @@ define 
emcee_hammer (steps) variable oContinue = qualifier("continue"); variable oInfile = qualifier("infile", oContinue); variable oOutfile = qualifier("outfile", oContinue); + variable oInread = emceeDefaultFile(oInfile); + variable oOutwrite = emceeDefaultFile(oOutfile); % advanced options - variable initHandle, initOption; - if (NULL != oInfile) - (initHandle, initOption) = emceeOption(qualifier("init", "file")); - else - (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); - variable shipHandle, shipOption; (shipHandle, shipOption) = emceeOption(qualifier("driver", "serial")); @@ -1608,40 +1649,51 @@ define emcee_hammer (steps) (stepHandle, stepOption) = emceeOption(qualifier("step", "stretch")); variable inputHandle, inputOption; - (inputHandle, inputOption) = emceeOption(qualifier("input", "fits")); + (inputHandle, inputOption) = emceeOption(qualifier("input", oInread)); if (NULL != oInfile) inputOption = struct { @inputOption, filename=oInfile }; variable outputHandle, outputOption; - (outputHandle, outputOption) = emceeOption(qualifier("output", "fits")); + (outputHandle, outputOption) = emceeOption(qualifier("output", oOutwrite)); if (NULL != oOutfile) outputOption = struct { @outputOption, filename=oOutfile }; variable totalNumberWalkers = qualifier("walkers", 10)*num_free_params(); variable ship = @(EmceeShipRegister[shipHandle])(;;shipOption); emceeSetupEngine(ship, totalNumberWalkers, steps); + variable Input = NULL, Output = NULL; + variable Initfile = "uniform"; + if (0 == ship.engine.id) { + Input = @(EmceeFileRegister[inputHandle])(;;inputOption); + Output = @(EmceeFileRegister[outputHandle])(;;outputOption); + + if ( not (Input.has & EMCEE_FILE_READ) + || ( Input.read == &NULL ) ) + throw UsageError, sprintf("File handle '%s' can not be used for input", inputHandle); + if ( not (Output.has & EMCEE_FILE_WRITE) + || ( Output.write == &NULL ) ) + throw UsageError, sprintf("File handle '%s' can not be used for output", outputHandle); + + 
ifnot (Input.has & EMCEE_FILE_RANGE) + Initfile = "file"; + } + + variable initHandle, initOption; + if (NULL != oInfile) + (initHandle, initOption) = emceeOption(qualifier("init", Initfile)); + else + (initHandle, initOption) = emceeOption(qualifier("init", "uniform")); + variable options = struct { init = @(EmceeInitRegister[initHandle])(;;initOption), step = @(EmceeStepRegister[stepHandle])(;;stepOption), - output = NULL, - input = NULL, + output = Output, + input = Input, urand = qualifier("urand", &rand_uniform), upick = qualifier("upick", &rand_int), continue = qualifier_exists("continue"), }; emceeSetupGears(ship.engine, options.urand, options.upick, options.step); - if (0 == ship.engine.id) { - options.input = @(EmceeFileRegister[inputHandle])(;;inputOption); - options.output = @(EmceeFileRegister[outputHandle])(;;outputOption); - - if ( not (options.input.mode & EMCEE_FILE_READ) - || ( options.input.read == &NULL ) ) - throw UsageError, sprintf("File handle '%s' can not be used for input", inputHandle); - if ( not (options.output.mode & EMCEE_FILE_WRITE) - || ( options.output.write == &NULL ) ) - throw UsageError, sprintf("File handle '%s' can not be used for output", outputHandle); - } - emceeSetup(ship, steps, options); emceeLoop(ship, options.step, options.output); -- GitLab From 751553e862334c13d5790c8d0ec67e7c38d7510d Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 9 Jun 2021 02:45:18 +0200 Subject: [PATCH 84/89] Simple test has worked --- src/fitting/ensemble-samplers/emcee.sl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 76553dc0..57536a87 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -732,7 +732,7 @@ private define __emcee_params (par, feed, total) %{{{ variable params = Assoc_Type[UInt_Type, 0]; % need full information to match names == index variable n = 
UInt_Type[length(feed)]; variable eparams = Struct_Type[length(feed)]; - variable c = 0; + variable c = 0, cc = 0; variable i,j,w,p; _for i (0, length(feed)-1) { @@ -745,28 +745,31 @@ private define __emcee_params (par, feed, total) %{{{ _for i (0, length(feed)-1) { if (feed[i] == NULL) continue; - eparams[i] = @par; % as prototype + eparams[i] = @Struct_Type(get_struct_field_names(par)); % as prototype + eparams[i].value = Double_Type[length(par.index)]; + eparams[i].min = Double_Type[length(par.index)]; + eparams[i].max = Double_Type[length(par.index)]; _for j (0, length(par.index)-1) { w = where(par.index[j] == params[feed[i].name]); if (length(w) != 1) % also errors if for whatever reason the parameter is twice in feed throw UsageError, sprintf("Unspecified parameter %d", par.index[j]); - if ((par.min[j] > feed[i].min[w[0]]) || (par.max[j] < feed[i].max[w[0]])) { - vmessage("%f vs %f or %f vs %f", par.min[j], feed[i].min[w[0]], par.max[j], feed[i].max[w[0]]); + w = w[0]; + if ((par.min[j] > feed[i].min[w]) || (par.max[j] < feed[i].max[w])) throw UsageError, sprintf("Parameter ranges for paramter %d not enclosed", par.index[j]); - } - if ((feed[i].min[w[0]] == -DOUBLE_MAX) || (feed[i].max[w[0]] == DOUBLE_MAX)) throw UsageError, sprintf("Unspecified ranges for parameter %d", par.index[j]); + eparams[i].value[w] = feed[i].value[j]; eparams[i].min[w] = feed[i].min[j]; eparams[i].max[w] = feed[i].max[j]; } n[i] = int(round(1.*feed[i].weight/c*total)); + cc += n[i]; } - n[0] += sum(n)-total; + n[0] += cc-total; return eparams, n; } %}}} @@ -790,10 +793,9 @@ private define emceeInitUniformPick (init, engine) %{{{ k = 0; _for i (0, length(c)-1) { - _for j (0, c[i]-1){ - k += j; - engine.walkers[k] = rand_uniform(numParameter)*(p[i].max-p[i].min)+p[i].min; - } + _for j (0, c[i]-1) + engine.walkers[j+k] = rand_uniform(numParameter)*(p[i].max-p[i].min)+p[i].min; + k += c[i]; } } %}}} -- GitLab From 7d9fef2f14345b538e54ed54f79e249bc183cf0d Mon Sep 17 00:00:00 2001 From: 
Jakob Stierhof Date: Wed, 9 Jun 2021 14:50:26 +0200 Subject: [PATCH 85/89] Make UserBreak also abort the system --- src/fitting/ensemble-samplers/emcee.sl | 119 +++++++++++++++++-------- 1 file changed, 83 insertions(+), 36 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index 228a4f7a..fef2c18b 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -992,6 +992,7 @@ EmceeInitRegister["chain"] = &emceeInitChain; % 4: leader_receive - leader receives members % 5: member_receive - member receive leader % 6: enterHarbor - cleanup if necessary +% 7: abort - cleanup handler if SIGTERM is received private variable EmceeShip = struct { setSail, % function leaderSend, % function @@ -999,6 +1000,7 @@ private variable EmceeShip = struct { leaderReceive, % function memberReceive, % function enterHarbor, % function + abort = &NULL, % function, has a default engine, % the working horse @@ -1106,6 +1108,7 @@ private define emceeForkSetSail (ship) %{{{ % fcntl_setfd(sockWrite, flags | O_NONBLOCK); ship.socket[cid] = sockWrite; () = close(sockRead); + list_append(ship.pids, pid); } } @@ -1230,17 +1233,27 @@ private define emceeForkEnterHarbor (ship) %{{{ } } %}}} + +private define emceeForkAbort (ship) %{{{ +{ + variable p; + foreach p (ship.pids) + kill(p, SIGTERM); +} +%}}} %}}} private define emceeShipFork () %{{{ { - variable ship = struct { @EmceeShip, socket, tasks }; + variable ship = struct { @EmceeShip, socket, tasks, pids }; ship.setSail = &emceeForkSetSail; ship.leaderSend = &emceeForkLeaderSend; ship.memberSend = &emceeForkMemberSend; ship.leaderReceive = &emceeForkLeaderReceive; ship.memberReceive = &emceeForkMemberReceive; ship.enterHarbor = &emceeForkEnterHarbor; + ship.abort = &emceeForkAbort; ship.tasks = qualifier("tasks", _num_cpus()); + ship.pids = {}; return ship; } @@ -1376,6 +1389,12 @@ private define emceeMPIEnterHarbor (ship) %{{{ } %}}} + +private define 
emceeMPIAbort (ship) %{{{ +{ + % MPI_abort ... :( +} +%}}} %}}} private define emceeShipMPI () %{{{ { @@ -1386,6 +1405,7 @@ private define emceeShipMPI () %{{{ ship.leaderReceive = &emceeMPILeaderReceive; ship.memberReceive = &emceeMPIMemberReceive; ship.enterHarbor = &emceeMPIEnterHarbor; + ship.abort = &emceeMPIAbort; return ship; } @@ -1478,55 +1498,65 @@ private define emceeOption (str) %{{{ } %}}} +private variable EMCEE_ABORT = 0; +private variable EMCEE_OLD_HANDLE = SIG_DFL; private define emceeLoop (ship, step, output) %{{{ { variable engine = ship.engine; - variable s, j, set, cycle, leader, size, offset, timer; + variable s=0, j, set, cycle = 0, leader, size, offset, timer; offset = 0; tic; % start timer - _for s (0, engine.numberSteps-1) { - _for set (1, 2) { - emceeDrawSet(engine, set); + try { + _for s (0, engine.numberSteps-1) { + if (EMCEE_ABORT) break; + _for set (1, 2) { + if (EMCEE_ABORT) break; + emceeDrawSet(engine, set); - if (0 == engine.id) - ship.leaderSend(); - else - ship.memberReceive(); + if (0 == engine.id) + ship.leaderSend(); + else + ship.memberReceive(); - step.move(engine); - - if (0 == engine.id) - ship.leaderReceive(); - else - ship.memberSend(); - } - timer = toc; % get elapsed time + step.move(engine); - if (0 == engine.id) { - leader = engine.leader; - size = leader.writeBuffer.size; - cycle = (s-offset) mod leader.writeBuffer.cycle; - - % write to buffer - _for j (0, engine.totalNumberWalkers-1) { - leader.writeBuffer.walkers[j+cycle*engine.totalNumberWalkers][*] = @(engine.walkers[j]); - leader.writeBuffer.stat[j+cycle*engine.totalNumberWalkers] = engine.stat[j]; - leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; + if (0 == engine.id) + ship.leaderReceive(); + else + ship.memberSend(); } - - if (cycle == (leader.writeBuffer.cycle-1)) { - output.write(engine, size); - offset = 0; % we write full buffer, so no offset at all - tic; % restart timer - } else if (timer >= 6e2) { % write every 10 min 
- output.write(engine, (cycle+1)*engine.totalNumberWalkers); - offset = cycle mod leader.writeBuffer.cycle; - tic; % restart timer + timer = toc; % get elapsed time + + if (0 == engine.id) { + leader = engine.leader; + size = leader.writeBuffer.size; + cycle = (s-offset) mod leader.writeBuffer.cycle; + + % write to buffer + _for j (0, engine.totalNumberWalkers-1) { + leader.writeBuffer.walkers[j+cycle*engine.totalNumberWalkers][*] = @(engine.walkers[j]); + leader.writeBuffer.stat[j+cycle*engine.totalNumberWalkers] = engine.stat[j]; + leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; + } + + if (cycle == (leader.writeBuffer.cycle-1)) { + output.write(engine, size); + offset = 0; % we write full buffer, so no offset at all + tic; % restart timer + } else if (timer >= 6e2) { % write every 10 min + output.write(engine, (cycle+1)*engine.totalNumberWalkers); + offset = cycle mod leader.writeBuffer.cycle; + tic; % restart timer + } } } } + catch UserBreakError: + { + EMCEE_ABORT = 1; + } % write remaining steps if (0 == engine.id) { @@ -1534,6 +1564,20 @@ private define emceeLoop (ship, step, output) %{{{ output.write(engine, (cycle+1)*engine.totalNumberWalkers); output.close(engine); } + + if (EMCEE_ABORT && (&NULL != ship.abort)) + ship.abort(); + + % set signal handler back + signal(SIGTERM, EMCEE_OLD_HANDLE); +} +%}}} + +% catch SIGTERM and set abort flag +private define emceeSignalHandler (sig) %{{{ +{ + EMCEE_ABORT = 1; + signal(SIGTERM, EMCEE_OLD_HANDLE); } %}}} @@ -1580,6 +1624,9 @@ private define emceeSetup (ship, steps, options) %{{{ ifnot (options.continue) options.output.write(engine, engine.totalNumberWalkers); } + + % set handler + signal(SIGTERM, &emceeSignalHandler, &EMCEE_OLD_HANDLE); } %}}} -- GitLab From 39957b0c51a6a097024e2eaa76f78041170d4499 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Wed, 9 Jun 2021 15:05:00 +0200 Subject: [PATCH 86/89] Comment! 
--- src/fitting/ensemble-samplers/emcee.sl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index fef2c18b..a66fc681 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -1553,7 +1553,7 @@ private define emceeLoop (ship, step, output) %{{{ } } } - catch UserBreakError: + catch UserBreakError: % make sure we stop gracefully { EMCEE_ABORT = 1; } -- GitLab From 1511e2368084104b680f98a85fee210054f7e702 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Thu, 10 Jun 2021 13:20:34 +0200 Subject: [PATCH 87/89] Bug in init Handle current set and set from file differently Bug in Gauss init --- src/fitting/ensemble-samplers/emcee.sl | 50 ++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index a66fc681..d27c9097 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -737,7 +737,10 @@ private define __emcee_params (par, feed, total) %{{{ _for i (0, length(feed)-1) { if (feed[i] == NULL) continue; - c += (feed[i].weight>0) ? feed[i].weight : 0; + ifnot (struct_field_exists(feed[i], "name")) % current set, weight = 1 + c++; + else + c += (feed[i].weight>0) ? 
feed[i].weight : 0; } foreach p (get_params) @@ -750,22 +753,31 @@ private define __emcee_params (par, feed, total) %{{{ eparams[i].min = Double_Type[length(par.index)]; eparams[i].max = Double_Type[length(par.index)]; - _for j (0, length(par.index)-1) { - w = where(par.index[j] == params[feed[i].name]); - if (length(w) != 1) % also errors if for whatever reason the parameter is twice in feed - throw UsageError, sprintf("Unspecified parameter %d", par.index[j]); - w = w[0]; - if ((par.min[j] > feed[i].min[w]) || (par.max[j] < feed[i].max[w])) - throw UsageError, sprintf("Parameter ranges for paramter %d not enclosed", par.index[j]); - if ((feed[i].min[w[0]] == -DOUBLE_MAX) || (feed[i].max[w[0]] == DOUBLE_MAX)) - throw UsageError, sprintf("Unspecified ranges for parameter %d", par.index[j]); - - eparams[i].value[w] = feed[i].value[j]; - eparams[i].min[w] = feed[i].min[j]; - eparams[i].max[w] = feed[i].max[j]; + % use current set of parameters + ifnot (struct_field_exists(feed[i], "name")) { + eparams[i].value = par.value; + eparams[i].min = par.min; + eparams[i].max = par.max; + n[i] = int(round(1./c*total)); + } else { + _for j (0, length(par.index)-1) { + w = where(par.index[j] == params[feed[i].name]); + if (length(w) != 1) % also errors if for whatever reason the parameter is twice in feed + throw UsageError, sprintf("Unspecified parameter %d", par.index[j]); + w = w[0]; + if ((par.min[j] > feed[i].min[w]) || (par.max[j] < feed[i].max[w])) + throw UsageError, sprintf("Parameter ranges for paramter %d not enclosed", par.index[j]); + if ((feed[i].min[w[0]] == -DOUBLE_MAX) || (feed[i].max[w[0]] == DOUBLE_MAX)) + throw UsageError, sprintf("Unspecified ranges for parameter %d", par.index[j]); + + eparams[i].value[w] = feed[i].value[j]; + eparams[i].min[w] = feed[i].min[j]; + eparams[i].max[w] = feed[i].max[j]; + } + + n[i] = int(round(1.*feed[i].weight/c*total)); } - n[i] = int(round(1.*feed[i].weight/c*total)); cc += n[i]; } @@ -787,7 +799,7 @@ private define 
emceeInitUniformPick (init, engine) %{{{ if (file.mode & EMCEE_FILE_RANGE) (feed, ) = file.read(engine, engine.totalNumberWalkers); else - feed = [struct { @par, weight=1 }]; + feed = @par; (p,c) = __emcee_params(par, feed, engine.totalNumberWalkers); @@ -832,17 +844,17 @@ private define emceeInitGaussPick (init, engine) %{{{ if (file.mode & EMCEE_FILE_RANGE) (feed, ) = file.read(engine, engine.totalNumberWalkers); else - feed = [struct { @par, weight=1}]; + feed = @par; (p,c) = __emcee_params(par, feed, engine.totalNumberWalkers); k = 0; _for i (0, length(c)-1) { _for j (0, c[i]-1) { - k += j; sigma = (p[i].max-p[i].min)/init.sigma; - engine.walkers[k] = rand_gauss_cut(sigma, p[i].value, p[i].min, p[i].max); + engine.walkers[j+k] = rand_gauss_cut(sigma, p[i].value, p[i].min, p[i].max); } + k += c[i]; } } %}}} -- GitLab From 5ebf44d1cf9cfe65b6de957f04722f0feff6afe0 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Tue, 22 Jun 2021 15:07:32 +0200 Subject: [PATCH 88/89] Add emcee compatibility mode to Mikes implementation --- src/fitting/ensemble-samplers/emcee.sl | 197 ++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index d27c9097..a24e8d9f 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -250,6 +250,13 @@ private define emceeSetupEngine (ship, totalNumberWalkers, totalSteps) %{{{ % parameter settings stored in the file instead of reading % the last iterations. % +% mike : Fits file interface (compatible to previous emcee routine) +% ; filename : [emcee-.fits] The input/output file name. +% ; parameter : if given, on read we draw new starting position from the +% header +% ; cycle : [=50] the number of steps to calculate before writing to +% file +% % par : Parameter file interface to draw initial walkers from parameter % files. 
% ; filename : [emcee-.fits] Multiple parameter files can be separated @@ -361,7 +368,7 @@ private define __emceeFitsWriteT3(handle, engine) %{{{ fits_create_binary_table(handle, "CHAINSTATS", 0, ["FRAC_UPDATE", "MIN_STAT", "MED_STAT", "MAX_STAT"], ["D", "D", "D", "D"], [" fraction", [sprintf(" %s", get_fit_statistic)][[0:2]*0]]); - fits_update_key(handle, "STATISTIC", get_fit_statistic(), " latest fit statistic"); + fits_update_key(handle, "STATISTIC", get_fit_statistic(), " fit statistic"); } %}}} @@ -584,7 +591,7 @@ private define emceeFileFitsClose (file, engine) %{{{ variable nHDUs = fits_get_num_hdus(file.handle); variable i; - if (file.mode) { + if (file.mode & EMCEE_FILE_WRITE) { _for i (1, nHDUs) { () = _fits_movabs_hdu(file.handle, i); fits_write_chksum(file.handle); @@ -618,6 +625,186 @@ private define emceeFileFits () %{{{ %}}} EmceeFileRegister["fits"] = &emceeFileFits; +%{{{ Mikes file format (similar to fits, but contains a third table with some stats) + +% Create function %{{{ +private define emceeFileMikeCreate (file, engine) %{{{ +{ + file.mode |= EMCEE_FILE_WRITE; + file.mode &= ~EMCEE_FILE_READ; + + % Create fits file and write headers + file.handle = fits_open_file(file.filename, "c"); + + % write first table + __emceeFitsWriteT1(file.handle, engine); + + % write second table + __emceeFitsWriteT2(file.handle, engine); + + % write third table + __emceeFitsWriteT3(file.handle, engine); + + % move back to chain table + () = _fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0); + + % set write cycle + () = _fits_get_rowsize(file.handle, &(file.cycle)); + file.cycle = file.cycle/engine.totalNumberWalkers; + if (file.cycle < 1) + file.cycle = 1; + + % fits routine customs + file.numberSteps = 0; + file.sloppy = 0; +} +%}}} +%}}} + +% Open function %{{{ +private define __emceeMikeWriteChecks (file, engine) %{{{ +{ + variable handle = file.handle; + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "PARAMETERS", 0)) { + fits_close_file(handle); 
+ handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(handle, "STATISTIC") != get_fit_statistic())) { + fits_close_file(handle); + handle = NULL; + throw UsageError, sprintf("Current fit statistic (%s) and chain fit statistic (%s) differ.", fits_read_key(handle, "STATISTIC"), get_fit_statistic()); + } + + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "CHAINSTATS", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if (_fits_movnam_hdu(handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + if ((fits_read_key(handle, "NWALKERS")*fits_read_key(handle, "NFREEPAR")) != length(engine.walkers)) { + fits_close_file(handle); + handle = NULL; + throw IOError, "Number of walkers differs from number used in chain file"; + } +} +%}}} + +private define emceeFileMikeOpen (file, engine) %{{{ +{ + file.mode |= EMCEE_FILE_READ | EMCEE_FILE_WRITE; + + file.handle = fits_open_file(file.filename, "w"); + + if (_fits_movnam_hdu(file.handle, _FITS_BINARY_TBL, "MCMCCHAIN", 0)) { + fits_close_file(file.handle); + file.handle = NULL; + throw IOError, "Not a emcee chain file"; + } + + __emceeFitsReadChecks(file, engine); + __emceeMikeWriteChecks(file, engine); + + if (file.cycle < 0) + file.cycle = 50; % default to 50 cycles per write + + file.numberSteps = fits_get_num_rows(file.handle); +} +%}}} +%}}} + +% Read function %{{{ +% use the fits read function +%}}} + +% Write function %{{{ +private define emceeFileMikeWrite (file, engine, numberWalkersSteps) %{{{ +{ + if (numberWalkersSteps > engine.leader.writeBuffer.size) + throw InternalError, "Trying to write more than accessible"; + + variable par = __parameters(engine.fit.object); + variable npar = engine.numberParameters; + + variable i,j; + variable firstIndex = fits_get_num_rows(file.handle)+1; % first index of this cycle + variable parCycle = 
Double_Type[numberWalkersSteps]; + variable stat_min, stat_med, stat_max, frac_update; + stat_min = Double_Type[numberWalkersSteps/engine.totalNumberWalkers]; + stat_med = Double_Type[numberWalkersSteps/engine.totalNumberWalkers]; + stat_max = Double_Type[numberWalkersSteps/engine.totalNumberWalkers]; + frac_update = Double_Type[numberWalkersSteps/engine.totalNumberWalkers]; + + _for j (0, npar-1, 1) { + _for i (0, numberWalkersSteps-1, 1) + parCycle[i] = engine.leader.writeBuffer.walkers[i][j]; + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, sprintf("CHAINS%d", par.index[j])), + firstIndex, + 1, + parCycle); + } + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "FITSTAT"), + firstIndex, + 1, + engine.leader.writeBuffer.stat[[:numberWalkersSteps-1]]); + () = _fits_write_col(file.handle, + fits_get_colnum(file.handle, "UPDATE"), + firstIndex, + 1, + engine.leader.writeBuffer.update[[:numberWalkersSteps-1]]); + + () = _fits_movnam_hdu(file.handle, "CHAINSTATS", _FITS_BINARY_TBL, 0); + firstIndex = fits_get_num_rows(file.handle)+1; + _for j (0, numberWalkersSteps/engine.totalNumberWalkers-1) { + stat_min[j] = min(engine.leader.writeBuffer.stat[[0:engine.totalNumberWalkers-1]+j*engine.totalNumberWalkers]); + stat_med[j] = median(engine.leader.writeBuffer.stat[[0:engine.totalNumberWalkers-1]+j*engine.totalNumberWalkers]); + stat_max[j] = max(engine.leader.writeBuffer.stat[[0:engine.totalNumberWalkers-1]+j*engine.totalNumberWalkers]); + frac_update[j] = sum(engine.leader.writeBuffer.update[[0:engine.totalNumberWalkers-1]+j*engine.totalNumberWalkers])/engine.totalNumberWalkers; + } + () = _fits_write_col(file.handle, fits_get_colnum(file.handle, "FRAC_UPDATE"), firstIndex, 1, frac_update); + () = _fits_write_col(file.handle, fits_get_colnum(file.handle, "MIN_STAT"), firstIndex, 1, stat_min); + () = _fits_write_col(file.handle, fits_get_colnum(file.handle, "MED_STAT"), firstIndex, 1, stat_med); + () = _fits_write_col(file.handle, 
fits_get_colnum(file.handle, "MAX_STAT"), firstIndex, 1, stat_max); + + () = _fits_movnam_hdu(file.handle, "MCMCCHAIN", _FITS_BINARY_TBL, 0); +} +%}}} +%}}} + +% Close function %{{{ +% use fits close +%}}} +%}}} +private define emceeFileMike () %{{{ +{ + variable file = struct { @EmceeFile, numberSteps }; + file.create = &emceeFileMikeCreate; + file.open = &emceeFileMikeOpen; + file.read = &emceeFileFitsRead; + file.write = &emceeFileMikeWrite; + file.close = &emceeFileFitsClose; + + file.filename = qualifier("filename", strftime("emcee-%Y%m%d-%H%M%S.fits")); + file.mode |= qualifier_exists("parameter") ? EMCEE_FILE_RANGE : 0; + file.has |= EMCEE_FILE_READ | EMCEE_FILE_WRITE; + file.cycle = qualifier("cycle", 50); + + file.numberSteps = 0; + + return file; +} +%}}} +EmceeFileRegister["mike"] = &emceeFileMike; + %{{{ Par file functions %{{{ create function %}}} @@ -668,7 +855,6 @@ private define emceeFileParRead (file, engine, numberWalkers) %{{{ %{{{ close function %}}} %}}} - private define emceeFilePar () %{{{ { variable file = struct { @EmceeFile }; @@ -1520,6 +1706,9 @@ private define emceeLoop (ship, step, output) %{{{ offset = 0; tic; % start timer + if (0 == engine.id) + leader = engine.leader; + try { _for s (0, engine.numberSteps-1) { if (EMCEE_ABORT) break; @@ -1542,7 +1731,6 @@ private define emceeLoop (ship, step, output) %{{{ timer = toc; % get elapsed time if (0 == engine.id) { - leader = engine.leader; size = leader.writeBuffer.size; cycle = (s-offset) mod leader.writeBuffer.cycle; @@ -1579,6 +1767,7 @@ private define emceeLoop (ship, step, output) %{{{ if (EMCEE_ABORT && (&NULL != ship.abort)) ship.abort(); + EMCEE_ABORT=0; % set signal handler back signal(SIGTERM, EMCEE_OLD_HANDLE); -- GitLab From 7dfea464a6782109ce50e913f1dd38d3072355d8 Mon Sep 17 00:00:00 2001 From: Jakob Stierhof Date: Mon, 26 Jul 2021 15:44:02 +0200 Subject: [PATCH 89/89] Add progress report to emcee At some point make Mikes emcee a wrapper around emcee_hammer. 
This way we keep backwards compatibility and provide a simple interface. --- src/fitting/ensemble-samplers/emcee.sl | 89 ++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) diff --git a/src/fitting/ensemble-samplers/emcee.sl b/src/fitting/ensemble-samplers/emcee.sl index a24e8d9f..64e9d993 100644 --- a/src/fitting/ensemble-samplers/emcee.sl +++ b/src/fitting/ensemble-samplers/emcee.sl @@ -27,6 +27,7 @@ private variable EmceeInitRegister = Assoc_Type[Ref_Type, &NULL]; private variable EmceeStepRegister = Assoc_Type[Ref_Type, &NULL]; private variable EmceeFileRegister = Assoc_Type[Ref_Type, &NULL]; private variable EmceeShipRegister = Assoc_Type[Ref_Type, &NULL]; +private variable EmceeProgressRegister = Assoc_Type[Ref_Type, &NULL]; % Engine and Leader %{{{ @@ -72,6 +73,7 @@ private variable EmceeLeader = struct { writeBuffer, % total write buffer array inFile, % input file handle outFile, % output file handle + progress, % progress handle }; private variable EmceeGears = struct { @@ -163,7 +165,7 @@ private define emceeSetupWriteBuffer (leader, numberWalkers, numberSteps) %{{{ } %}}} -private define emceeSetupLeader (engine, inFile, outFile) %{{{ +private define emceeSetupLeader (engine, inFile, outFile, progress) %{{{ { if (0 == engine.id) { variable nEngines = engine.numberEngines; @@ -174,6 +176,7 @@ private define emceeSetupLeader (engine, inFile, outFile) %{{{ leader.walkersPerSet2 = Int_Type[nEngines]; leader.inFile = inFile; leader.outFile = outFile; + leader.progress = progress; emceeSetupWriteBuffer(leader, engine.totalNumberWalkers, engine.numberSteps); @@ -1688,6 +1691,73 @@ private define emceeStepStretch () %{{{ EmceeStepRegister["stretch"] = &emceeStepStretch; %}}} +%{{{ Progress interface +%!%+ +%\function{emcee--progress} +%\synopsis{Set emcee progress report} +%\usage{progress="method;options"} +%\description +% To get a progress report for the running emcee +% algorithm use one of the available options. 
+% +% Available report methods: +% none : Do not report +% report : Report the number of steps done every n steps +% ; n : [=50] Report for every n steps. +% ; overwrite : if given, overwrite last status (useful for +% interactive sessions). +% ; format : [="Status: %D/%T (%%P)"] The report format +% where %D is the current step, %T total steps and %P +% the percentage. +%!%- +private variable EmceeProgress = struct { + reporter, % function + + every = 50, % when to change report + + % private data +}; + +%{{{ None +private define emceeProgressNone () %{{{ +{ + variable progress = struct { @EmceeProgress }; + progress.reporter = &NULL; + progress.every = -1; % non-positive interval: emceeLoop never calls the reporter + + return progress; +} +%}}} +%}}} +EmceeProgressRegister["none"] = &emceeProgressNone; + +%{{{ Report +private define emceeProgressReportReporter (progress, engine, step) +{ + variable str = strreplace(progress.format, "%D", sprintf("%d", step)); + str = strreplace(str, "%T", sprintf("%d", engine.numberSteps)); + str = strreplace(str, "%P", sprintf("%.02lf", step*100./engine.numberSteps)); + if (engine.numberSteps == step) { + progress.last_out = printf("%c%s\n", progress.overwrite ? '\r' : '\0', str); + () = fflush(stdout); + } else + progress.last_out = printf("%c%s%c", progress.overwrite ? '\r' : '\0', str, progress.overwrite ?
'\0' : '\n'); +} +%}}} +private define emceeProgressReport () %{{{ +{ + variable progress = struct { @EmceeProgress, last_out = 0, overwrite, format }; + progress.reporter = &emceeProgressReportReporter; + progress.every = qualifier("n", 50); + progress.overwrite = qualifier_exists("overwrite"); + progress.format = qualifier("format", "Status: %D/%T (%%P)"); + + return progress; +} +%}}} +EmceeProgressRegister["report"] = &emceeProgressReport; +%}}} + %%% emcee call private define emceeOption (str) %{{{ { @@ -1741,6 +1811,9 @@ private define emceeLoop (ship, step, output) %{{{ leader.writeBuffer.update[j+cycle*engine.totalNumberWalkers] = engine.update[j]; } + if (leader.progress.every > 0 && 0 == (s mod leader.progress.every)) % only on every n-th step + leader.progress.reporter(engine, s); + if (cycle == (leader.writeBuffer.cycle-1)) { output.write(engine, size); offset = 0; % we write full buffer, so no offset at all @@ -1760,6 +1833,8 @@ private define emceeLoop (ship, step, output) %{{{ % write remaining steps if (0 == engine.id) { + if (leader.progress.every > 0) + leader.progress.reporter(engine, engine.numberSteps); if (cycle < (leader.writeBuffer.cycle-1)) output.write(engine, (cycle+1)*engine.totalNumberWalkers); output.close(engine); @@ -1789,7 +1864,7 @@ private define emceeSetup (ship, steps, options) %{{{ engine = ship.engine; if (0 == engine.id) { - emceeSetupLeader(engine, options.input, options.output); + emceeSetupLeader(engine, options.input, options.output, options.progress); options.init.pick(engine); } @@ -1865,6 +1940,7 @@ define emcee_hammer (steps) % \qualifier{step}{[="stretch"]: The walker step algorithm} % \qualifier{input}{[="fits"]: The file reading method} % \qualifier{output}{[="fits"]: The file writing method} +% \qualifier{progress}{[="none"]: Show progress} % \qualifier{urand}{[=&rand_uniform]: PRNG for uniform numbers (Double_Type[] = urand(Int_Type))} % \qualifier{upick}{[=&rand_int]: PRNG to chose complement walker (Int_Type[] = upick(Int_Type, Int_Type,
Int_Type))} %} @@ -1883,7 +1959,7 @@ define emcee_hammer (steps) % Per default a new chain is started when the function is called. To continue % a chain use the "continue" qualifier. % -%\seealso{emcee--init, emcee--step, emcee--driver, emcee--input, emcee--output} +%\seealso{emcee--init, emcee--step, emcee--driver, emcee--input, emcee--output, emcee--progress} %!%- { % options @@ -1904,6 +1980,9 @@ define emcee_hammer (steps) (inputHandle, inputOption) = emceeOption(qualifier("input", oInread)); if (NULL != oInfile) inputOption = struct { @inputOption, filename=oInfile }; + variable progressHandle, progressOption; + (progressHandle, progressOption) = emceeOption(qualifier("progress", "none")); + variable outputHandle, outputOption; (outputHandle, outputOption) = emceeOption(qualifier("output", oOutwrite)); if (NULL != oOutfile) outputOption = struct { @outputOption, filename=oOutfile }; @@ -1912,11 +1991,12 @@ define emcee_hammer (steps) variable ship = @(EmceeShipRegister[shipHandle])(;;shipOption); emceeSetupEngine(ship, totalNumberWalkers, steps); - variable Input = NULL, Output = NULL; + variable Input = NULL, Output = NULL, Progress = NULL; variable Initfile = "uniform"; if (0 == ship.engine.id) { Input = @(EmceeFileRegister[inputHandle])(;;inputOption); Output = @(EmceeFileRegister[outputHandle])(;;outputOption); + Progress = @(EmceeProgressRegister[progressHandle])(;;progressOption); if ( not (Input.has & EMCEE_FILE_READ) || ( Input.read == &NULL ) ) @@ -1942,6 +2022,7 @@ define emcee_hammer (steps) input = Input, urand = qualifier("urand", &rand_uniform), upick = qualifier("upick", &rand_int), + progress = Progress, continue = qualifier_exists("continue"), }; emceeSetupGears(ship.engine, options.urand, options.upick, options.step); -- GitLab