-
Notifications
You must be signed in to change notification settings - Fork 83
Two locus general matrix #3426
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
lkirk
wants to merge
17
commits into
tskit-dev:main
Choose a base branch
from
lkirk:two-locus-general-matrix
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+807
−48
Open
Two locus general matrix #3426
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
cc2612f
initial stab at a general matrix (no normalisation)
lkirk d53a662
added dimension dropping, but I think transposing is better -- we don…
lkirk ea30828
finalize and add tests for single and multipop
lkirk cef7149
reformat jitter
lkirk 08e4d3f
one more reformat jitter
lkirk 7f5c1c4
one more one more reformat jitter
lkirk 904a809
turns out, the general norm function needs to know the state_dims
lkirk 688792c
fix up a bit of naming in general test funcs, remove unneeded branch,…
lkirk baebca6
flake8 does not like assigning lambdas to variables
lkirk a260cf0
and black doesn't like that
lkirk 46a4e19
do not test equality, this was useful on my local machine but is prob…
lkirk 209e554
formatting jitter
lkirk 3a8944f
lowlevel tests
lkirk b41239a
relax diff requirements (macos failure)
lkirk c9e1df1
relax diff requirements (macos failure) -- previous commit fixed one
lkirk 51ec346
new formatting tools, fix lint
lkirk e5a105f
remove TODOs, old comment and tested elsewhere
lkirk File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2298,8 +2298,9 @@ get_allele_samples(const tsk_site_t *site, tsk_size_t site_offset, | |
| } | ||
|
|
||
| static int | ||
| norm_hap_weighted(tsk_size_t result_dim, const double *hap_weights, | ||
| tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), double *result, void *params) | ||
| norm_hap_weighted(tsk_size_t TSK_UNUSED(state_dim), const double *hap_weights, | ||
| tsk_size_t result_dim, tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), | ||
| double *result, void *params) | ||
| { | ||
| sample_count_stat_params_t args = *(sample_count_stat_params_t *) params; | ||
| const double *weight_row; | ||
|
|
@@ -2315,8 +2316,9 @@ norm_hap_weighted(tsk_size_t result_dim, const double *hap_weights, | |
| } | ||
|
|
||
| static int | ||
| norm_hap_weighted_ij(tsk_size_t result_dim, const double *hap_weights, | ||
| tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), double *result, void *params) | ||
| norm_hap_weighted_ij(tsk_size_t TSK_UNUSED(state_dim), const double *hap_weights, | ||
| tsk_size_t result_dim, tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), | ||
| double *result, void *params) | ||
| { | ||
| sample_count_stat_params_t args = *(sample_count_stat_params_t *) params; | ||
| const double *weight_row; | ||
|
|
@@ -2341,8 +2343,9 @@ norm_hap_weighted_ij(tsk_size_t result_dim, const double *hap_weights, | |
| } | ||
|
|
||
| static int | ||
| norm_total_weighted(tsk_size_t result_dim, const double *TSK_UNUSED(hap_weights), | ||
| tsk_size_t n_a, tsk_size_t n_b, double *result, void *TSK_UNUSED(params)) | ||
| norm_total_weighted(tsk_size_t TSK_UNUSED(state_dim), | ||
| const double *TSK_UNUSED(hap_weights), tsk_size_t result_dim, tsk_size_t n_a, | ||
| tsk_size_t n_b, double *result, void *TSK_UNUSED(params)) | ||
| { | ||
| tsk_size_t k; | ||
| double norm = 1 / (double) (n_a * n_b); | ||
|
|
@@ -2411,8 +2414,8 @@ static int | |
| compute_general_normed_two_site_stat_result(const tsk_bitset_t *state, | ||
| const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off, | ||
| tsk_size_t num_a_alleles, tsk_size_t num_b_alleles, tsk_size_t state_dim, | ||
| tsk_size_t result_dim, general_stat_func_t *f, sample_count_stat_params_t *f_params, | ||
| norm_func_t *norm_f, bool polarised, two_locus_work_t *restrict work, double *result) | ||
| tsk_size_t result_dim, general_stat_func_t *f, void *f_params, norm_func_t *norm_f, | ||
| bool polarised, two_locus_work_t *restrict work, double *result) | ||
| { | ||
| int ret = 0; | ||
| // Sample sets and b sites are rows, a sites are columns | ||
|
|
@@ -2445,7 +2448,7 @@ compute_general_normed_two_site_stat_result(const tsk_bitset_t *state, | |
| if (ret != 0) { | ||
| goto out; | ||
| } | ||
| ret = norm_f(result_dim, weights, num_a_alleles - is_polarised, | ||
| ret = norm_f(state_dim, weights, result_dim, num_a_alleles - is_polarised, | ||
| num_b_alleles - is_polarised, norm, f_params); | ||
| if (ret != 0) { | ||
| goto out; | ||
|
|
@@ -2463,9 +2466,8 @@ compute_general_normed_two_site_stat_result(const tsk_bitset_t *state, | |
| static int | ||
| compute_general_two_site_stat_result(const tsk_bitset_t *state, | ||
| const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off, | ||
| tsk_size_t state_dim, tsk_size_t result_dim, general_stat_func_t *f, | ||
| sample_count_stat_params_t *f_params, two_locus_work_t *restrict work, | ||
| double *result) | ||
| tsk_size_t state_dim, tsk_size_t result_dim, general_stat_func_t *f, void *f_params, | ||
| two_locus_work_t *restrict work, double *result) | ||
| { | ||
| int ret = 0; | ||
| tsk_size_t k; | ||
|
|
@@ -2653,9 +2655,8 @@ static int | |
| tsk_treeseq_two_site_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim, | ||
| tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, | ||
| const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f, | ||
| sample_count_stat_params_t *f_params, norm_func_t *norm_f, tsk_size_t n_rows, | ||
| const tsk_id_t *row_sites, tsk_size_t n_cols, const tsk_id_t *col_sites, | ||
| tsk_flags_t options, double *result) | ||
| void *f_params, norm_func_t *norm_f, tsk_size_t n_rows, const tsk_id_t *row_sites, | ||
| tsk_size_t n_cols, const tsk_id_t *col_sites, tsk_flags_t options, double *result) | ||
| { | ||
| int ret = 0; | ||
| tsk_bitset_t allele_samples, allele_sample_sets; | ||
|
|
@@ -3089,9 +3090,8 @@ advance_collect_edges(iter_state *s, tsk_id_t index) | |
| static int | ||
| compute_two_tree_branch_state_update(const tsk_treeseq_t *ts, tsk_id_t c, | ||
| const iter_state *A_state, const iter_state *B_state, tsk_size_t state_dim, | ||
| tsk_size_t result_dim, int sign, general_stat_func_t *f, | ||
| sample_count_stat_params_t *f_params, two_locus_work_t *restrict work, | ||
| double *result) | ||
| tsk_size_t result_dim, int sign, general_stat_func_t *f, void *f_params, | ||
| two_locus_work_t *restrict work, double *result) | ||
| { | ||
| int ret = 0; | ||
| double a_len, b_len; | ||
|
|
@@ -3141,8 +3141,8 @@ compute_two_tree_branch_state_update(const tsk_treeseq_t *ts, tsk_id_t c, | |
|
|
||
| static int | ||
| compute_two_tree_branch_stat(const tsk_treeseq_t *ts, const iter_state *l_state, | ||
| iter_state *r_state, general_stat_func_t *f, sample_count_stat_params_t *f_params, | ||
| tsk_size_t result_dim, tsk_size_t state_dim, double *result) | ||
| iter_state *r_state, general_stat_func_t *f, void *f_params, tsk_size_t result_dim, | ||
| tsk_size_t state_dim, double *result) | ||
| { | ||
| int ret = 0; | ||
| tsk_id_t e, c, ec, p, *updated_nodes = NULL; | ||
|
|
@@ -3243,9 +3243,9 @@ static int | |
| tsk_treeseq_two_branch_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim, | ||
| tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, | ||
| const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f, | ||
| sample_count_stat_params_t *f_params, norm_func_t *TSK_UNUSED(norm_f), | ||
| tsk_size_t n_rows, const double *row_positions, tsk_size_t n_cols, | ||
| const double *col_positions, tsk_flags_t TSK_UNUSED(options), double *result) | ||
| void *f_params, norm_func_t *TSK_UNUSED(norm_f), tsk_size_t n_rows, | ||
| const double *row_positions, tsk_size_t n_cols, const double *col_positions, | ||
| tsk_flags_t TSK_UNUSED(options), double *result) | ||
| { | ||
| int ret = 0; | ||
| int r, c; | ||
|
|
@@ -3385,10 +3385,10 @@ check_sample_set_dups(tsk_size_t num_sample_sets, const tsk_size_t *sample_set_s | |
| } | ||
|
|
||
| int | ||
| tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets, | ||
| const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, | ||
| tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f, | ||
| norm_func_t *norm_f, tsk_size_t out_rows, const tsk_id_t *row_sites, | ||
| tsk_treeseq_two_locus_count_general_stat(const tsk_treeseq_t *self, | ||
| tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, | ||
| const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f, | ||
| void *f_params, norm_func_t *norm_f, tsk_size_t out_rows, const tsk_id_t *row_sites, | ||
| const double *row_positions, tsk_size_t out_cols, const tsk_id_t *col_sites, | ||
| const double *col_positions, tsk_flags_t options, double *result) | ||
| { | ||
|
|
@@ -3398,10 +3398,6 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl | |
| bool stat_site = !!(options & TSK_STAT_SITE); | ||
| bool stat_branch = !!(options & TSK_STAT_BRANCH); | ||
| tsk_size_t state_dim = num_sample_sets; | ||
| sample_count_stat_params_t f_params = { .sample_sets = sample_sets, | ||
| .num_sample_sets = num_sample_sets, | ||
| .sample_set_sizes = sample_set_sizes, | ||
| .set_indexes = set_indexes }; | ||
|
|
||
| // We do not support two-locus node stats | ||
| if (!!(options & TSK_STAT_NODE)) { | ||
|
|
@@ -3441,7 +3437,7 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl | |
| goto out; | ||
| } | ||
| ret = tsk_treeseq_two_site_count_stat(self, state_dim, num_sample_sets, | ||
| sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows, | ||
| sample_set_sizes, sample_sets, result_dim, f, f_params, norm_f, out_rows, | ||
| row_sites, out_cols, col_sites, options, result); | ||
| } else if (stat_branch) { | ||
| ret = check_positions( | ||
|
|
@@ -3455,13 +3451,30 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl | |
| goto out; | ||
| } | ||
| ret = tsk_treeseq_two_branch_count_stat(self, state_dim, num_sample_sets, | ||
| sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows, | ||
| sample_set_sizes, sample_sets, result_dim, f, f_params, norm_f, out_rows, | ||
| row_positions, out_cols, col_positions, options, result); | ||
| } | ||
| out: | ||
| return ret; | ||
| } | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function now serves as an inner wrapper. The general stat accepts the summary function params so that the CPython code can pass them directly. All of the specialized stats functions call this function. |
||
| int | ||
| tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets, | ||
| const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, | ||
| tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f, | ||
| norm_func_t *norm_f, tsk_size_t out_rows, const tsk_id_t *row_sites, | ||
| const double *row_positions, tsk_size_t out_cols, const tsk_id_t *col_sites, | ||
| const double *col_positions, tsk_flags_t options, double *result) | ||
| { | ||
| sample_count_stat_params_t f_params = { .sample_sets = sample_sets, | ||
| .num_sample_sets = num_sample_sets, | ||
| .sample_set_sizes = sample_set_sizes, | ||
| .set_indexes = set_indexes }; | ||
| return tsk_treeseq_two_locus_count_general_stat(self, num_sample_sets, | ||
| sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows, | ||
| row_sites, row_positions, out_cols, col_sites, col_positions, options, result); | ||
| } | ||
|
|
||
| /*********************************** | ||
| * Allele frequency spectrum | ||
| ***********************************/ | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know why codecov says this line is not covered; do you? (sticking some prints or asserts in here can help track that down)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Judging from the coverage of the rest of that function (every goto being hit), my best guess is that the compiler is optimizing that label out. In fact, I could just change all of those
`goto out` to `return ret` and remove the `out:` label, since we're not doing any sort of cleanup at the end of this function.