-
Notifications
You must be signed in to change notification settings - Fork 54
Some support for cis/trans arithmetic #322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a1209e7
e90905f
69f11f0
93e46a5
bac0ec3
4f984a0
c885b83
2fc6d47
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
|
|
||
| import cooler | ||
| import cooler.tools | ||
| from .coverage import coverage | ||
|
|
||
|
|
||
| def sample_pixels_approx(pixels, frac): | ||
|
|
@@ -47,6 +48,7 @@ def sample( | |
| clr, | ||
| out_clr_path, | ||
| count=None, | ||
| cis_count=None, | ||
| frac=None, | ||
| exact=False, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. consider adding
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would ignore diags only apply to cis_count? Or any count?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd say yes ... Also just make sure what
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I dunno - maybe leave
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes meaning ... which of the options?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK! Merge then? |
||
| map_func=map, | ||
|
|
@@ -63,13 +65,17 @@ def sample( | |
| out_clr_path : str | ||
| A path/URI to the output. | ||
|
|
||
| count : float | ||
| count : int | ||
| The target number of contacts in the sample. | ||
| Mutually exclusive with `frac`. | ||
| Mutually exclusive with `cis_count` and `frac`. | ||
|
|
||
| cis_count : int | ||
| The target number of cis contacts in the sample. | ||
| Mutually exclusive with `count` and `frac`. | ||
|
|
||
| frac : float | ||
| The target sample size as a fraction of contacts in the original | ||
| dataset. Mutually exclusive with `count`. | ||
| dataset. Mutually exclusive with `count` and `cis_count`. | ||
|
|
||
| exact : bool | ||
| If True, the resulting sample size will exactly match the target value. | ||
|
|
@@ -87,12 +93,18 @@ def sample( | |
| if issubclass(type(clr), str): | ||
| clr = cooler.Cooler(clr) | ||
|
|
||
| if count is not None and frac is None: | ||
| if frac is not None and count is None and cis_count is None: | ||
| pass | ||
| elif frac is None and count is not None and cis_count is None: | ||
| frac = count / clr.info["sum"] | ||
| elif count is None and frac is not None: | ||
| count = np.round(frac * clr.info["sum"]) | ||
| elif frac is None and count is None and cis_count is not None: | ||
| cis_total = clr.info.get("cis", np.sum(coverage(clr)[0], dtype=int)) | ||
| frac = cis_count / cis_total | ||
| else: | ||
| raise ValueError("Either frac or tot_count must be specified!") | ||
| raise ValueError( | ||
| "Please specify exactly one argument among `count`, `cis_count`" | ||
| " and `frac`" | ||
| ) | ||
|
|
||
| if frac >= 1.0: | ||
| raise ValueError( | ||
|
|
@@ -101,6 +113,7 @@ def sample( | |
| ) | ||
|
|
||
| if exact: | ||
| count = np.round(frac * clr.info["sum"]).astype(int) | ||
| pixels = sample_pixels_exact(clr.pixels()[:], count) | ||
| cooler.create_cooler(out_clr_path, clr.bins()[:], pixels, ordered=True) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| import os.path as op | ||
| import cooler | ||
|
|
||
| import cooltools | ||
| import cooltools.api | ||
| from numpy import testing | ||
|
|
||
|
|
||
| def test_sample(request): | ||
| # perform test: | ||
| clr = cooler.Cooler(op.join(request.fspath.dirname, "data/CN.mm9.1000kb.cool")) | ||
|
|
||
| cooltools.api.sample.sample( | ||
| clr, | ||
| op.join(request.fspath.dirname, "data/CN.mm9.1000kb.test_sampled.cool"), | ||
| frac=0.5, | ||
| ) | ||
| clr_result = cooler.Cooler( | ||
| op.join(request.fspath.dirname, "data/CN.mm9.1000kb.test_sampled.cool") | ||
| ) | ||
| # Test that deviation from expected total is very small | ||
| testing.assert_allclose(clr_result.info["sum"], clr.info["sum"] / 2, rtol=1e-3) | ||
|
|
||
| cooltools.api.sample.sample( | ||
| clr, | ||
| op.join(request.fspath.dirname, "data/CN.mm9.1000kb.test_sampled.cool"), | ||
| count=200000000, | ||
| ) | ||
| clr_result = cooler.Cooler( | ||
| op.join(request.fspath.dirname, "data/CN.mm9.1000kb.test_sampled.cool") | ||
| ) | ||
| # Test that deviation from expected total is very small | ||
| testing.assert_allclose(clr_result.info["sum"], 200000000, rtol=1e-3) | ||
|
|
||
|
|
||
| def test_sample_exact(request): | ||
| # Exact sampling is very slow! NOTE(review): this test is not actually commented out; it runs against the 10000kb file | ||
| clr = cooler.Cooler(op.join(request.fspath.dirname, "data/CN.mm9.10000kb.cool")) | ||
|
|
||
| cooltools.api.sample.sample( | ||
| clr, | ||
| op.join(request.fspath.dirname, "data/CN.mm9.10000kb.test_sampled.cool"), | ||
| frac=0.5, | ||
| exact=True, | ||
| ) | ||
| clr_result = cooler.Cooler( | ||
| op.join(request.fspath.dirname, "data/CN.mm9.10000kb.test_sampled.cool") | ||
| ) | ||
| # Test that result matches expectation exactly | ||
| testing.assert_equal(clr_result.info["sum"], round(clr.info["sum"] * 0.5)) | ||
|
|
||
| cooltools.api.sample.sample( | ||
| clr, | ||
| op.join(request.fspath.dirname, "data/CN.mm9.10000kb.test_sampled.cool"), | ||
| count=200000000, | ||
| exact=True, | ||
| ) | ||
| clr_result = cooler.Cooler( | ||
| op.join(request.fspath.dirname, "data/CN.mm9.10000kb.test_sampled.cool") | ||
| ) | ||
| # Test that result matches expectation exactly | ||
| testing.assert_equal(clr_result.info["sum"], 200000000) |
Uh oh!
There was an error while loading. Please reload this page.